Originally Posted by
concerto49
Assuming it looks like:
<key1> <value1> <key2> <value>... and IF search string is a substring of a key, return each of these results?
How are you hashing it right now? Are you hashing every substring possible?
The code I have written to read the file is shown below:
Please suggest changes, or a better approach, so that reading the file and searching it complete in less than 5 milliseconds.
1. File CharByCharSearch.java
import java.io.BufferedReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
/**
 * Loads a keyword-to-link table from a text file once, during class
 * initialisation, and answers keyword searches against that table.
 *
 * <p>Each input line is expected to look like
 * {@code keyword <A HREF="url" ...>...</A>}: the keyword is the text before
 * the first {@code '<'} and the stored value is the text that follows the
 * first {@code ="} up to the character before the next space (the HREF URL
 * in the sample data).
 *
 * <p>The table is filled only inside the static initialiser and is read-only
 * afterwards, so {@link #search(String)} reads it directly. An earlier
 * version handed the table to searches through a {@code ThreadLocal} that was
 * set in the static initialiser; that value exists only on the thread that
 * happened to load the class, so every other thread received {@code null}
 * and failed with a {@code NullPointerException}.
 */
public class CharByCharSearch {
    /** Lower-cased keyword mapped to the value parsed from the same line. */
    private static final HashMap<String, String> mapForKeyValues = new HashMap<String, String>();
    private static CharByCharSearch getHtml = null;
    private static BufferedReader dataInputStream = null;

    static {
        getHtml = new CharByCharSearch();
        dataInputStream = FileReader.getFileContentsBR();
        getHtml.grabHTMLLinksSearch();
    }

    public CharByCharSearch() {
    }

    /**
     * Reads every line from the shared reader into the keyword table, prints
     * the elapsed load time in milliseconds, and closes the reader.
     *
     * <p>Failures are reported on the console rather than propagated, because
     * this method runs from the static initialiser.
     */
    public void grabHTMLLinksSearch() {
        try {
            long startMillis = System.currentTimeMillis();
            String line = dataInputStream.readLine();
            while (null != line) {
                addEntry(line);
                line = dataInputStream.readLine();
            }
            // Sample the clock after the loop so the last line is included
            // and an empty file does not print a negative duration.
            System.out.println("time took to search the keyword@@@@ "
                    + (System.currentTimeMillis() - startMillis));
        } catch (Exception e) {
            System.out.println("error when getting the data");
            e.printStackTrace();
        } finally {
            try {
                if (null != dataInputStream) {
                    dataInputStream.close();
                }
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Parses one input line and stores its keyword/value pair.
     *
     * <p>Lines that do not have the expected shape are skipped instead of
     * throwing, so one bad or blank line no longer aborts the whole load.
     * Lines with an empty keyword are skipped as well.
     */
    private static void addEntry(String line) {
        int tagStart = line.indexOf('<');
        // Search the original line: lower-casing it first only cost a copy and
        // could shift indices for characters whose lower-case form differs in length.
        int equalsAt = line.indexOf('=');
        int valueStart = equalsAt + 2; // skip '=' and the opening quote

        // tagStart < 2 means there is no '<' or no keyword in front of it.
        if (tagStart < 2 || equalsAt < 0 || valueStart > line.length()) {
            reportSkipped(line);
            return;
        }

        String rest = line.substring(valueStart);
        int valueEnd = rest.indexOf(' ') - 1; // drop the closing quote before the space
        if (valueEnd < 0) {
            reportSkipped(line);
            return;
        }

        // tagStart - 1 drops the single space separating keyword and tag.
        mapForKeyValues.put(line.substring(0, tagStart - 1).toLowerCase(),
                rest.substring(0, valueEnd));
    }

    /** Reports a skipped line on stderr; blank lines are ignored quietly. */
    private static void reportSkipped(String line) {
        if (line.trim().length() != 0) {
            System.err.println("skipping malformed line: " + line);
        }
    }

    /**
     * Returns the stored value of every keyword that matches the search text.
     *
     * @param searchWord the text to search with; compared case-insensitively
     * @return the values of all matching keywords, in the table's iteration
     *         order; empty when nothing matches
     */
    public ArrayList<String> search(String searchWord) {
        ArrayList<String> linkURLS = new ArrayList<String>();
        String searchKey = searchWord.toLowerCase();
        String[] searchKeyValues = searchKey.split(" ");

        // entrySet() yields key and value together, avoiding a second lookup per hit.
        for (Map.Entry<String, String> entry : mapForKeyValues.entrySet()) {
            if (matches(entry.getKey(), searchKey, searchKeyValues)) {
                linkURLS.add(entry.getValue());
            }
        }
        return linkURLS;
    }

    /**
     * Applies the matching rules to a single keyword. The KMP scan is run
     * only for the rules that actually need it.
     */
    private static boolean matches(String key, String searchKey, String[] searchWords) {
        if (key.length() >= 10) {// rule 1 & 2: long keyword occurs anywhere in the search text
            return -1 != searchByChar.kmp(searchKey, key);
        }
        if (key.equalsIgnoreCase(searchKey)) {// rule 3: keyword is the whole search text
            return true;
        }
        if (key.endsWith("_")) {// rule 5: short keyword ending in '_' occurs anywhere
            return -1 != searchByChar.kmp(searchKey, key);
        }
        // rule 4: keyword equals one of the space-separated search words
        for (int i = 0; i < searchWords.length; i++) {
            if (searchWords[i].equalsIgnoreCase(key)) {
                return true;
            }
        }
        return false;
    }

    public static void main(String[] args) {
        ArrayList<String> linkURLS = getHtml
                .search("My Name Is jay_ patil_00_.");
        for (String value : linkURLS) {
            System.out.println(value);
        }
    }
}
--------------------------------------------------------------------------
2. Second java file searchByChar.java
/**
 * Knuth-Morris-Pratt substring search over {@code String}s.
 */
public class searchByChar {
    /**
     * Builds the KMP failure table for a pattern.
     *
     * <p>{@code next[0]} is {@code -1}; for {@code i > 0}, {@code next[i]} is
     * the pattern index to resume comparing from after a mismatch at
     * position {@code i}.
     *
     * @param pattern the pattern to preprocess
     * @return a table with one entry per pattern character; an empty array
     *         for an empty pattern
     */
    public static int[] prekmp(String pattern) {
        int[] next = new int[pattern.length()];
        if (next.length == 0) {
            // Nothing to index; writing next[0] would be out of bounds.
            return next;
        }
        int i = 0, j = -1;
        next[0] = -1;
        while (i < pattern.length() - 1) {
            // Fall back through shorter borders until one can be extended.
            while (j >= 0 && pattern.charAt(i) != pattern.charAt(j)) {
                j = next[j];
            }
            i++;
            j++;
            next[i] = j;
        }
        return next;
    }

    /**
     * Finds the first occurrence of {@code pattern} inside {@code text}.
     *
     * @param text    the string to scan
     * @param pattern the string to look for
     * @return the index in {@code text} where the first match starts, or
     *         {@code -1} when there is none; {@code 0} for an empty pattern,
     *         as {@code String.indexOf} does
     */
    public static int kmp(String text, String pattern) {
        if (pattern.length() == 0) {
            return 0;
        }
        if (pattern.length() > text.length()) {
            // Cannot match; skip building the table.
            return -1;
        }
        int[] next = prekmp(pattern);
        int i = 0, j = 0;
        while (i < text.length()) {
            // On mismatch reuse the longest border; j == -1 restarts the pattern.
            while (j >= 0 && text.charAt(i) != pattern.charAt(j)) {
                j = next[j];
            }
            i++;
            j++;
            if (j == pattern.length()) {
                return i - pattern.length();
            }
        }
        return -1;
    }
}
--------------------------------------------------------------------------
The contents of the text file look like this:
patil_00_ <A HREF="http://support.jay.com:8080/index.jsp" title="View the Supp" target=_blank class="table">patil_00_</A>
jay_ <A HREF="http://support.sac.com:8080/index.jsp" title="View the jsp" target=_blank class="link">jay_</A>
...........................
The third file, FileReader.java, reads the text file using a DataInputStream and returns the dataInputStream.