class KMP { // Copyright by Michael Lecuyer 1998. // This program may be used in any way you choose. // Please acknowledge my copyright where you use this in your program. // No need for a public copyright notice. private int nextLen; private byte pat[]; // Byte representation of pattern private int patLen; private int partial; // Bytes of a partial match found at the end of a text buffer private byte next[]; // internal KMP table /** * Knuth-Morris-Pratt text search *

Scans text left to right using what it knows of the pattern * quickly determine if a match has been made in the text. * This cuts down considerably on the number of comparisons between * the pattern and text found in pure brute-force compares. *

The particular version used here is * from "Handbook of Algorithms and Data * Structures", G.H. Gonnet & R. Baeza-Yates. * *

	* String pattern = "and ";
	* 
* BM bm = new BM(); * bm.compile(pattern); * * int bcount; * int search; * while ((bcount = f.read(b)) >= 0) * { * System.out.println("New Block:"); * search = 0; * while ((search = bm.search(b, search, bcount-search)) >= 0) * { * if (search >= 0) * { * System.out.println("full pattern found at " + search); *
* search += pattern.length(); * continue; * } * } * if ((search = bm.partialMatch()) >= 0) * { * System.out.println("Partial pattern found at " + search); * } * } *
*/ KMP() { next = null; } /** * Compiles the text pattern for searching. * * @param pattern What we're looking for. */ public void compile(String pattern) { pat = pattern.getBytes(); patLen = pat.length; int i = 0; int j = -1; next = new byte[patLen]; nextLen = next.length; next[0] = -1; while (true) { if (j == -1 || pat[i] == pat[j]) { i++; j++; next[i] = (byte)((pat[j] == pat[i]) ? next[j] : j); } else j = next[j]; if (i == nextLen - 1) break; } } /** * Search for the compiled pattern in the given text. * A side effect of the search is the notion of a partial * match at the end of the searched buffer. * This partial match is helpful in searching text files when * the entire file doesn't fit into memory. * * @param text Buffer containing the text * @param start Start position for search * @param length Length of text in the buffer to be searched. * * @return position in buffer where the pattern was found. * @see patialMatch */ // search the text for the pattern. // Return the position the object started // Return -1 if not found. // A partial match at the end of the buffer // can be checked by calling isPatial(); // public int search(byte text[], int start, int length) { int i, j; int t = start; // starting text position int textLen = length + start; partial = -1; // assume full match or no match if (next == null) return -1; // no pattern compiled, nothing matches. if (patLen == 0) return 0; // no pattern, everything matches. for (j = 0; t < textLen; ) { if (j == -1 || pat[j] == text[t]) { t++; j++; if (j == patLen) return t - j; } else j = next[j]; } if (t == textLen && j > 0) // if we're near end of buffer -- { partial = t - j; return -1; // not a real match } return -1; // no match } /** * Returns the position at the end of the text buffer where a partial match was found. *

* In many case where a full text search of a large amount of data * precludes access to the entire file or stream the search algorithm * will note where the final partial match occurs. * After an entire buffer has been searched for full matches calling * this method will reveal if a potential match appeared at the end. * This information can be used to patch together the partial match * with the next buffer of data to determine if a real match occurred. * * @return -1 the number of bytes that formed a partial match, -1 if no * partial match */ public int partialMatch() { return partial; } }