Author: Jacob Park
A string is a zero-indexed array of $n$ characters.
See Link. $$ \begin{align} H(\text{s}) &= \sum_{i=0}^{n-1} \text{s}\left[i\right] \cdot p^{i} \pmod m \\ \text{h}\left[0\right] &= \text{s}\left[0\right] \\ \text{h}\left[k\right] &= \left(\text{h}\left[k-1\right] \cdot p + \text{s}\left[k\right]\right) \pmod m\\ \text{p}\left[0\right] &= 1 \\ \text{p}\left[k\right] &= \left(\text{p}\left[k-1\right] \cdot p\right) \pmod m \\ H(\text{s}\left[a...b\right]) &= \left(\text{h}\left[b\right] - \text{h}\left[a-1\right] \cdot \text{p}\left[b-a+1\right]\right) \pmod m \end{align} $$
package strings.polynomial_hashing;
import java.util.*;
public class PolynomialHasher {
private static final long PA = 31;
private static final long PB = 37;
private static final long MA = (long) (1E9 + 7);
private static final long MB = (long)(1E9 + 9);
private final String string;
private final long[] powers;
private final long[] hashes;
private final long p;
private final long m;
public PolynomialHasher(String string, boolean alphaFlag) {
this.string = string;
this.powers = new long[string.length()];
this.hashes = new long[string.length()];
this.p = (alphaFlag) ? PA : PB;
this.m = (alphaFlag) ? MA : MB;
initialize();
}
private void initialize() {
if (string.isEmpty()) {
return;
}
powers[0] = 1;
hashes[0] = string.charAt(0);
for (int index = 1; index < string.length(); index++) {
powers[index] = (powers[index - 1] * p) % m;
hashes[index] = (hashes[index - 1] * p + string.charAt(index)) % m;
}
}
public long hash(int beginIndex, int endIndex) {
if (beginIndex < 0 || beginIndex > endIndex || endIndex > string.length()) {
throw new IllegalArgumentException();
}
if (string.isEmpty()) {
return 0;
}
if (beginIndex == 0) {
return hashes[endIndex];
} else {
// Note: Java's % operator returns the remainder which can be negative.
// Accordingly, A mod B == ((A % B) + B) % B
return ((hashes[endIndex] - hashes[beginIndex - 1] * powers[endIndex - beginIndex + 1]) % m + m) % m;
}
}
}
package strings.polynomial_hashing;
import java.util.*;
final String pattern = "abracadabra";
final String text = "abacadabrabracabracadabrabrabracad";
final PolynomialHasher patternHasher = new PolynomialHasher(pattern, true);
final PolynomialHasher textHasher = new PolynomialHasher(text, true);
final long patternHash = patternHasher.hash(0, pattern.length() - 1);
for (int beginIndex = 0; beginIndex <= text.length() - pattern.length(); beginIndex++) {
final int endIndex = beginIndex + pattern.length() - 1;
final long textSubstringHash = textHasher.hash(beginIndex, endIndex);
if (patternHash == textSubstringHash) {
System.out.println(String.format(
"Pattern Matched [beginIndex=%d, endIndex=%d]", beginIndex, endIndex));
break;
}
}
package strings.suffix_array;
import java.util.*;
public class SuffixArray {
private final String string;
private final int[] indices;
public SuffixArray(String string) {
this.string = string;
this.indices = new SuffixComparator(string).compute();
}
/**
* Returns the index of the 'i'th smallest suffix.
*/
public int index(int index) {
return indices[index];
}
/**
* Returns the 'i'th smallest suffix.
*/
public String select(int index) {
return string.substring(indices[index]);
}
/**
* Returns the number of suffixes strictly less than the query.
*/
public int rank(String query) {
int headIndex = 0;
int tailIndex = string.length() - 1;
while (headIndex <= tailIndex) {
final int medianIndex = headIndex + (tailIndex - headIndex) / 2;
final int comparison = compare(query, indices[medianIndex]);
if (comparison < 0) {
tailIndex = medianIndex - 1;
} else if (comparison > 0) {
headIndex = medianIndex + 1;
} else {
return medianIndex;
}
}
return headIndex;
}
/**
* Returns whether the query is less than the suffix from medianIndex.
*/
private int compare(String query, int medianIndex) {
int iIndex = medianIndex;
int jIndex = 0;
while (iIndex < string.length() && jIndex < query.length()) {
if (query.charAt(jIndex) != string.charAt(iIndex)) {
return query.charAt(jIndex) - string.charAt(iIndex);
}
iIndex++;
jIndex++;
}
if (iIndex < string.length()) {
return -1;
}
if (jIndex < query.length()) {
return +1;
}
return 0;
}
/**
* Returns the length of the longest common prefix of the 'i'th smallest suffix and
* the 'i'-1st smallest suffix.
*/
public int lcpLength(int index) {
int iIndex = indices[index];
int jIndex = indices[index - 1];
int length = 0;
while (iIndex < string.length() && jIndex < string.length()) {
if (string.charAt(iIndex) != string.charAt(jIndex)) {
return length;
}
iIndex++;
jIndex++;
length++;
}
return length;
}
/**
* Returns the LCP Array which stores the length of the longest common prefix between
* all pairs of consecutive suffixes in a sorted suffix array, using the Kasai's
* Algorithm in O(n).
* Note: LCP(i,j) = Min(lcp[i], lcp[i+1], ..., lcp[j-1])
*/
public int[] lcpArray() {
final int[] lcp = new int[string.length()];
final int[] inverseIndices = new int[string.length()];
for (int index = 0; index < string.length(); index++) {
inverseIndices[indices[index]] = index;
}
int length = 0;
for (int iIndex = 0; iIndex < string.length(); iIndex++) {
// Edge Case:
if (inverseIndices[iIndex] == string.length() - 1) {
length = 0;
continue;
}
// Main Case:
int jIndex = indices[inverseIndices[iIndex] + 1];
while ((iIndex + length) < string.length() &&
(jIndex + length) < string.length()) {
if (string.charAt(iIndex) != string.charAt(jIndex)) {
break;
}
length++;
}
lcp[inverseIndices[iIndex]] = length;
if (length > 0) {
length--;
}
}
return lcp;
}
private static class SuffixComparator implements Comparator<Integer> {
private final String string;
private final int[] currOrder;
private final int[] nextOrder;
private int size;
public SuffixComparator(String string) {
this.string = string;
this.currOrder = new int[string.length()];
this.nextOrder = new int[string.length()];
this.size = 0;
}
public int[] compute() {
final Integer[] indices = new Integer[string.length()];
// Initialize:
for (int index = 0; index < string.length(); index++) {
indices[index] = index;
currOrder[index] = string.charAt(index);
nextOrder[index] = 0;
}
// Prefix Doubling Size Stable Sort:
for (size = 1;
nextOrder[string.length() - 1] != string.length() - 1;
size <<= 1) {
// If Radix Sort, Remove \log N.
Arrays.sort(indices, this);
// Suffix of 2^(N+1) = Two Splits of 2^N => Ordering Changes If
// First Splits Equivalent.
for (int index = 0; index < string.length() - 1; index++) {
nextOrder[index + 1] = nextOrder[index] +
(compare(indices[index], indices[index + 1]) < 0 ? 1 : 0);
}
// Update Order:
for (int index = 0; index < string.length(); index++) {
currOrder[indices[index]] = nextOrder[index];
}
}
// Unboxing:
return Arrays.stream(indices).mapToInt(Integer::intValue).toArray();
}
@Override
public int compare(Integer lIndex, Integer rIndex) {
// Compare First Split:
if (currOrder[lIndex] != currOrder[rIndex]) {
return currOrder[lIndex] - currOrder[rIndex];
}
// Compare Second Split b/c Equivalent Prefixes:
if ((lIndex += size) < string.length() &&
(rIndex += size) < string.length()) {
return currOrder[lIndex] - currOrder[rIndex];
}
return rIndex - lIndex;
}
}
}
package strings.suffix_array;
import java.util.*;
// Suffixes:
// 0 => banana
// 1 => anana
// 2 => nana
// 3 => ana
// 4 => na
// 5 => a
// Suffix Array:
// 5 => a
// 3 => ana
// 1 => anana
// 0 => banana
// 4 => na
// 2 => nana
// LCP Array:
// [1, 3, 0, 0, 2, 0]
final String string = "banana";
final SuffixArray suffixArray = new SuffixArray(string);
System.out.println("Sorted Suffixes:");
for (int index = 0; index < string.length(); index++) {
System.out.println(suffixArray.select(index));
}
final int[] lcpArray = suffixArray.lcpArray();
System.out.printf("LCP Array: %s\n", Arrays.toString(lcpArray));