Kasai’s Algorithm is used to get the Longest Common Prefix (LCP) array from suffix array. At first suffix arrays are found. After that Kasai's algorithm takes the suffix array list to find LCP.
For the LCP array, it takes O(m log n), where m is pattern length and n is the length of the text. The Kasai’s Algorithm, it takes O(n) for searching the pattern in the main string.
Input and Output
Input: Main String: “banana” Output: Suffix Array : 5 3 1 0 4 2 Common Prefix Array : 1 3 0 0 2 0
Algorithm
buildSuffixArray(text)
Input: The main string
Output: The suffix array, built from the main text
Begin n := size of text for i := 0 to n, do suffArray[i].index := i suffArray[i].rank[0] := text[i] if (i+1) < n, then suffArray[i].rank[1] := text[i+1] else suffArray[i].rank[1] := -1 do sort the suffix array define index array to store indexes for k := 4 to (2*n)-1, increase k by k*2, do currRank := 0 prevRank := suffArray[0].rank[0] suffArray[0].rank[0] := currRank index[suffArray[0].index] = 0 for all character index i of text, do if suffArray[i].rank[0] = prevRank AND suffArray[i].rank[1] = suffArray[i-1].rank[1], then prevRank := suffArray[i].rank[0] suffArray[i].rank[0] := currRank else prevRank := suffArray[i].rank[0] suffArray[i].rank[0] := currRank + 1 currRank := currRank + 1 index[suffArray[i].index] := i done for all character index i of text, do nextIndex := suffArray[i].index + k/2 if nextIndex< n, then suffArray[i].rank[1] := suffArray[index[nextIndex]].rank[0] else suffArray[i].rank[1] := -1 done sort the suffArray done for all character index i of text, do insert suffArray[i].index into suffVector done End
kasaiAlgorithm(text, suffVector)
Input − The main text, and the suffix vector as a list of suffixes
Output: The location of where longest common prefix is found
Begin n := size of suffVector define longPrefix list of size n and fill all entries with 0 define suffInverse list of size n and fill all entries with 0 for all index values ‘i’ of suffVector, do suffInverse[suffVector[i]] = i done k := 0 for i := 0 to n-1, do if suffInverse[i] = n-1 then k :=0 ignore the bottom part and go for next iteration. j := suffVector[suffInverse[i]+1] while (i+k)<n AND (j+k) < n and text[i+k] = text[j+k], do increase k by 1 done longPrefix[suffInverse[i]] := k if k > 0 then decrease k by 1 done return longPrefix End
Example
#include<iostream> #include<vector> #include<algorithm> using namespace std; struct suffix { int index; int rank[2]; // To store rank pair }; bool compare(suffix s1, suffix s2) { //compare suffixes for sort function if(s1.rank[0] == s2.rank[0]) { if(s1.rank[1] < s2.rank[1]) return true; else return false; }else { if(s1.rank[0] < s2.rank[0]) return true; else return false; } } vector<int> buildSuffixArray(string mainString) { int n = mainString.size(); suffix suffixArray[n]; for (int i = 0; i < n; i++) { suffixArray[i].index = i; suffixArray[i].rank[0] = mainString[i] - 'a'; //store old rank suffixArray[i].rank[1] = ((i+1)<n)?(mainString[i+1]-'a'):-1; //rank after alphabetical ordering } sort(suffixArray, suffixArray+n, compare); //sort suffix array upto first 2 characters int index[n]; //index in suffixArray for (int k = 4; k < 2*n; k = k*2) { //increase k as power of 2 int currRank = 0; int prevRank = suffixArray[0].rank[0]; suffixArray[0].rank[0] = currRank; index[suffixArray[0].index] = 0; for (int i = 1; i < n; i++) { // Assigning rank to all suffix if (suffixArray[i].rank[0] == prevRank && suffixArray[i].rank[1] == suffixArray[i-1].rank[1]) { prevRank = suffixArray[i].rank[0]; suffixArray[i].rank[0] = currRank; } else{ //increment rank and assign prevRank = suffixArray[i].rank[0]; suffixArray[i].rank[0] = ++currRank; } index[suffixArray[i].index] = i; } for (int i = 0; i < n; i++) { // Assign next rank to every suffix int nextIndex = suffixArray[i].index + k/2; suffixArray[i].rank[1] = (nextIndex < n)? suffixArray[index[nextIndex]].rank[0]: -1; } sort(suffixArray, suffixArray+n, compare); //sort upto first k characters } vector<int>suffixVector; for (int i = 0; i < n; i++) suffixVector.push_back(suffixArray[i].index); //index of all suffix to suffix vector return suffixVector; } vector<int> kasaiAlgorithm(string mainString, vector<int> suffixVector) { int n = suffixVector.size(); vector<int> longPrefix(n, 0); //size n and initialize with 0 vector<int> suffixInverse(n, 0); for (int i=0; i < n; i++) suffixInverse[suffixVector[i]] = i; //fill values of inverse Suffix list int k = 0; for (int i=0; i<n; i++) { //for all suffix in main string if (suffixInverse[i] == n-1) { //when suffix at position (n-1) k = 0; continue; } int j = suffixVector[suffixInverse[i]+1]; //nest string of suffix list while (i+k<n && j+k<n && mainString[i+k]==mainString[j+k]) //start from kth index k++; longPrefix[suffixInverse[i]] = k; // prefix for the current suffix. if (k>0) k--; //remofe first character of string } return longPrefix; } void showArray(vector<int> vec) { vector<int>::iterator it; for (it = vec.begin(); it < vec.end() ; it++) cout << *it << " "; cout << endl; } int main() { string mainString = "banana"; vector<int>suffixArray = buildSuffixArray(mainString); int n = suffixArray.size(); cout<< "Suffix Array : "<<endl; showArray(suffixArray); vector<int>commonPrefix = kasaiAlgorithm(mainString, suffixArray); cout<< "\nCommon Prefix Array : "<<endl; showArray(commonPrefix); }
Output
Suffix Array : 5 3 1 0 4 2 Common Prefix Array : 1 3 0 0 2 0