Computer >> Computer tutorials >  >> Programming >> Programming

Rabin-Karp Algorithm


Rabin-Karp is another pattern-searching algorithm that finds a pattern more efficiently than the naive approach. It also slides a window over the text one position at a time, but instead of comparing every character in every window, it compares the hash value of the window with the hash value of the pattern. Only when the two hash values match does it check the characters one by one. This makes the algorithm more efficient.

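The average-case time complexity is O(m+n), where n is the length of the main string and m is the length of the pattern; in the worst case (when many hash values collide) it is O(mn).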

Input and Output

Input:
Main String: “ABAAABCDBBABCDDEBCABC”, Pattern “ABC”
Output:
Pattern found at position: 4
Pattern found at position: 10
Pattern found at position: 18

Algorithm

rabinKarpSearch(text, pattern, prime)

Input − The main text and the pattern, plus a prime number used as the modulus when computing hash values

Output − location where patterns are found

Begin
   patLen := pattern Length
   strLen := string Length
   patHash := 0 and strHash := 0, h := 1
   maxChar := total number of characters in character set

   for i := 1 to (patLen - 1), do
      h := (h*maxChar) mod prime
   done

   for all character index i of pattern, do
      patHash := (maxChar*patHash + pattern[i]) mod prime
      strHash := (maxChar*strHash + text[i]) mod prime
   done

   for i := 0 to (strLen - patLen), do
      if patHash = strHash, then
         for charIndex := 0 to patLen -1, do
            if text[i+charIndex] ≠ pattern[charIndex], then
               break the loop
         done

         if charIndex = patLen, then
            print the location i as pattern found at i position.
      if i < (strLen - patLen), then
         strHash := (maxChar*(strHash - text[i]*h) + text[i+patLen]) mod prime
         if strHash < 0, then
            strHash := strHash + prime
   done
End

Example

#include<iostream>
#include<string>
#include<vector>
#define MAXCHAR 256
using namespace std;

// Finds every occurrence of `pattern` inside `mainString` with the Rabin-Karp
// rolling-hash search and records the starting position of each match.
//
// Parameters:
//   mainString - text to search in.
//   pattern    - text to search for. An empty pattern, or one longer than
//                mainString, yields no matches.
//   prime      - modulus for the hash values. Values <= 0 are treated as 1,
//                which makes every hash 0: results stay correct, but every
//                window is then compared character by character.
//   array      - output buffer for match positions. The function cannot see
//                its capacity; the caller must provide room for at least
//                (mainString.size() - pattern.size() + 1) entries.
//   index      - in/out slot counter. It is incremented BEFORE each store, so
//                pass a pointer to -1 to have the first match land in
//                array[0]; on return it holds the index of the last match
//                written (unchanged if nothing was found).
void rabinKarpSearch(const std::string &mainString, const std::string &pattern, int prime, int array[], int *index) {
   // Radix of the polynomial hash: one more than the largest unsigned char
   // value (the same value as the file-level MAXCHAR).
   const long long radix = 256;
   // A non-positive modulus would divide by zero or produce inconsistent
   // signs, so fall back to the always-valid modulus 1.
   const long long mod = (prime > 0) ? prime : 1;

   const int patLen = static_cast<int>(pattern.size());
   const int strLen = static_cast<int>(mainString.size());

   // Nothing can match here, and continuing would read past the end of
   // mainString (long pattern) or report a match at every offset (empty one).
   if (patLen == 0 || patLen > strLen) {
      return;
   }

   // h = radix^(patLen-1) mod `mod`: the weight of the window's leading
   // character, needed to remove that character when the window slides.
   long long h = 1;
   for (int i = 0; i < patLen - 1; i++) {
      h = (h * radix) % mod;
   }

   // Hash the pattern and the first window. Characters are read as
   // unsigned char so bytes >= 0x80 never produce negative hash values;
   // otherwise the pattern hash could be negative while the rolled window
   // hash is normalised to non-negative, and real matches would be skipped.
   long long pattHash = 0;
   long long strHash = 0;
   for (int i = 0; i < patLen; i++) {
      pattHash = (radix * pattHash + static_cast<unsigned char>(pattern[i])) % mod;
      strHash = (radix * strHash + static_cast<unsigned char>(mainString[i])) % mod;
   }

   const int lastStart = strLen - patLen;
   for (int i = 0; i <= lastStart; i++) {
      // Equal hashes only suggest a match; confirm it on the characters.
      if (pattHash == strHash && mainString.compare(i, patLen, pattern) == 0) {
         (*index)++;
         array[*index] = i;
      }

      if (i < lastStart) {
         // Slide the window: drop mainString[i], append mainString[i+patLen].
         const long long outgoing = static_cast<unsigned char>(mainString[i]);
         const long long incoming = static_cast<unsigned char>(mainString[i + patLen]);
         strHash = (radix * (strHash - outgoing * h) + incoming) % mod;
         if (strHash < 0) {
            strHash += mod;   // % keeps the dividend's sign; bring it back into [0, mod)
         }
      }
   }
}

int main() {
   string mainString = "ABAAABCDBBABCDDEBCABC";
   string pattern = "ABC";
   int locArray[mainString.size()];
   int prime = 101;
   int index = -1;
   rabinKarpSearch(mainString, pattern, prime, locArray, &index);

   for(int i = 0; i <= index; i++) {
      cout << "Pattern found at position: " << locArray[i]<<endl;
   }
}

Output

Pattern found at position: 4
Pattern found at position: 10
Pattern found at position: 18