Computer >> Computer tutorials >  >> Programming >> Programming

Boyer Moore Algorithm


It is another approach of Boyer Moore Algorithm. Sometimes it is called the Good Suffix Heuristic method. For this case, a preprocessing table is created as suffix table. In this procedure, the substring or pattern is searched from the last character of the pattern. When a substring of main string matches with a substring of the pattern, it moves to find other occurrences of the matched substring. It can also move to find a prefix of the pattern which is a suffix of main string. Otherwise, it moves the whole length of the pattern.

Input and Output

Input:
Main String: “ABAAABCDBBABCDDEBCABC”, Pattern: “ABC”
Output:
Pattern found at position: 4
Pattern found at position: 10
Pattern found at position: 18

Algorithm

fullSuffixMatch(shiftArray, borderArray, pattern)

Input − Array to store shift locations, the border array and the pattern to search.

Output − Fill the shift array and the border array.

Begin
   n := pattern length
   j := n
   j := n+1
   borderArray[i] := j

   while i > 0, do
      while j <= n AND pattern[i-1] ≠ pattern[j-1], do
         if shiftArray[j] = 0, then
            shiftArray[j] := j-i;
         j := borderArray[j];
      done

      decrease i and j by 1
      borderArray[i] := j
   done
End

 partialSuffixMatch(shiftArray, borderArray, pattern)

Input− Array to store shift locations, the border array and the pattern to search.

Output − Fill the shift array and the border array.

Begin
   n := pattern length
   j := borderArray[0]

   for index of all characters ‘i’ of pattern, do
      if shiftArray[i] = 0, then
         shiftArray[i] := j
      if i = j then
         j := borderArray[j]
   done
End

searchPattern(text, pattern)

Input: The main text and the pattern, that will be searched

Output − The indexes where the pattern is found

Begin
   patLen := pattern length
   strLen := text size

   for all entries of shiftArray, do
      set all entries to 0
   done

   call fullSuffixMatch(shiftArray, borderArray, pattern)
   call partialSuffixMatch(shiftArray, borderArray, pattern)
   shift := 0

   while shift <= (strLen - patLen), do
      j := patLen -1
      while j >= 0 and pattern[j] = text[shift + j], do
         decrease j by 1
      done

      if j < 0, then
         print the shift as, there is a match
         shift := shift + shiftArray[0]
      else
         shift := shift + shiftArray[j+1]
   done
End

Example

#include<iostream>
using namespace std;

void fullSuffixMatch(int shiftArr[], int borderArr[], string pattern) {
   int n = pattern.size();       //find length of pattern
   int i = n;
   int j = n+1;
   borderArr[i] = j;

   while(i > 0) {
      //search right when (i-1)th and (j-1)th item are not same
      while(j <= n && pattern[i-1] != pattern[j-1] ) {
         if(shiftArr[j] == 0)
            shiftArr[j] = j-i;     //shift pattern from i to j
         j = borderArr[j];       //update border
      }
      i--;
      j--;
      borderArr[i] = j;
   }  
}

void partialSuffixMatch(int shiftArr[], int borderArr[], string pattern) {
   int n = pattern.size();    //find length of pattern
   int j;
   j = borderArr[0];

   for(int i = 0; i<n; i++) {
      if(shiftArr[i] == 0)
         shiftArr[i] = j;        //when shift is 0, set shift to border value
         if(i == j)
            j = borderArr[j];    //update border value
   }
}

void searchPattern(string mainString, string pattern, int array[], int *index) {
   int patLen = pattern.size();
   int strLen = mainString.size();
   int borderArray[patLen+1];
   int shiftArray[patLen + 1];

   for(int i = 0; i<=patLen; i++) {
      shiftArray[i] = 0;     //set all shift array to 0
   }

   fullSuffixMatch(shiftArray, borderArray, pattern);
   partialSuffixMatch(shiftArray, borderArray, pattern);
   int shift = 0;

   while(shift <= (strLen - patLen)) {
      int j = patLen - 1;
      while(j >= 0 && pattern[j] == mainString[shift+j]) {
         j--;         //reduce j when pattern and main string character is matching
      }

      if(j < 0) {
         (*index)++;
         array[(*index)] = shift;
         shift += shiftArray[0];
      }else {
          shift += shiftArray[j+1];
      }
   }
}

int main() {
   string mainString = "ABAAABCDBBABCDDEBCABC";
   string pattern = "ABC";
   int locArray[mainString.size()];
   int index = -1;
   searchPattern(mainString, pattern, locArray, &index);

   for(int i = 0; i <= index; i++) {
      cout << "Pattern found at position: " << locArray[i]<<endl;
    }
}

Output

Pattern found at position: 4
Pattern found at position: 10
Pattern found at position: 18