0% found this document useful (0 votes)
31 views43 pages

Daa Lab 9

The document describes a C++ program that implements the Rabin-Karp algorithm for string matching. It includes the code for the Rabin-Karp algorithm and tests it on different length strings, recording the running times. It also modifies the program to count the number of character comparisons instead of occurrences.

Uploaded by

Mugdha 2727
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
31 views43 pages

Daa Lab 9

The document describes a C++ program that implements the Rabin-Karp algorithm for string matching. It includes the code for the Rabin-Karp algorithm and tests it on different length strings, recording the running times. It also modifies the program to count the number of character comparisons instead of occurrences.

Uploaded by

Mugdha 2727
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 43

CSE-2012

LAB LPS-9
Name :- Mugdha
Registration Number :- 20BPS1095
1. Implement using C++ the Rabin-Karp algorithm.
CODE –

#include<iostream>
#include<string.h>
using namespace std;

// d is the number of characters in the input alphabet
#define d 256

// Running total of pattern occurrences found by search().
int count = 0;

/* Rabin-Karp string matching.
   Adds to the global `count` the number of positions (overlapping ones
   included) at which pat occurs in txt.
   pat -> pattern
   txt -> text
   q   -> A prime number (modulus of the rolling hash)
*/
void search(char pat[], char txt[], int q)
{
    int M = strlen(pat);
    int N = strlen(txt);

    // An empty pattern, or one longer than the text, has no occurrence.
    // Returning here also keeps the hashing loop below from reading txt
    // beyond its terminator.
    if (M == 0 || M > N)
        return;

    int i, j;
    int p = 0; // hash value for pattern
    int t = 0; // hash value for the current window of txt
    int h = 1;

    // The value of h would be "pow(d, M-1)%q"
    for (i = 0; i < M - 1; i++)
        h = (h * d) % q;

    // Calculate the hash value of pattern and first window of text
    for (i = 0; i < M; i++)
    {
        p = (d * p + pat[i]) % q;
        t = (d * t + txt[i]) % q;
    }

    // Slide the pattern over text one by one
    for (i = 0; i <= N - M; i++)
    {
        // Only when the hash values agree are the characters compared
        if (p == t)
        {
            for (j = 0; j < M; j++)
            {
                if (txt[i + j] != pat[j])
                    break;
            }

            // p == t and pat[0...M-1] == txt[i...i+M-1]
            if (j == M)
                ::count++; // qualified so it cannot be confused with std::count
        }

        // Hash of the next window: remove leading digit, add trailing digit
        if (i < N - M)
        {
            t = (d * (t - txt[i] * h) + txt[i + M]) % q;

            // The subtraction can leave t negative; bring it back into [0, q)
            if (t < 0)
                t = (t + q);
        }
    }
}

/* Driver Code
*/ int main()
{
// char txt[] = "GEEKS FOR GEEKS";
// char pat[] =
"GEEK"; char
txt[100], pat[100];
scanf("%s", txt);
scanf("%s", pat);

// A prime
number int q =
101;

// function call
search(pat, txt,
q);
cout<<count;
return 0;
}

Output –

2. For various values of n (length of S1) and m (length of S2)


compute the running times of the brute-force program in IPS1 and
the Rabin-Karp program in LPS1. Record the data in a table as
below. Here T1(P) and T2(P) are the running times of the brute
force program and the Rabin-Karp program respectively.

CODE:

#include <iostream>
#include <string>
#include <string.h>
#include <cstdlib>
#include <sys/time.h>
using namespace std;

// d is the number of characters in the input alphabet
#define d 256

// Running total of occurrences found by rabin_karp_algo().
int count = 0;

/* Brute-force string matching.
   s1 -> text
   s2 -> pattern
   Returns the number of positions (overlapping ones included) at which s2
   occurs in s1. Callers that only want to time the scan may ignore the result.
*/
int bfp(string s1, string s2){
    const int n = s1.length(), m = s2.length();

    // Nothing to find; also avoids looping forever on an empty pattern.
    if (m == 0 || m > n)
        return 0;

    int c = 0;
    // Try every alignment of the pattern, restarting the comparison from
    // the pattern's first character each time.
    for (int start = 0; start + m <= n; start++){
        int j = 0;
        while (j < m && s1[start + j] == s2[j])
            j++;
        if (j == m)
            c++;
    }
    return c;
}

/* Rabin-Karp string matching on std::string.
   Adds to the global `count` the number of positions (overlapping ones
   included) at which pat occurs in txt.
   pat -> pattern
   txt -> text
   q   -> A prime number (modulus of the rolling hash)
*/
void rabin_karp_algo(string pat, string txt, int q){
    int M = pat.length();
    int N = txt.length();

    // An empty pattern, or one longer than the text, has no occurrence;
    // this also keeps the hashing loop from indexing txt past its end.
    if (M == 0 || M > N)
        return;

    int i, j;
    int p = 0; // hash value for pattern
    int t = 0; // hash value for the current window of txt
    int h = 1;

    // The value of h would be "pow(d, M-1)%q"
    for (i = 0; i < M - 1; i++)
        h = (h * d) % q;

    // Calculate the hash value of pattern and first window of text
    for (i = 0; i < M; i++){
        p = (d * p + pat[i]) % q;
        t = (d * t + txt[i]) % q;
    }

    // Slide the pattern over text one by one
    for (i = 0; i <= N - M; i++){
        // Only when the hash values agree are the characters compared
        if (p == t){
            for (j = 0; j < M; j++){
                if (txt[i + j] != pat[j])
                    break;
            }

            // p == t and pat[0...M-1] == txt[i...i+M-1]
            if (j == M)
                ::count++; // qualified so it cannot be confused with std::count
        }

        // Hash of the next window: remove leading digit, add trailing digit
        if (i < N - M){
            t = (d * (t - txt[i] * h) + txt[i + M]) % q;

            // The subtraction can leave t negative; bring it back into [0, q)
            if (t < 0)
                t = (t + q);
        }
    }
}

int main(){ string s1 = "a", s2


= "aaaa";

int n = 4, m = 0, an[10], am[10];


float t1[10], t2[10];

for(int i = 0; i < 10
;i++){ n = n + 2*i;
m=m+
i; an[i] = n; am[i] = m; s1 =
"aaaaaaaaaaaaaaaaaaaaaaaaaaa", s2 =
"aaaaaaaa"; struct timeval start, end;
// start timer.
gettimeofday(&start,
NULL);
// unsync the I/O of C++.
ios_base::sync_with_stdio(false);
// function
call
bfp(s1,s2);
// stop
timer.
gettimeofday(&end,
NULL);
// Calculating total time taken by the
program. double time_taken;
time_taken = (end.tv_sec - start.tv_sec) * 1e6;
time_taken = (time_taken + (end.tv_usec -
start.tv_usec))
* 1e-6; t1[i] = time_taken;
struct timeval start1, end1;
// start timer.
gettimeofday(&start1,
NULL);
// unsync the I/O of C++.
ios_base::sync_with_stdio(false);
// A prime
number int q =
101;
// function call
rabin_karp_algo(s2,s1,q)
; // stop timer.
gettimeofday(&end1,
NULL);
// Calculating total time taken by the program.
double time_taken1;
time_taken1 = (end1.tv_sec - start1.tv_sec) * 1e6;
time_taken1 = (time_taken1 + (end1.tv_usec -
start1.tv_usec))
* 1e-6; t2[i] = time_taken1;
}

cout<<"S.No."<<"\t"<<"n"<<"\t"<<"m"<<"\t"<<"T1(p)"<<"\t\t
"<<"T2(p) in seconds"<<endl;
for(int i = 0 ; i < 10 ;i++){
printf("%d\t%d\t%d\t%f\t%f\n",(i+1),an[i], am[i], t1[i],
t2[i]);
}}

Output –
3. Run your program for IPS1 with the strings given in the link
https://www-igm.univ-mlv.fr/~lecroq/string/examples/exp5.html

CODE:

#include <iostream>
#include <string>
using namespace std;

/* Brute-force string matching: reads the text s1 and the pattern s2 from
   standard input and prints how many times s2 occurs in s1 (overlapping
   occurrences included). */
int main(){
    string s1, s2;
    if (!(cin >> s1 >> s2))
        return 1;

    const int n = s1.length(), m = s2.length();
    int c = 0;

    // Try every alignment of the pattern, restarting the comparison from
    // the pattern's first character each time.
    for (int start = 0; start + m <= n; start++){
        int j = 0;
        while (j < m && s1[start + j] == s2[j])
            j++;
        if (j == m)
            c++;
    }

    cout << c;
    return 0;
}

Output –

4. Run your program for LPS1 with the strings given in the link
https://www-igm.univ-mlv.fr/~lecroq/string/examples/exp5.html

CODE:

#include<iostream>
#include<string.h>

using namespace std;

// d is the number of characters in the input alphabet
#define d 256

// Running total of pattern occurrences found by search().
int count = 0;

/* Rabin-Karp string matching.
   Adds to the global `count` the number of positions (overlapping ones
   included) at which pat occurs in txt.
   pat -> pattern
   txt -> text
   q   -> A prime number (modulus of the rolling hash)
*/
void search(char pat[], char txt[], int q)
{
    int M = strlen(pat);
    int N = strlen(txt);

    // An empty pattern, or one longer than the text, has no occurrence.
    // Returning here also keeps the hashing loop below from reading txt
    // beyond its terminator.
    if (M == 0 || M > N)
        return;

    int i, j;
    int p = 0; // hash value for pattern
    int t = 0; // hash value for the current window of txt
    int h = 1;

    // The value of h would be "pow(d, M-1)%q"
    for (i = 0; i < M - 1; i++)
        h = (h * d) % q;

    // Calculate the hash value of pattern and first window of text
    for (i = 0; i < M; i++)
    {
        p = (d * p + pat[i]) % q;
        t = (d * t + txt[i]) % q;
    }

    // Slide the pattern over text one by one
    for (i = 0; i <= N - M; i++)
    {
        // Only when the hash values agree are the characters compared
        if (p == t)
        {
            for (j = 0; j < M; j++)
            {
                if (txt[i + j] != pat[j])
                    break;
            }

            // p == t and pat[0...M-1] == txt[i...i+M-1]
            if (j == M)
                ::count++; // qualified so it cannot be confused with std::count
        }

        // Hash of the next window: remove leading digit, add trailing digit
        if (i < N - M)
        {
            t = (d * (t - txt[i] * h) + txt[i + M]) % q;

            // The subtraction can leave t negative; bring it back into [0, q)
            if (t < 0)
                t = (t + q);
        }
    }
}

/* Driver Code
*/ int main()
{
// char txt[] = "GEEKS FOR GEEKS";
// char pat[] = "GEEK";
char txt[100],
pat[100];
scanf("%s", txt);
scanf("%s", pat);

// A prime
number int q =
101;

// function call
search(pat, txt,
q); cout<<count;
return 0;
}
Output –

5. Modify your program for IPS1 to count only the total number of
character comparisons and print the same.

CODE:

#include <iostream>
#include <string>
using namespace std;

/* Brute-force string matching with a comparison counter: reads the text s1
   and the pattern s2 from standard input, then prints the number of
   occurrences and the total number of character comparisons performed. */
int main(){
    string s1, s2;
    if (!(cin >> s1 >> s2))
        return 1;

    const int n = s1.length(), m = s2.length();
    int c = 0, charCmp = 0;

    // Try every alignment of the pattern against the text.
    for (int start = 0; start + m <= n; start++){
        int j = 0;
        while (j < m){
            // Count every comparison, whether it matches or not.
            charCmp++;
            if (s1[start + j] != s2[j])
                break;
            j++;
        }
        if (j == m)
            c++;
    }

    cout<<"Total Occurences: "<<c<<endl;
    cout<<"Character Comparison: "<<charCmp;
    return 0;
}

OUTPUT:

6. Modify your program for LPS1 to count only the total number of
character comparisons and print the same.
CODE:

#include<iostream>
#include<string.h>

using namespace std;

// d is the number of characters in the input alphabet
#define d 256

// count   -> running total of pattern occurrences found by search()
// charCmp -> running total of character comparisons made by search()
int count = 0, charCmp = 0;

/* Rabin-Karp string matching with a comparison counter.
   Adds the number of occurrences of pat in txt to the global `count`, and
   the number of character comparisons made while verifying hash hits to
   the global `charCmp`.
   pat -> pattern
   txt -> text
   q   -> A prime number (modulus of the rolling hash)
*/
void search(char pat[], char txt[], int q)
{
    int M = strlen(pat);
    int N = strlen(txt);

    // An empty pattern, or one longer than the text, has no occurrence.
    // Returning here also keeps the hashing loop below from reading txt
    // beyond its terminator.
    if (M == 0 || M > N)
        return;

    int i, j;
    int p = 0; // hash value for pattern
    int t = 0; // hash value for the current window of txt
    int h = 1;

    // The value of h would be "pow(d, M-1)%q"
    for (i = 0; i < M - 1; i++)
        h = (h * d) % q;

    // Calculate the hash value of pattern and first window of text
    for (i = 0; i < M; i++)
    {
        p = (d * p + pat[i]) % q;
        t = (d * t + txt[i]) % q;
    }

    // Slide the pattern over text one by one
    for (i = 0; i <= N - M; i++)
    {
        // Only when the hash values agree are the characters compared
        if (p == t)
        {
            for (j = 0; j < M; j++)
            {
                // Count the comparison before making it, so that the
                // mismatching comparison that ends the loop is included.
                ::charCmp++;
                if (txt[i + j] != pat[j])
                    break;
            }

            // p == t and pat[0...M-1] == txt[i...i+M-1]
            if (j == M)
                ::count++; // qualified so it cannot be confused with std::count
        }

        // Hash of the next window: remove leading digit, add trailing digit
        if (i < N - M)
        {
            t = (d * (t - txt[i] * h) + txt[i + M]) % q;

            // The subtraction can leave t negative; bring it back into [0, q)
            if (t < 0)
                t = (t + q);
        }
    }
}

/* Driver Code
*/ int main()
{
// char txt[] = "GEEKS FOR GEEKS";
// char pat[] = "GEEK";
char txt[100],
pat[100]; scanf("%s",
txt);
scanf("%s", pat);

// A prime
number int q =
101;

// function call search(pat, txt,


q); cout<<"Total Occurences:
"<<count<<endl; cout<<"Character
Comparison: "<<charCmp; return 0;
}
OUTPUT:

7. For various values of n (length of S1) and m (length of S2) count


the total number of character comparisons of the brute-force
program in IPS1 and the Rabin-Karp program in LPS1 (Use your
programs for LPS5 and LPS6). Record the data in a table as below.
Here C1(P) and C2(P) are the total number of character
comparisons of the brute force program and the Rabin-Karp
program respectively.

CODE:

#include <iostream>
#include <string>
#include <string.h>
#include <cstdlib>
#include <sys/time.h>
using namespace std;

// d is the number of characters in the input alphabet
#define d 256

// Per-row comparison counts: c1 for brute force, c2 for Rabin-Karp.
int c1[10], c2[10];

/* Brute-force string matching with a comparison counter.
   s1 -> text
   s2 -> pattern
   Returns the total number of character comparisons made while trying
   every alignment of s2 against s1.
*/
int bfp(string s1, string s2){
    const int n = s1.length(), m = s2.length();

    // Nothing to compare; also avoids looping forever on an empty pattern.
    if (m == 0 || m > n)
        return 0;

    int charCmp = 0;
    for (int start = 0; start + m <= n; start++){
        for (int j = 0; j < m; j++){
            // Count every comparison, whether it matches or not.
            charCmp++;
            if (s1[start + j] != s2[j])
                break;
        }
    }
    return charCmp;
}

/* Rabin-Karp string matching with a comparison counter.
   pat -> pattern
   txt -> text
   q   -> A prime number (modulus of the rolling hash)
   Returns the total number of character comparisons made while verifying
   the windows whose hash equals the pattern's hash.
*/
int rabin_karp_algo(string pat, string txt, int q){
    int charCmp = 0;
    int M = pat.length();
    int N = txt.length();

    // An empty pattern, or one longer than the text, needs no comparison;
    // this also keeps the hashing loop from indexing txt past its end.
    if (M == 0 || M > N)
        return charCmp;

    int i, j;
    int p = 0; // hash value for pattern
    int t = 0; // hash value for the current window of txt
    int h = 1;

    // The value of h would be "pow(d, M-1)%q"
    for (i = 0; i < M - 1; i++)
        h = (h * d) % q;

    // Calculate the hash value of pattern and first window of text
    for (i = 0; i < M; i++){
        p = (d * p + pat[i]) % q;
        t = (d * t + txt[i]) % q;
    }

    // Slide the pattern over text one by one
    for (i = 0; i <= N - M; i++){
        // Only when the hash values agree are the characters compared
        if (p == t){
            for (j = 0; j < M; j++){
                // Count the comparison before making it, so that the
                // mismatching comparison that ends the loop is included.
                charCmp++;
                if (txt[i + j] != pat[j])
                    break;
            }
        }

        // Hash of the next window: remove leading digit, add trailing digit
        if (i < N - M){
            t = (d * (t - txt[i] * h) + txt[i + M]) % q;

            // The subtraction can leave t negative; bring it back into [0, q)
            if (t < 0)
                t = (t + q);
        }
    }
    return charCmp;
}

int main(){ string s1 = "a", s2


= "aaaa";

int n = 4, m = 0, an[10], am[10];

for(int i = 0; i < 10
;i++){ n = n + 2*i;
m=m+
i; an[i] =
n; am[i]
= m;
s1 = "GCATCGCAGAGAGTATACAGTACG", s2 =
"GCAGAGAG";
s1 = s1 + s2;
c1[i] =
bfp(s1,s2);
c2[i] =
rabin_karp
_algo(s2,s1
,101);

cout<<"S.No."<<"\t"<<"n"<<"\t"<<"m"<<"\t"<<"C1(p)
Comp."<<"\t"<<"C2(p)
Comp."<<endl;
for(int i = 0 ; i < 10 ;i++){
printf("%d\t%d\t%d\t%d\t\t%d\n",(i+1),an[i], am[i], c1[i],
c2[i]);
}
}
Output -

8. Using your program for LPS1 investigate for which values of the
input, the worst-case running time is achieved.
CODE:

#include<iostream>
#include<string.h>

using namespace std;

// d is the number of characters in the input alphabet
#define d 256

// Running total of pattern occurrences found by search().
int count = 0;

/* Rabin-Karp string matching.
   Adds to the global `count` the number of positions (overlapping ones
   included) at which pat occurs in txt. Every window whose hash equals the
   pattern's hash is verified character by character.
   pat -> pattern
   txt -> text
   q   -> A prime number (modulus of the rolling hash)
*/
void search(char pat[], char txt[], int q)
{
    int M = strlen(pat);
    int N = strlen(txt);

    // An empty pattern, or one longer than the text, has no occurrence.
    // Returning here also keeps the hashing loop below from reading txt
    // beyond its terminator.
    if (M == 0 || M > N)
        return;

    int i, j;
    int p = 0; // hash value for pattern
    int t = 0; // hash value for the current window of txt
    int h = 1;

    // The value of h would be "pow(d, M-1)%q"
    for (i = 0; i < M - 1; i++)
        h = (h * d) % q;

    // Calculate the hash value of pattern and first window of text
    for (i = 0; i < M; i++)
    {
        p = (d * p + pat[i]) % q;
        t = (d * t + txt[i]) % q;
    }

    // Slide the pattern over text one by one
    for (i = 0; i <= N - M; i++)
    {
        // Only when the hash values agree are the characters compared
        if (p == t)
        {
            for (j = 0; j < M; j++)
            {
                if (txt[i + j] != pat[j])
                    break;
            }

            // p == t and pat[0...M-1] == txt[i...i+M-1]
            if (j == M)
                ::count++; // qualified so it cannot be confused with std::count
        }

        // Hash of the next window: remove leading digit, add trailing digit
        if (i < N - M)
        {
            t = (d * (t - txt[i] * h) + txt[i + M]) % q;

            // The subtraction can leave t negative; bring it back into [0, q)
            if (t < 0)
                t = (t + q);
        }
    }
}

/* Driver Code
*/ int main()
{
// char txt[] = "GEEKS FOR GEEKS";
// char pat[] =
"GEEK"; char
txt[100], pat[100];
scanf("%s", txt);
scanf("%s", pat);
// A prime
number int q =
101;

// function call
search(pat, txt,
q); cout<<count;
return 0;
}

OUTPUT:

9. How many spurious hits does the Rabin-Karp string matching


algorithm encounter in the text T = “3141512653849792” when
looking for all occurrences of the pattern P = “26”, working modulo
q = 11 and over the alphabet A= {0, 1, 2, . . . , 9}?
CODE:
#include<iostream>
#include<string.h>

using namespace std;

// d is the number of characters in the input alphabet; the exercise works
// over the decimal digits A = {0, 1, ..., 9}, so the radix is 10.
#define d 10

// count -> running total of true occurrences found by search()
// sp    -> running total of spurious hits (hash equal, text different)
int count = 0, sp = 0;

/* Rabin-Karp string matching over decimal-digit strings.
   Adds the number of occurrences of pat in txt to the global `count` and
   the number of spurious hits to the global `sp`. Each character is hashed
   by its digit value (character minus '0'), so both strings are expected
   to consist of the characters '0'..'9'.
   pat -> pattern
   txt -> text
   q   -> A prime number (modulus of the rolling hash)
*/
void search(char pat[], char txt[], int q)
{
    int M = strlen(pat);
    int N = strlen(txt);

    // An empty pattern, or one longer than the text, has no occurrence.
    // Returning here also keeps the hashing loop below from reading txt
    // beyond its terminator.
    if (M == 0 || M > N)
        return;

    int i, j;
    int p = 0; // hash value for pattern
    int t = 0; // hash value for the current window of txt
    int h = 1;

    // The value of h would be "pow(d, M-1)%q"
    for (i = 0; i < M - 1; i++)
        h = (h * d) % q;

    // Calculate the hash value of pattern and first window of text
    for (i = 0; i < M; i++)
    {
        p = (d * p + (pat[i] - '0')) % q;
        t = (d * t + (txt[i] - '0')) % q;
    }

    // Keep both hashes in [0, q) even if a non-digit character slipped in.
    if (p < 0)
        p = (p + q);
    if (t < 0)
        t = (t + q);

    // Slide the pattern over text one by one
    for (i = 0; i <= N - M; i++)
    {
        // Only when the hash values agree are the characters compared
        if (p == t)
        {
            for (j = 0; j < M; j++)
            {
                if (txt[i + j] != pat[j])
                    break;
            }

            if (j == M)
                ::count++; // hash hit confirmed: a real occurrence
            else
                ::sp++;    // hash hit refuted: a spurious hit
        }

        // Hash of the next window: remove leading digit, add trailing digit
        if (i < N - M)
        {
            t = (d * (t - (txt[i] - '0') * h) + (txt[i + M] - '0')) % q;

            // The subtraction can leave t negative; bring it back into [0, q)
            if (t < 0)
                t = (t + q);
        }
    }
}

/* Driver Code
*/ int main()
{
// char txt[] = "GEEKS FOR GEEKS";
// char pat[] = "GEEK";
char txt[100],
pat[100];
scanf("%s", txt);
scanf("%s", pat);

// A prime
number int q =
11;

// function call
search(pat, txt, q);
cout<<"Ocuurence:
"<<count<<endl;
cout<<"Spurious hits
"<<sp; return 0;
}

Output –
10. Run LPS1 for the text abdcabcde and the pattern ab.
CODE:
#include<iostream>
#include<string.h>

using namespace std;

// d is the number of characters in the input alphabet
#define d 256

// Running total of pattern occurrences found by search().
int count = 0;

/* Rabin-Karp string matching.
   Adds to the global `count` the number of positions (overlapping ones
   included) at which pat occurs in txt.
   pat -> pattern
   txt -> text
   q   -> A prime number (modulus of the rolling hash)
*/
void search(char pat[], char txt[], int q)
{
    int M = strlen(pat);
    int N = strlen(txt);

    // An empty pattern, or one longer than the text, has no occurrence.
    // Returning here also keeps the hashing loop below from reading txt
    // beyond its terminator.
    if (M == 0 || M > N)
        return;

    int i, j;
    int p = 0; // hash value for pattern
    int t = 0; // hash value for the current window of txt
    int h = 1;

    // The value of h would be "pow(d, M-1)%q"
    for (i = 0; i < M - 1; i++)
        h = (h * d) % q;

    // Calculate the hash value of pattern and first window of text
    for (i = 0; i < M; i++)
    {
        p = (d * p + pat[i]) % q;
        t = (d * t + txt[i]) % q;
    }

    // Slide the pattern over text one by one
    for (i = 0; i <= N - M; i++)
    {
        // Only when the hash values agree are the characters compared
        if (p == t)
        {
            for (j = 0; j < M; j++)
            {
                if (txt[i + j] != pat[j])
                    break;
            }

            // p == t and pat[0...M-1] == txt[i...i+M-1]
            if (j == M)
                ::count++; // qualified so it cannot be confused with std::count
        }

        // Hash of the next window: remove leading digit, add trailing digit
        if (i < N - M)
        {
            t = (d * (t - txt[i] * h) + txt[i + M]) % q;

            // The subtraction can leave t negative; bring it back into [0, q)
            if (t < 0)
                t = (t + q);
        }
    }
}

/* Driver Code
*/ int main()
{
// char txt[] = "GEEKS FOR GEEKS";
// char pat[] = "GEEK";
char txt[100],
pat[100];
scanf("%s", txt);
scanf("%s", pat);

// A prime
number int q =
101;

// function call
search(pat, txt,
q); cout<<count;
return 0;
}
Output –

You might also like