Compiler Design File Dipen Kalsi
Compiler Design File Dipen Kalsi
Practical File
Semester: 5
Group: 5 CST1
Group: 5 CST1
THEORY: Lex is a tool that generates lexical analyzers (programs that convert a stream of
characters into tokens). Lex is itself a compiler: it takes a specification made of regular-expression
patterns with associated actions and translates it into C source code for a scanner that recognizes
those patterns. It is commonly used with YACC (Yet Another Compiler Compiler). It was written by
Mike Lesk and Eric Schmidt.
YACC (for "Yet Another Compiler Compiler") is the standard parser generator for the Unix
operating system. An open-source program, Yacc generates code for the parser in the C
programming language. The acronym is usually rendered in lowercase but is occasionally seen as
YACC or Yacc.
SOURCE CODE:
%{
/*
 * Scanner that tallies vowels and consonants.
 *
 * Rule order matters: a vowel matches both patterns below with the same
 * length, and lex resolves such ties in favour of the rule listed first, so
 * vowels are counted only by the [aeiouAEIOU] rule and every other ASCII
 * letter by the [a-zA-Z] rule. Input matched by neither rule (digits,
 * spaces, newlines, ...) is handled by lex's default rule.
 */
/* Incremented once per character matched by the [aeiouAEIOU] rule. */
int vow_count=0;
/* Incremented once per character matched by the [a-zA-Z] rule. */
int const_count =0;
%}
%%
[aeiouAEIOU] {vow_count++;}
[a-zA-Z] {const_count++;}
%%
/* Called by the scanner when it reaches end of input; returning 1 reports
   that there is no further input, so yylex() finishes. */
int yywrap(){ return 1; }
/* Driver: show the prompt, let the scanner consume the input, then report
   both tallies kept by the rule actions. */
int main()
{
    fputs("Enter the string of vowels and consonants:", stdout);
    yylex();
    printf("Number of vowels are: %d\nNumber of consonants are: %d\n",
           vow_count, const_count);
    return 0;
}
OUTPUT:
Microsoft Windows [Version 10.0.22621.2283] (c) Microsoft Corporation. All rights
reserved.
C:\Users\Dipen\OneDrive\Desktop\LF>flex a.l.txt
C:\Users\Dipen\OneDrive\Desktop\LF>gcc lex.yy.c
C:\Users\Dipen\OneDrive\Desktop\LF>a.exe
^Z
%{
/* Definition section */
#include<stdio.h>
#include<stdlib.h>   /* atoi */
#include "y.tab.h"   /* token codes (NUMBER) generated by yacc */
extern int yylval;   /* semantic value handed to the parser */
%}
/* Rule Section */
%%
[0-9]+ {
    /* Convert the matched digit string and pass it to the parser. */
    yylval=atoi(yytext);
    return NUMBER;
}
[ \t]+ ;
[\n] return 0;
. return yytext[0];
%%
/* End of input: tell the scanner there is nothing more to read. */
int yywrap() {
    return 1;
}
%{
/* Definition section */
#include<stdio.h>
int yylex(void);
void yyerror(const char *msg);
/* Set to 1 by yyerror(); main() reports success only while it is still 0. */
int flag=0;
%}
%token NUMBER
/* Operator precedence, lowest first; every binary operator is
   left-associative. Without these declarations the grammar is ambiguous. */
%left '+' '-'
%left '*' '/' '%'
/* Rule Section */
%%
ArithmeticExpression: E {
        printf("\nResult=%d\n", $1);
        return 0;
    }
    ;
E : E '+' E {$$=$1+$3;}
  | E '-' E {$$=$1-$3;}
  | E '*' E {$$=$1*$3;}
  | E '/' E {
        /* Integer division by zero would trap; report it as an error. */
        if ($3 == 0) { yyerror("division by zero"); YYABORT; }
        $$=$1/$3;
    }
  | E '%' E {
        if ($3 == 0) { yyerror("modulus by zero"); YYABORT; }
        $$=$1%$3;
    }
  | '(' E ')' {$$=$2;}
  | NUMBER {$$=$1;}
  ;
%%
//driver code
int main(void) {
    printf("\nEnter Any Arithmetic Expression which can have operations Addition, Subtraction, "
           "Multiplication, Division, Modulus and Round brackets:\n");
    yyparse();
    if(flag==0)
        printf("\nEntered arithmetic expression is Valid\n\n");
    return 0;
}
/* Invoked by the parser on a syntax error (and by the zero-divisor checks
   above); records the failure so main() does not report success. */
void yyerror(const char *msg) {
    (void)msg;
    printf("\nEntered arithmetic expression is Invalid\n\n");
    flag=1;
}
OUTPUT:
C:\Users\Dipen\OneDrive\Desktop\LF\yacc>flex calc.l.txt
C:\Users\Dipen\OneDrive\Desktop\LF\yacc>a.exe
Enter Any Arithmetic Expression which can have operations Addition, Subtraction,
Multiplication, Divison, Modulus and Round brackets:
12-4
Result=8
C:\Users\Dipen\OneDrive\Desktop\LF\yacc>a.exe
Enter Any Arithmetic Expression which can have operations Addition, Subtraction,
Multiplication, Divison, Modulus and Round brackets:
1023+521-
C:\Users\Dipen\OneDrive\Desktop\LF\yacc>a.exe
Enter Any Arithmetic Expression which can have operations Addition, Subtraction,
Multiplication, Divison, Modulus and Round brackets:
10/5
Result=2
C:\Users\Dipen\OneDrive\Desktop\LF\yacc>a.exe
Enter Any Arithmetic Expression which can have operations Addition, Subtraction,
Multiplication, Divison, Modulus and Round brackets:
(12+5)*2
Result=34
C:\Users\Dipen\OneDrive\Desktop\LF\yacc>a.exe
Enter Any Arithmetic Expression which can have operations Addition, Subtraction,
Multiplication, Divison, Modulus and Round brackets:
12%5
Result=2
Theory: A context-free grammar (CFG) is a type of formal grammar that can describe the syntax
structure of a formal language. A grammar G is a 4-tuple (V, T, P, S):
V - It is a set of variables (non-terminals).
T - It is a set of terminals.
P - It is a set of production rules.
S - It is the start symbol, with S ∈ V.
Every production has the form $A \to \alpha$, where $A \in V$ and $\alpha \in (V \cup T)^*$.
The left-hand side of a production can only be a single variable; it cannot be a terminal.
The right-hand side can be a variable, a terminal, or any combination of variables and
terminals.
The form above states that a grammar in which every production rewrites a single variable into any
combination of variables 'V' and terminals 'T' is a context-free grammar.
SOURCE CODE:
#include <bits/stdc++.h>
using namespace std;
// Returns the number of characters in `str` that precede the first NUL
// character, or the full size when it contains none.
// Taking the argument by const reference avoids copying the string on every
// call; a char array still binds through the implicit std::string conversion.
int length(const string& str){
    const size_t nul = str.find('\0');
    return static_cast<int>(nul == string::npos ? str.size() : nul);
}
// Reads one whitespace-delimited token from std::cin and reports whether it
// can be derived from  S -> aS | Sb | ab,  i.e. one or more 'a' followed by
// one or more 'b'. A rejected token is reported and the program is then
// terminated with exit(0).
void first(){
    // std::string grows with the input, so a long token cannot overrun a
    // fixed-size buffer.
    string str;
    cout<<"\nThe grammar is as follows --> \nS -> aS\nS -> Sb\nS -> ab\n";
    cout<<"Enter a string --> ";
    cin>>str;
    if(str.empty() || str[0]!='a'){
        cout<<"String is invalid because of incorrect first character!!";
        exit(0);
    }
    const size_t len = str.size();
    size_t n = 1;
    while(n<len && str[n]=='a')
        n++;
    const size_t b_begin = n;          // first position after the run of a's
    while(n<len && str[n]=='b')
        n++;
    // Reject when there is no 'b' at all or anything follows the run of b's.
    if(n==b_begin || n!=len){
        cout<<"String does not belong to grammar!!";
        exit(0);
    }
    cout<<"String is Valid!!";
}
// Reads one whitespace-delimited token from std::cin and reports whether it
// can be derived from  S -> aSa | bSb | a | b,  i.e. it is an odd-length
// palindrome over {a, b}. A rejected token is reported and the program is
// then terminated with exit(0).
void second(){
    // std::string grows with the input, so a long token cannot overrun a
    // fixed-size buffer.
    string str;
    cout<<"\nThe grammar is as follows --> \nS -> aSa\nS -> bSb\nS -> a\nS -> b\n";
    cout<<"Enter a string --> ";
    cin>>str;
    if(str.empty() || (str[0]!='a' && str[0]!='b')){
        cout<<"String is invalid because of incorrect first character!!";
        exit(0);
    }
    if(str.size()%2 == 0){
        cout<<"String is invalid because of Even Length!!";
        exit(0);
    }
    // The grammar only has the terminals a and b, so any other symbol rejects.
    const bool only_terminals = str.find_first_not_of("ab") == string::npos;
    // Compare the first half against the reversed second half.
    const bool mirrored = equal(str.begin(), str.begin() + str.size()/2, str.rbegin());
    if(!only_terminals || !mirrored){
        // Stop here so a rejected string is never also reported as accepted.
        cout<<"String is Invalid!!";
        exit(0);
    }
    cout<<"String belongs to the Grammar!!";
}
// Reads one whitespace-delimited token from std::cin and reports whether it
// can be derived from  S -> aSbb | abb,  i.e. n 'a's followed by exactly 2n
// 'b's with n >= 1. A rejected token is reported and the program is then
// terminated with exit(0).
void third(){
    // std::string grows with the input, so a long token cannot overrun a
    // fixed-size buffer.
    string str;
    cout<<"\nThe grammar is as follows --> \nS -> aSbb\nS -> abb\n";
    cout<<"Enter a string --> ";
    cin>>str;
    if(str.empty() || str[0]!='a'){
        cout<<"String is invalid because of incorrect first character!!";
        exit(0);
    }
    const size_t len = str.size();
    size_t n = 1;
    while(n<len && str[n]=='a')
        n++;
    const size_t a_count = n;
    while(n<len && str[n]=='b')
        n++;
    const size_t b_count = n - a_count;
    // a_count >= 1 here, so the count test also rejects a missing run of b's.
    if(b_count != 2*a_count || n != len){
        cout<<"String does not belong to grammar!!";
        exit(0);
    }
    cout<<"String is Valid!!";
}
// Reads one whitespace-delimited token from std::cin and reports whether it
// can be derived from  S -> aSb | ab,  i.e. n 'a's followed by exactly n 'b's
// with n >= 1. A rejected token is reported and the program is then
// terminated with exit(0).
void fourth(){
    // std::string grows with the input, so a long token cannot overrun a
    // fixed-size buffer.
    string str;
    cout<<"\nThe grammar is as follows --> \nS -> aSb\nS -> ab\n";
    cout<<"Enter a string --> ";
    cin>>str;
    if(str.empty() || str[0]!='a'){
        cout<<"String is invalid because of incorrect first character!!";
        exit(0);
    }
    const size_t len = str.size();
    size_t n = 1;
    while(n<len && str[n]=='a')
        n++;
    const size_t a_count = n;
    while(n<len && str[n]=='b')
        n++;
    const size_t b_count = n - a_count;
    // a_count >= 1 here, so the count test also rejects a missing run of b's.
    if(b_count != a_count || n != len){
        cout<<"String does not belong to grammar!!";
        exit(0);
    }
    cout<<"String is Valid!!";
}
// Menu driver: prints the four grammars once, then repeatedly reads a choice
// and runs the matching checker until the user enters 5 or input ends.
int main()
{
    int a = 0;
    cout<<"1 --> \nS -> aS\nS -> Sb\nS -> ab\nString is of the form - aab\n\n";
    cout<<"2 --> \nS -> aSa\nS -> bSb\nS -> a\nS -> b\nThe Language generated is - All Odd Length "
          "Palindromes\n\n";
    cout<<"3 --> \nS -> aSbb\nS -> abb\nThe Language generated is - anb2n , where n>0\n\n";
    cout<<"4 --> \nS -> aSb\nS -> ab\nThe Language generated is - anbn where n>0\n\n";
    cout<<"5 --> Exit!!\n\n";
    do
    {
        cout<<"Choose one input --> ";
        if(!(cin>>a))
        {
            if(cin.eof())
                break;                  // no more input: leave the menu
            // Non-numeric entry: drop the bad line so the loop cannot spin
            // forever on the same unread characters.
            cin.clear();
            cin.ignore(numeric_limits<streamsize>::max(), '\n');
            a = 0;
            cout<<"Wrong Input !!";
            continue;
        }
        switch(a)
        {
            case 1:
                first();
                break;
            case 2:
                second();
                break;
            case 3:
                third();
                break;
            case 4:
                fourth();
                break;
            case 5:
                break;                  // exit choice is valid, not "wrong input"
            default:
                cout<<"Wrong Input !!";
        }
    }
    while (a!=5);
    return 0;
}
OUTPUT:
EXPERIMENT-3
THEORY: We break the given string into tokens and check whether each word is a keyword of the
language. Keywords are predefined, reserved words; they cannot be used as variable names. C has
32 keywords, all of which are also reserved in C++ (which adds many more of its own). We print
the count of each keyword present in the string.
SOURCE CODE:
#include <bits/stdc++.h>
using namespace std;
// Splits `s` on whitespace and prints every C keyword that occurs in it
// together with its number of occurrences, one "keyword count" pair per line
// in alphabetical order. Prints "No keyword present" when none is found.
void simple_tokenizer(string s){
    // The 32 keywords of C.
    static const set<string> keywords = {
        "auto", "break", "case", "char", "const", "continue", "default", "do",
        "double", "else", "enum", "extern", "float", "for", "goto", "if",
        "int", "long", "register", "return", "short", "signed", "sizeof", "static",
        "struct", "switch", "typedef", "union", "unsigned", "void", "volatile", "while"
    };
    map<string,int> m;          // keyword -> occurrences, kept sorted by key
    stringstream ss(s);
    string word;
    while(ss >> word){
        if(keywords.count(word))
            m[word]++;
    }
    if(!m.empty()){
        for(const auto& it:m){
            cout << it.first << " " << it.second << endl;
        }
    }
    else cout << "No keyword present" << endl;
}
// Driver: reads one full line from standard input and prints the keyword
// counts that simple_tokenizer() reports for it.
int main(){
    string str;
    cout << "Enter a string: ";
    getline(cin,str);
    simple_tokenizer(str);
    cout << endl;
    return 0;
}
OUTPUT:
Enter a string: Hello char Dipen Kalsi int void auto int bye int
auto 1
char 1
int 3
void 1
Dipen Kalsi
01514812721 5CST1
EXPERIMENT-4
THEORY:
Left Recursion - A production is left-recursive if the leftmost symbol on its right-hand side is
the same nonterminal as its left-hand side. For Ex: exp → exp + term.
A grammar that contains a production having left recursion is called a Left-Recursive Grammar.
Similarly, if the rightmost symbol on the right-hand side is the same as the left-hand side, the
production is right-recursive (Right-Recursion).
For a left-recursive production such as E --> E+T / T, a top-down parser expanding E calls the
procedure for E again before consuming any input, so it keeps calling itself and never terminates.
To avoid this infinite loop we eliminate the left recursion.
In general, A --> Aα / β is replaced by
A --> βA'
A'--> eps/αA'
so the example becomes
E --> TE'
E'--> eps/+TE'
SOURCE CODE:
#include<bits/stdc++.h>
// Joins the alternatives of a production with '|' separators.
static std::string join_alternatives(const std::vector<std::string>& alts){
    std::string joined;
    for(std::size_t i=0;i<alts.size();i++){
        if(i!=0)
            joined += "|";
        joined += alts[i];
    }
    return joined;
}

// Eliminates immediate left recursion from the productions of one nonterminal.
// Input (stdin): the nonterminal character, the number of alternatives n,
// then the n alternatives, where '#' stands for epsilon.
// Output: the rewritten production  A->beta A'|...  followed by the new
// production  A'->alpha A'|...|#.  A production without left recursion is
// printed unchanged.
int main(){
    char head = 0;
    int n = 0;
    if(!(std::cin>>head>>n) || n<=0)
        return 0;
    const std::string lhs(1, head);
    const std::string fresh = lhs + "'";     // name of the new nonterminal

    std::vector<std::string> alts;   // alternatives exactly as entered
    std::vector<std::string> tails;  // alpha A' for every A -> A alpha
    std::vector<std::string> bases;  // beta A'  for every A -> beta
    for(int i=0;i<n;i++){
        std::string alt;
        if(!(std::cin>>alt))
            break;
        alts.push_back(alt);
        if(alt[0]==head){
            // A -> A alone contributes nothing once the recursion is removed.
            if(alt.size()>1)
                tails.push_back(alt.substr(1) + fresh);
        } else{
            // An epsilon alternative becomes just A'.
            bases.push_back((alt=="#" ? std::string() : alt) + fresh);
        }
    }

    if(tails.empty()){
        std::cout<<lhs<<"->"<<join_alternatives(alts)<<std::endl;
        return 0;
    }
    if(bases.empty())
        bases.push_back(fresh);      // only recursive alternatives were given
    tails.push_back("#");            // A' can always derive epsilon
    std::cout<<lhs<<"->"<<join_alternatives(bases)<<std::endl
             <<fresh<<"->"<<join_alternatives(tails)<<std::endl;
    return 0;
}
OUTPUT: