Cs3501-Compiler Design Lab Manual
Cs3501-Compiler Design Lab Manual
AND ENGINEERING
REG.NO…………………………………..…
NAME……………………………………….
SRI RANGAPOOPATHI COLLEGE OF ENGINEERING
Alampoondi-604 151 Gingee TK.
BONAFIDE CERTIFICATE
NAME :
YEAR/SEM : BRANCH
REGISTER NO.
Certified that this is a bonafide record of work done by the above student in
The laboratory during the
Academic year
LIST OF EXPERIMENTS:
1. Using the LEX tool, Develop a lexical analyzer to recognize a few patterns in C. (Ex.
identifiers, constants, comments, operators etc.). Create a symbol table, while
recognizing identifiers
2. Implement a Lexical Analyzer using LEX Tool
3. Generate YACC specification for a few syntactic categories.
a. Program to recognize a valid arithmetic expression that uses operator +, -, * and/
b. Program to recognize a valid variable which starts with a letter followed by any
number of letters or digits.
c. Program to recognize a valid control structures syntax of C language (For loop,
while loop, if-else, if-else-if, switch-case, etc.).
d. Implementation of calculator using LEX and YACC
4. Generate three address code for a simple program using LEX and YACC.
5. Implement type checking using Lex and Yacc.
6. Implement simple code optimization techniques (Constant folding, Strength reduction
and Algebraic transformation)
7. Implement back-end of the compiler for which the three address code is given as
input and the 8086 assembly language code is produced as output
30 PERIODS
TOTAL: 75 PERIODS
COURSE OUTCOMES:
2
TABLE OF CONTENTS
Staff
EXP Date Experiment Name Marks Sign
/NO.
1
Using the LEX tool, Develop a lexical analyzer to
recognize a few patterns in C. (Ex.
identifiers, constants, comments, operators etc.).
Create a symbol table, while recognizing identifiers
3
Ex.No :1
DATE:
AIM:
To develop a lexical analyzer to identify identifiers, constants, comments, operators etc using C program
ALGORITHM:
Step6: Separate the operators of the input program and display it.
4
PROGRAM:
//Develop a lexical analyzer to recognize a few patterns in C.
#include<string.h>
#include<ctype.h>
#include<stdio.h>
#include<stdlib.h>
/*
 * Report whether a lexeme is one of the reserved words this analyzer knows.
 *
 * str: NUL-terminated lexeme to classify.
 *
 * Prints "\n<str> is a keyword" when str equals one of the ten words in the
 * table below, otherwise prints "\n<str> is an identifier".
 */
void keyword(char str[10])
{
    static const char *const reserved[] = {
        "for", "while", "do", "int", "float",
        "char", "double", "printf", "switch", "case"
    };
    const size_t total = sizeof reserved / sizeof reserved[0];
    size_t idx = 0;

    /* Walk the table until a match is found or the table is exhausted. */
    while (idx < total && strcmp(reserved[idx], str) != 0) {
        idx++;
    }

    if (idx == total) {
        printf("\n%s is an identifier", str);
        return;
    }
    printf("\n%s is a keyword", str);
}
/*
 * Lexical analyzer driver.
 *
 * Reads the file "input" character by character and separates it into:
 *   - unsigned decimal constants, collected in num[];
 *   - identifiers/keywords, written space-separated to the file "identifier";
 *   - every other non-blank character, written to the file "specialchar".
 * Newlines are counted in lineno. After the scan, the collected numbers are
 * printed, each word from "identifier" is classified with keyword(), and the
 * special characters and the line count are printed.
 *
 * Returns 0 on success, EXIT_FAILURE if a file cannot be opened.
 */
int main(void)
{
    FILE *f1, *f2, *f3;
    int c;              /* int, not char: must be able to hold EOF */
    char str[32];
    int num[100];
    int lineno = 0, tokenvalue = 0, i = 0, j = 0;
    size_t k = 0;

    f1 = fopen("input", "r");
    if (f1 == NULL) {
        perror("input");
        return EXIT_FAILURE;
    }
    f2 = fopen("identifier", "w");
    f3 = fopen("specialchar", "w");
    if (f2 == NULL || f3 == NULL) {
        perror("cannot create work file");
        if (f2 != NULL)
            fclose(f2);
        if (f3 != NULL)
            fclose(f3);
        fclose(f1);
        return EXIT_FAILURE;
    }

    while ((c = getc(f1)) != EOF) {
        if (isdigit(c)) {
            /* Accumulate the decimal value: value = value * 10 + digit. */
            tokenvalue = c - '0';
            c = getc(f1);
            while (c != EOF && isdigit(c)) {
                tokenvalue = tokenvalue * 10 + (c - '0');
                c = getc(f1);
            }
            /* Constants beyond the table size are dropped, not written out of bounds. */
            if (i < (int)(sizeof num / sizeof num[0]))
                num[i++] = tokenvalue;
            ungetc(c, f1);      /* pushing back EOF is a no-op */
        } else if (isalpha(c) || c == '_') {
            putc(c, f2);
            c = getc(f1);
            while (c != EOF && (isdigit(c) || isalpha(c) || c == '_' || c == '$')) {
                putc(c, f2);
                c = getc(f1);
            }
            putc(' ', f2);      /* separator read back in the second phase */
            ungetc(c, f1);
        } else if (c == ' ' || c == '\t') {
            printf(" ");
        } else if (c == '\n') {
            lineno++;
        } else {
            putc(c, f3);
        }
    }
    fclose(f2);
    fclose(f3);
    fclose(f1);

    printf("\n the no's in the program are:");
    for (j = 0; j < i; j++)
        printf("\t%d", num[j]);
    printf("\n");

    f2 = fopen("identifier", "r");
    if (f2 == NULL) {
        perror("identifier");
        return EXIT_FAILURE;
    }
    k = 0;
    printf("the keywords and identifier are:");
    while ((c = getc(f2)) != EOF) {
        if (c != ' ') {
            /* Over-long lexemes are truncated so str never overflows. */
            if (k < sizeof str - 1)
                str[k++] = (char)c;
        } else {
            str[k] = '\0';
            keyword(str);
            k = 0;
        }
    }
    fclose(f2);

    f3 = fopen("specialchar", "r");
    if (f3 == NULL) {
        perror("specialchar");
        return EXIT_FAILURE;
    }
    printf("\n Special Characters are");
    while ((c = getc(f3)) != EOF)
        printf("\t%c", c);
    printf("\n");
    fclose(f3);

    printf("Total no of lines are:%d", lineno);
    return 0;
}
6
OUTPUT:
RESULT
Thus the C program for implementation of a lexical analyzer to recognize a few patterns was executed and
verified successfully.
7
Ex. No:2
DATE:
AIM:
To write a ‘C’ program to implement a lexical analyzer for separation of tokens using LEX Tool.
ALGORITHM:
Step 3: Check for the given list of keywords and print them as keyword if it is encountered.
Step 5: For a function, print the beginning and ending of the function block.
Step 6: Similarly print the corresponding statements for numbers, identifiers and assignment operators.
Step 7: In the main function get the input file as argument and open the file in read mode.
Step 8: Then read the file and print the corresponding lex statement given above.
8
PROGRAM 1:
%{
/*
 * Minimal scanner: classifies each lexeme read from standard input as a
 * keyword, an identifier or a number. The keyword rule comes first so that
 * it wins over the identifier rule when both match the same text.
 */
#include<stdio.h>
%}
%%
if|else|while|int|switch|for {printf("%s is a keyword",yytext);}
[a-zA-Z][a-zA-Z0-9]* {printf("%s is an identifier",yytext);}
[0-9]+ {printf("%s is a number",yytext);}
%%
int main()
{
yylex();
return 0;
}
/* Called at end of input; returning 1 tells the scanner there is no more input. */
int yywrap()
{
return 1;
}
OUTPUT
9
PROGRAM 2:
%{
/*
 * Scanner that reports preprocessor lines, the keyword "int", function
 * headers (identifier followed by '('), block braces and identifiers.
 * Every other character, including newline, is discarded.
 */
#include <stdio.h>
%}
identifier [a-zA-Z_][a-zA-Z0-9_]*
%%
#.* {printf("\n%s is a preprocessor dir",yytext);}
int {printf("\n\t%s is a keyword",yytext);}
{identifier}\( {printf("\n\nFUNCTION\n\t%s",yytext);}
\{ {printf("\nBLOCK BEGINS");}
\} {printf("\nBLOCK ENDS");}
{identifier} {printf("\n%s is an IDENTIFIER",yytext);}
.|\n ;
%%
/* Returning 1 ends scanning at end of file; returning 0 would ask for more input forever. */
int yywrap()
{
return 1;
}
/*
 * Scans the file named by the first command-line argument, or standard
 * input when no argument is given.
 */
int main(int argc, char **argv)
{
if(argc>1)
{
yyin=fopen(argv[1],"r");
if(yyin==NULL)
{
perror(argv[1]);
return 1;
}
}
yylex();
if(argc>1)
fclose(yyin);
return 0;
}
Input ( in.c )
#include<stdio.h>
main()
{
int a ;
}
10
OUTPUT:
RESULT:
Thus the C program for the implementation of lexical analyzer using LEX Tool was executed successfully.
11
Ex. No: 3
DATE:
GENERATION OF YACC SPECIFICATION
RECOGNIZING A VALID ARITHMETIC EXPRESSION
AIM:
To write a program to recognize a valid arithmetic expression that uses operator +, - , * and / using YACC tool.
ALGORITHM:
LEX
1. Declare the required header file and variable declaration with in ‘%{‘ and ‘%}’.
2. LEX requires regular expressions to identify valid arithmetic expression token of lexemes.
3. LEX call yywrap() function after input is over. It should return 1 when work is done or should return 0
when more processing is required.
YACC
1. Declare the required header file and variable declaration with in ‘%{‘ and ‘%}’.
2. Define tokens in the first section and also define the associativity of the operations
3. Mention the grammar productions and the action for each production.
4. $$ refers to the semantic value of the rule's left-hand side, while $1, $2, ... refer to the values of the first, second, ... symbols on its right-hand side.
5. Call yyparse() to initiate the parsing process.
6. yyerror() function is called when all productions in the grammar in second section doesn't match to the
input statement.
12
PROGRAM:
//art_expr.l
%{
/*
 * Scanner for the arithmetic-expression recognizer.
 * Returns ID for names, DIG for unsigned integers, 0 (end of input) at a
 * newline, and any other non-blank character as its own token code.
 */
#include<stdio.h>
#include "y.tab.h"
%}
LETTER [a-zA-Z]
DIGIT [0-9]
%%
{LETTER}({LETTER}|{DIGIT})* { return ID; }
{DIGIT}+ { return DIG; }
[ \t]+ { /* blanks carry no meaning */ }
\n { return 0; }
. { return yytext[0]; }
%%
/* No further input files: tell the scanner to stop at end of input. */
int yywrap(void)
{
return 1;
}
//art_expr.y
%{
/*
 * Recognizer for arithmetic expressions built from identifiers, integers,
 * parentheses, unary minus and the binary operators + - * /.
 */
#include<stdio.h>
#include<stdlib.h>
int yylex(void);
int yyerror(const char *msg);
%}
%token ID DIG
/* Lowest precedence first: + - bind weaker than * /, unary minus binds tightest. */
%left '+' '-'
%left '*' '/'
%right UMINUS
%%
stmt:expn ;
expn:expn '+' expn
|expn '-' expn
|expn '*' expn
|expn '/' expn
|'-' expn %prec UMINUS
|'(' expn ')'
|DIG
|ID
;
%%
/* Prompts for one expression and reports whether it parses. */
int main()
{
printf("Enter the Expression \n");
if(yyparse()==0)
printf("valid Expression \n");
return 0;
}
/* Called by the parser on a syntax error; reports it and ends the program. */
int yyerror(const char *msg)
{
(void)msg;
printf("Invalid Expression");
exit(0);
}
OUTPUT
RESULT:
Thus the program to recognize a valid arithmetic expression that uses operator +, - , * and / using YACC tool was
executed and verified successfully.
14
Ex. No: 4
DATE:
RECOGNIZING A VALID VARIABLE
AIM:
To write a program to recognize a valid variable which starts with a letter followed by any number of letters or
digits using YACC tool.
ALGORITHM:
LEX
1. Declare the required header file and variable declaration with in ‘%{‘ and ‘%}’.
2. LEX requires regular expressions or patterns to identify token of lexemes for recognize a valid variable.
3. Lex call yywrap() function after input is over. It should return 1 when work is done or should return 0
when more processing is required.
YACC
1. Declare the required header file and variable declaration with in ‘%{‘ and ‘%}’.
2. Define tokens in the first section and also define the associativity of the operations
3. Mention the grammar productions and the action for each production.
4. $$ refers to the semantic value of the rule's left-hand side, while $1, $2, ... refer to the values of the first, second, ... symbols on its right-hand side.
5. Call yyparse() to initiate the parsing process.
6. yyerror() function is called when all productions in the grammar in second section doesn't match to the
input statement.
15
PROGRAM:
//valvar.l
%{
/*
 * Scanner for the variable-name recognizer: hands the parser one token per
 * character. Letters become LET, digits become DIG, a newline ends the
 * input (token 0), and any other character is returned as itself.
 */
#include "y.tab.h"
%}
LETTER [a-zA-Z]
DIGIT [0-9]
%%
{LETTER} { return LET; }
{DIGIT} { return DIG; }
\n { return 0; }
. { return yytext[0]; }
%%
/* No further input files: tell the scanner to stop at end of input. */
int yywrap(void)
{
return 1;
}
//valvar.y
%{
/*
 * Recognizer for a variable name: one letter followed by any number of
 * letters or digits.
 */
#include<stdio.h>
#include<stdlib.h>
int yylex(void);
int yyerror(const char *msg);
%}
%token LET DIG
%%
variable:var
;
var:var DIG
|var LET
|LET
;
%%
/* Prompts for one name and reports whether it parses. */
int main()
{
printf("Enter the variable:\n");
if(yyparse()==0)
printf("Valid variable \n");
return 0;
}
/* Called by the parser on a syntax error; reports it and ends the program. */
int yyerror(const char *msg)
{
(void)msg;
printf("Invalid variable \n");
exit(0);
}
OUTPUT:
RESULT:
Thus the program to recognize a valid variable which starts with a letter followed by any number of letters or
digits using YACC tool was executed and verified successfully.
17
Ex.NO:5
DATE:
Program to recognize a valid control structures syntax of C language (For loop,
while loop, if-else, if-else-if, switch-case, etc.).
AIM:
To write a program to recognize the syntax of valid control structures of the C language (for loop,
while loop, if-else, if-else-if, switch-case, etc.).
ALGORITHM:
1. %{ ... %}: C code that is copied directly into the generated parser code.
2. %union: Defines the union type YYSTYPE, which is used for storing token values during parsing.
3. %token: Declares the token types used in the grammar.
4. %left: Defines left-associative precedence for operators.
5. %start: Specifies the starting symbol for parsing.
6. The program rule represents a sequence of statements.
7. The statement rule handles different types of control structures like for, while, and if-else.
8. The for_loop, while_loop, and if_else rules represent the syntax of each respective control structure.
9. The expr rule represents a simple expression with basic arithmetic operations and parentheses.
18
PROGRAM:
Step 1: Create a new file named "lexer.l" to implement the lexer using Flex (Lex) syntax.
%{
/*
 * Scanner for the control-structure recognizer.
 * Keywords, names, numbers, parentheses, braces and ';' are returned as the
 * named tokens declared in parser.y; any other non-blank character (such as
 * an arithmetic operator) is returned as its own character code.
 * No semantic value is stored in yylval: the grammar only checks structure.
 */
#include "parser.tab.h" // Include the parser header file
%}
%%
"for" { return FOR; }
"while" { return WHILE; }
"if" { return IF; }
"else" { return ELSE; }
[a-zA-Z_][a-zA-Z0-9_]* { return IDENTIFIER; }
[0-9]+ { return NUMBER; }
"(" { return LPAREN; }
")" { return RPAREN; }
"{" { return LBRACE; }
"}" { return RBRACE; }
";" { return SEMICOLON; }
[ \t\r\n]+ { /* whitespace separates tokens and is otherwise ignored */ }
. { return yytext[0]; }
%%
int yywrap() {
return 1;
}
Step 2: Create a new file named "parser.y" to implement the parser using Bison (Yacc) syntax.
%{
/*
 * Recognizer for C-style control structures: for, while, and if with an
 * optional else / else-if chain. Bodies are brace-delimited blocks that may
 * be empty or contain expression statements and nested control structures.
 * Expressions support = < > + - * / and parentheses over names and numbers.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

extern FILE *yyin;
int yylex(void);
void yyerror(const char *msg);
%}
%union {
char* str;
}
%token <str> IDENTIFIER
%token <str> NUMBER
%token FOR WHILE IF ELSE
%token <str> LPAREN RPAREN LBRACE RBRACE SEMICOLON
/* Lowest precedence first; resolves the ambiguity of "expr op expr". */
%right '='
%left '<' '>'
%left '+' '-'
%left '*' '/'
%start program
%%
program: statement
| program statement
;
statement: for_loop
| while_loop
| if_else
| expr SEMICOLON
;
block: LBRACE RBRACE
| LBRACE program RBRACE
;
for_loop: FOR LPAREN expr SEMICOLON expr SEMICOLON expr RPAREN block
{
// Action for for loop
printf("For loop found.\n");
}
;
while_loop: WHILE LPAREN expr RPAREN block
{
printf("While loop found.\n");
}
;
if_else: IF LPAREN expr RPAREN block else_part
{
printf("If statement found.\n");
}
;
else_part: /* no else branch */
| ELSE block
| ELSE if_else
;
expr: IDENTIFIER
| NUMBER
| expr '=' expr
| expr '<' expr
| expr '>' expr
| expr '+' expr
| expr '-' expr
| expr '*' expr
| expr '/' expr
| LPAREN expr RPAREN
;
%%
/* Reports a syntax error on stderr; parsing then stops with a non-zero status. */
void yyerror(const char *msg) {
fprintf(stderr, "Parse error: %s\n", msg);
}
/*
 * Parses the file "input_code.c".
 * Returns 0 when the whole file is accepted, non-zero when the file cannot
 * be opened or a syntax error is found.
 */
int main() {
int status;
yyin = fopen("input_code.c", "r"); // Replace "input_code.c" with the path to your input code file.
if (!yyin) {
fprintf(stderr, "Error opening input file.\n");
return 1;
}
status = yyparse();
fclose(yyin);
return status;
}
Step 3: Compile the lexer and parser files using Flex and Bison respectively.
flex lexer.l
bison -d parser.y
gcc lex.yy.c parser.tab.c -o parser -lfl
Step 4: Create a new file named "input_code.c" and paste the sample input code provided earlier.
Step 5: Run the compiled "parser" executable.
bash
./parser
OUTPUT:
If statement found.
For loop found.
RESULT:
Thus the program to recognize valid C control structures using LEX and YACC was executed and verified.
21
Ex.NO:6
DATE:
AIM:
To write a program to implement Calculator using LEX and YACC.
ALGORITHM:
22
PROGRAM:
cal.l
%{
/*
 * Scanner for the calculator. Numbers (with an optional fractional part)
 * are returned as NUM with their value in yylval; blanks are skipped; every
 * other character, including newline, is returned as its own token code.
 * YYSTYPE is defined before y.tab.h so that yylval is a double here, as it
 * is in cal.y.
 */
#include <stdlib.h>
#define YYSTYPE double
#include "y.tab.h"
%}
DIGIT [0-9]+
%option noyywrap
%%
{DIGIT}(\.{DIGIT})? { yylval = strtod(yytext, NULL); return NUM; }
[ \t]+ { /* skip blanks */ }
\n { return '\n'; }
. { return yytext[0]; }
%%
cal.y
%{
/*
 * Desk calculator: reads one arithmetic expression per line and prints its
 * value. Supports + - * /, unary minus and parentheses over real numbers.
 */
#include<ctype.h>
#include<stdio.h>
#define YYSTYPE double
int yylex(void);
void yyerror(const char *msg);
%}
%token NUM
/* Lowest precedence first: + - bind weaker than * /, unary minus binds tightest. */
%left '+' '-'
%left '*' '/'
%right UMINUS
%%
lines: /* empty */
| lines expr '\n' { printf("%g\n", $2); }
| lines '\n'
| lines error '\n' { yyerrok; /* resume with the next line after a bad one */ }
;
expr: expr '+' expr { $$ = $1 + $3; }
| expr '-' expr { $$ = $1 - $3; }
| expr '*' expr { $$ = $1 * $3; }
| expr '/' expr {
if ($3 == 0.0) {
yyerror("division by zero");
YYERROR;
}
$$ = $1 / $3;
}
| '(' expr ')' { $$ = $2; }
| '-' expr %prec UMINUS { $$ = -$2; }
| NUM
;
%%
/* Reports a parse or evaluation error on stderr. */
void yyerror(const char *msg)
{
fprintf(stderr, "%s\n", msg);
}
/* Evaluates expressions from standard input until end of file. */
int main(void)
{
return yyparse();
}
23
OUTPUT:
RESULT:
Thus the program for implementing calculator using LEX and YACC is executed and verified.
24
Ex.No:7
DATE:
Generate three address code for a simple program using LEX and YACC.
AIM:
To write the program to Generate three address code for a simple program using LEX and
YACC.
ALGORITHM:
1. Lexical Analysis (using Lex): a. Read the input program. b. Tokenize the input program using regular expressions
defined in Lex. c. Pass recognized tokens to Yacc for parsing.
2. Parsing (using Yacc): a. Define Yacc rules for the grammar of the language. b. In Yacc rules, implement semantic
actions to generate intermediate code for each production. c. Use appropriate data structures to store the
intermediate code.
3. Intermediate Code Generation: a. Define data structures for storing intermediate code, like a list of quadruples or
triples. b. In the Yacc rules, for each production, generate intermediate code and add it to the intermediate code
list. c. Use a stack to manage temporary variables, operators, and results during intermediate code generation.
4. Display or Output the Intermediate Code: a. Once the parsing is completed, display the generated intermediate
code. b. The intermediate code will consist of three-address format instructions.
5. Optionally, Optimization (if desired): a. Perform optimization on the generated intermediate code if needed.
6. Code Generation (if required): a. Use the generated intermediate code to produce the target code (e.g., machine
code or assembly) if needed.
25
PROGRAM:
Here's the Lex specification (lexer.l):
%{
/*
 * Scanner for the three-address-code generator.
 * The parser declares its semantic value with %union, so an integer is
 * stored in the union member yylval.num rather than in yylval itself.
 */
#include <stdlib.h>
#include "y.tab.h"
%}
%option noyywrap
DIGIT [0-9]
%%
{DIGIT}+ { yylval.num = atoi(yytext); return NUMBER; }
[ \t\n] ; // Skip whitespace
. { return yytext[0]; } // Return other single characters as they are
%%
Now, let's define the YACC specification (parser.y) for generating the three-address code:
%{
/*
 * Three-address-code generator for integer arithmetic expressions.
 * Each binary operation is printed as "tN = left op right", where tN is a
 * fresh temporary, and the temporary's name becomes the value of the
 * sub-expression. For the input "10 + 5 - 3" this prints
 *     t1 = 10 + 5
 *     t2 = t1 - 3
 */
#include <stdio.h>
#include <stdlib.h>
int yylex(void);
void yyerror(const char *msg);
int nextTemporary = 1;

enum { PLACE_LEN = 16 };   /* room for "t<int>" or a decimal int plus NUL */
static char *number_place(int value);
static char *emit_binary(char *lhs, char op, char *rhs);
%}
%union {
int num;
char* str;
}
%token <num> NUMBER
%type <str> expr
/* Lowest precedence first. */
%left '+' '-'
%left '*' '/'
%start program
%%
program: statement
| program statement
;
statement: expr { free($1); }
| expr ';' { free($1); }
;
expr: expr '+' expr { $$ = emit_binary($1, '+', $3); }
| expr '-' expr { $$ = emit_binary($1, '-', $3); }
| expr '*' expr { $$ = emit_binary($1, '*', $3); }
| expr '/' expr { $$ = emit_binary($1, '/', $3); }
| '(' expr ')' { $$ = $2; }
| NUMBER { $$ = number_place($1); }
;
%%
/* Allocates a buffer for one operand name; exits if memory runs out. */
static char *new_place(void)
{
char *place = malloc(PLACE_LEN);
if (place == NULL) {
fprintf(stderr, "out of memory\n");
exit(EXIT_FAILURE);
}
return place;
}
/* Returns a newly allocated string holding the decimal text of value. */
static char *number_place(int value)
{
char *place = new_place();
snprintf(place, PLACE_LEN, "%d", value);
return place;
}
/*
 * Prints one three-address instruction and returns the name of the
 * temporary that holds its result. Takes ownership of lhs and rhs and
 * frees them; the caller owns the returned string.
 */
static char *emit_binary(char *lhs, char op, char *rhs)
{
char *result = new_place();
snprintf(result, PLACE_LEN, "t%d", nextTemporary++);
printf("%s = %s %c %s\n", result, lhs, op, rhs);
free(lhs);
free(rhs);
return result;
}
/* Reports a syntax error on stderr. */
void yyerror(const char *msg)
{
fprintf(stderr, "Error: %s\n", msg);
}
int main() {
yyparse();
return 0;
}
1. Run Flex to generate the lexer code:
flex lexer.l
This will generate the file lex.yy.c.
2. Run YACC to generate the parser code:
yacc -d parser.y
This will generate the files y.tab.c and y.tab.h.
3. Compile the C program:
gcc lex.yy.c y.tab.c -o parser -lfl
27
OUTPUT:
t1 = 10 + 5 t2 = t1 - 3
RESULT:
Thus the program to generate three address code for a simple program using LEX and YACC was
executed successfully.
28
Ex.No:8
DATE:
AIM:
ALGORITHM:
29
PROGRAM:
#include<stdio.h>
#include<string.h>
#include<conio.h>
int count=1,i=0,j=0,l=0,findval=0,k=0,kflag=0;
char key[4][12]= {"int","float","char","double"};
char dstr[100][100],estr[100][100];
char token[100],resultvardt[100],arg1dt[100],arg2dt[100];
void entry();
int check(char[]);
int search(char[]);
void typecheck();
struct table
{
char var[10];
char dt[10];
};
struct table tbl[20];
/*
 * Reads lines from standard input into `lines` until a line equal to "END"
 * or end of input. The row after the last stored line is always set to
 * "END", so the sentinel loops in main() terminate.
 * Returns the number of lines stored before the sentinel.
 */
static int read_until_end(char lines[][100], int capacity)
{
    int n = 0;

    while (n < capacity - 1) {
        printf("\t");
        if (fgets(lines[n], 100, stdin) == NULL)
            break;
        lines[n][strcspn(lines[n], "\r\n")] = '\0';   /* drop the line ending */
        if (strcmp(lines[n], "END") == 0)
            return n;
        n++;
    }
    strcpy(lines[n], "END");
    return n;
}

/*
 * Type-checking driver.
 *
 * Reads the declarations and then the expressions, each list ended by a
 * line containing END. Calls entry() once per declaration with the global
 * index i, then typecheck() once per expression with the global index l.
 */
int main(void)
{
    printf("\n IMPLEMENTATION OF TYPE CHECKING \n");
    printf("\n DECLARATION \n\n");
    read_until_end(dstr, 100);
    printf("\n EXPRESSION \n\n");
    read_until_end(estr, 100);

    i=0;
    printf("\n SEMANTIC ANALYZER(TYPE CHECKING): \n");
    while(strcmp(dstr[i],"END"))
    {
        entry();
        printf("\n");
        i++;
    }
    l=0;
    while(strcmp(estr[l],"END"))
    {
        typecheck();
        printf("\n");
        l++;
    }
    return 0;
}
/*
 * Copies characters of estr[l], starting at the global position j, into
 * token until one of the characters in `stops` or the end of the string is
 * reached, then looks the token up with search() and copies its declared
 * type into dt_out.
 *
 * Returns 1 when the variable was found, 0 otherwise (after printing
 * "Undefined Variable"). On return j indexes the delimiter that stopped the
 * scan, or the terminating NUL.
 *
 * NOTE(review): search() is defined outside this listing. The `> 0` test is
 * kept from the original code, so a result of 0 is treated as "not found" -
 * confirm that search() never returns 0 for a valid entry.
 */
static int operand_type(const char *stops, char *dt_out)
{
    memset(token, 0, sizeof token);
    k = 0;
    while (estr[l][j] != '\0' && strchr(stops, estr[l][j]) == NULL) {
        if (k < (int)sizeof token - 1)   /* keep room for the terminating NUL */
            token[k++] = estr[l][j];
        j++;
    }

    findval = search(token);
    if (findval > 0) {
        strcpy(dt_out, tbl[findval].dt);
        findval = 0;
        return 1;
    }
    printf("Undefined Variable\n");
    return 0;
}

/*
 * Type-checks the expression estr[l], expected in the form
 * "result=arg1<op>arg2;" with <op> one of + - * /.
 *
 * Looks up the declared type of each of the three variables and reports
 * whether the two operands agree with each other and with the result.
 * When any variable is undefined the comparison is skipped, so types left
 * over from a previous expression are never compared.
 */
void typecheck()
{
    int known = 1;

    j = 0;
    known &= operand_type("=", resultvardt);
    if (estr[l][j] != '\0')
        j++;                             /* step over '=' */
    known &= operand_type("+-*/", arg1dt);
    if (estr[l][j] != '\0')
        j++;                             /* step over the operator */
    known &= operand_type(";", arg2dt);

    if (!known)
        return;

    if (strcmp(arg1dt, arg2dt) != 0) {
        printf("\tType Mismatch\n");
    } else if (strcmp(resultvardt, arg1dt) != 0) {
        printf("\tLvalue and Rvalue should be same\n");
    } else {
        printf("\tThere is no type mismatch in the expression %s ", estr[l]);
    }
}
33
OUTPUT:
RESULT:
Thus the program for type checking is executed and verified.
34
Ex.No:9
DATE:
AIM:
ALGORITHM:
35
PROGRAM:
#include<stdio.h>
#include<conio.h>
#include<ctype.h>
enum { MAX_STMTS = 25, MAX_STMT_LEN = 25 };

/*
 * Applies one optimization to a statement of the form "d=xoy" (single
 * character operands) and prints the result.
 *
 * s must be a zero-filled buffer of MAX_STMT_LEN characters holding the
 * statement without blanks or the trailing ';'. Positions: s[0] result,
 * s[1] '=', s[2] left operand, s[3] operator, s[4] right operand.
 *
 *   d=x**2          -> strength reduction, prints d=x*x
 *   d=<digit>o<digit> -> constant folding, prints d=<value>
 *   d=x*1, d=x/1, d=x+0, d=x-0 -> algebraic simplification, prints d=x
 */
static void optimize_statement(const char *s)
{
    const char lhs = s[2], op = s[3], rhs = s[4];
    int j;

    if (op == '*' && rhs == '*') {
        printf("\n type 1 reduction in strength");
        if (s[5] == '2') {
            for (j = 0; j <= 3; j++)
                printf("%c", s[j]);
            printf("%c", lhs);
        }
    } else if (isdigit((unsigned char)lhs) && isdigit((unsigned char)rhs)) {
        /* Fold on the digit values, not on their character codes. */
        const int p = lhs - '0', q = rhs - '0';
        int r = 0, folded = 1;

        printf("\n type2 constant floding");
        switch (op) {
        case '*': r = p * q; break;
        case '+': r = p + q; break;
        case '-': r = p - q; break;
        case '/':
            if (q == 0)
                folded = 0;      /* division by zero cannot be folded */
            else
                r = p / q;
            break;
        default:
            folded = 0;
            break;
        }
        if (folded) {
            for (j = 0; j <= 1; j++)
                printf("%c", s[j]);
            printf("%d", r);
            printf("\n");
        } else {
            printf("\n sorry cannot optimize\n");
        }
    } else if (rhs == '0' || rhs == '1') {
        printf("\n type3 algebraic expression elimation");
        if ((rhs == '1' && (op == '*' || op == '/')) ||
            (rhs == '0' && (op == '+' || op == '-'))) {
            for (j = 0; j <= 2; j++)
                printf("%c", s[j]);
            printf("\n");
        } else {
            printf("\n sorry cannot optimize\n");
        }
    } else {
        printf("\n Error input");
    }
}

/*
 * Code-optimization driver.
 *
 * Copies standard input (until end of file) to "infile.txt", reads the file
 * back as ';'-separated statements, echoes them, and prints the optimized
 * form of each one. Whitespace between and inside statements is ignored so
 * that the fixed operand positions stay valid.
 *
 * Returns 0 on success, 1 if "infile.txt" cannot be opened.
 */
int main(void)
{
    char a[MAX_STMTS][MAX_STMT_LEN] = {{0}};
    int c, k = 0, i, count = 0;
    FILE *fi;

    printf("Enter three address code");
    printf("\nEnter the ctrl-z to complete:\n");
    fi = fopen("infile.txt", "w");
    if (fi == NULL) {
        perror("infile.txt");
        return 1;
    }
    while ((c = getchar()) != EOF)
        fputc(c, fi);
    fclose(fi);

    printf("\n Unoptimized input block\n");
    fi = fopen("infile.txt", "r");
    if (fi == NULL) {
        perror("infile.txt");
        return 1;
    }
    while ((c = fgetc(fi)) != EOF) {
        if (isspace(c))
            continue;
        if (c != ';') {
            /* Characters beyond the buffer limits are dropped. */
            if (count < MAX_STMTS && k < MAX_STMT_LEN - 1)
                a[count][k++] = (char)c;
            continue;
        }
        if (k > 0 && count < MAX_STMTS) {
            printf("%s\n", a[count]);
            count++;
        }
        k = 0;
    }
    if (k > 0 && count < MAX_STMTS) {    /* last statement without ';' */
        printf("%s\n", a[count]);
        count++;
    }
    fclose(fi);

    printf("\n Optimized three address code");
    for (i = 0; i < count; i++)
        optimize_statement(a[i]);
    printf("\n");
    return 0;
}
38
infile.txt
a=d/1; b=2+4; c=s**2;
OUTPUT
RESULT
Thus the C program for implementation of Code optimization was executed successfully.
39
Ex.No: 10
DATE:
IMPLEMENTATION OF BACKEND
AIM:
To write a ‘C’ program to generate the machine code for the given intermediate code.
ALGORITHM:
Step3: Display the assembly code according to the operators present in the given expression.
Step4: Use the temporary registers (R0, R1) while storing the values in assembly code programs.
40
PROGRAM:
/* CODE GENERATOR */
#include<ctype.h>
#include<stdio.h>
#include<string.h>
int count=0,i=0,l=0;
char str[100][100];
void gen();
/*
 * Code-generator driver.
 *
 * Reads three-address statements, one per line, into str[] until a line
 * equal to "QUIT" (or end of input, or a full table), then calls gen() for
 * each stored statement with the global index i.
 */
int main(void)
{
    printf("\n CODE GENERATOR \n");
    printf("\n ENTER THREE ADDRESS CODE \n\n");
    i = 0;
    for (;;) {
        printf("\t");
        /* Keep the last row free so a "QUIT" sentinel always fits. */
        if (i >= 99 || fgets(str[i], sizeof str[i], stdin) == NULL) {
            strcpy(str[i], "QUIT");
            break;
        }
        str[i][strcspn(str[i], "\r\n")] = '\0';   /* drop the line ending */
        if (strcmp(str[i], "QUIT") == 0)
            break;
        i++;
    }

    i = 0;
    printf("\n ASSEMBLY LANGUAGE CODE: \n");
    while (strcmp(str[i], "QUIT")) {
        gen();
        printf("\n");
        i++;
    }
    return 0;
}
void gen()
{
int j;
printf("\n");
for(j=strlen(str[i])-1;j>=0;j--)
{
char reg='R';
if(isdigit(str[i][j])||(isalpha(str[i][j]))|| str[i][j]=='+'||str[i][j]=='-'||str[i][j]=='*'||str[i][j]=='/'||str[i][j]=='
'||str[i][j]=='|'||str[i][j]=='&'||str[i][j]==':'||str[i][j]=='=')
{
switch(str[i][j])
{
case '+':
printf("\n\t MOV\t%c,%c%d",str[i][j-1],reg,count);
41
printf("\n\t ADD\t%c,%c%d",str[i][j+1],reg,count);
break;
case '-':
printf("\n\t MOV\t%c,%c%d",str[i][j-1],reg,count);
printf("\n\t SUB\t%c,%c%d",str[i][j+1],reg,count);
break;
case '*':
printf("\n\t MOV\t%c,%c%d",str[i][j-1],reg,count);
printf("\n\t MUL\t%c,%c%d",str[i][j+1],reg,count);
break;
case '/':
printf("\n\t MOV\t%c,%c%d",str[i][j-1],reg,count);
printf("\n\t DIV\t%c,%c%d",str[i][j+1],reg,count);
break;
case '|':
printf("\n\t MOV\t%c,%c%d",str[i][j-1],reg,count);
printf("\n\t OR\t%c,%c%d",str[i][j+1],reg,count);
break;
case '&':
printf("\n\t MOV\t%c,%c%d",str[i][j-1],reg,count);
printf("\n\t AND\t%c,%c%d",str[i][j+1],reg,count);
break;
case ':':
if(str[i][j+1]=='=')
{
printf("\n\t MOV\t%c%d,%c",reg,count,str[i][j-1]);
count++;
}
else
{
printf("\n syntax error...\n");
}
break;
default:
break;
}
}
else printf("\n Error\n");
}
}
42
OUTPUT:
CODE GENERATOR
A:=B+C
D:=E/F
QUIT
MOV B,R0
ADD C,R0
MOV R0,A
MOV E,R1
DIV F,R1
MOV R1,D
RESULT:
Thus the program for generation of Machine Code for the given intermediate code is executed and verified
43
44