Cs3501 Compiler Design Laboratory
Cs3501 Compiler Design Laboratory
Page
S.No Date Name of Experiments Marks Staff Signature
No
CONTENTS
Page
S.No Date Name of Experiments Marks Staff Signature
No
AIM:
ALGORITHM:
Identifiers:
If the current character is a digit, start reading characters until a non-digit or a dot (.) is
encountered. If a dot is encountered, continue reading digits as it might be a floating-point
number. Add the identified number as a constant token to the list.
Check for operators and special characters such as +, -, *, /, =, ==, !=, <, >, etc. Identify these
characters individually and add them as operator tokens to the list.
Comments:
Check for comment patterns such as // for single-line comments and /* */ for multi-line
comments. Ignore the characters inside comments as they are not part of the tokens.
Ignore whitespace characters, tabs, and newline characters as they are typically not part of the
tokens. They are used for code formatting.
Step 4: Output the list of tokens identified during the lexical analysis. Each token should be
labelled with its type (identifier, constant, operator, etc.) and the actual value (if applicable).
1
PROGRAM:
%{
#include<stdio.h>
CS3501_CD
#include<stdlib.h>
#include<string.h>
#define MAX_IDENTIFIER_LENGTH 50
%}
%option noyywrap
%option yylineno
%%
\n /*ignore newline*/
\* |
\+ |
\- {printf("%s is a Operator\n",yytext);}
[a-zA-Z][a-zA-Z0-9]* {printf("identifier:%s\n",yytext);addtosymboltable(yytext);}
%%
2
typedef struct
char name[MAX_IDENTIFIER_LENGTH];
} Symbol;
Symbol symbol_table[100];
int symbol_count=0;
strncpy(symbol_table[symbol_count].name,identifier,MAX_IDENTIFIER_LENGTH-1);
symbol_table[symbol_count].name[MAX_IDENTIFIER_LENGTH-1]='\0';
symbol_count++;
else
exit(0);
/*
 * Program entry point for the lexical analyzer.
 * Calls yylex() once to run the scanner built from the rules section,
 * then returns 0 so the caller sees a successful exit status
 * (the earlier version returned 1, which signals failure).
 */
int main()
{
yylex();
return 0;
}
3
OUTPUT:
RESULT:
Thus the program for developing a lexical analyzer to recognize a few patterns in C has been
executed successfully.
4
Ex.No:2 Implement a Lexical Analyzer using Lex Tool
AIM
ALGORITHM
Step 3: Check for the given list of keywords and print them as keyword if it is encountered.
Step 5: For a function, print the beginning and ending of the function block.
Step 6: Similarly print the corresponding statements for numbers, identifiers and assignment
operators.
Step 7: In the main function get the input file as argument and open the file in read mode.
Step 8: Then read the file and print the corresponding lex statement given above.
PROGRAM CODE
%{
int COMMENT=0;
%}
identifier[a-zA-Z][a-zA-Z0-9]*
%%
int |
float |
void |
5
main |
if |
else |
printf |
scanf |
for |
char |
getch |
"/*" {COMMENT=1;}
"*/" {COMMENT=0;}
\)(\;)? {if(!COMMENT)printf("\t");ECHO;printf("\n");}
\(ECHO;
\<= |
\>= |
\< |
.|\n
6
%%
if(argc>1)
FILE*file;
file=fopen(argv[1],"r");
if(!file)
exit(0);
yyin=file;
yylex();
printf("\n\n");
return 0;
/*
 * yywrap - wrap-up routine the scanner calls when it reaches end of input.
 * Returns 1 to report that there is no further input, so yylex() can finish.
 * Returning 0 would claim more input is available and the scanner would
 * keep trying to read past the end of the file.
 */
int yywrap(void)
{
return 1;
}
7
OUTPUT:
RESULT:
Thus the program for implementing a Lexical analyzer using LEX tool was executed
successfully.
8
Program to recognize a valid arithmetic expression that uses
Ex.No:3a operator + , -, * and /.
AIM
To write a program for recognizing a valid arithmetic expression that uses the operators
+, -, * and / using semantic rules of the YACC tool and LEX.
ALGORITHM
Step 1: A Yacc source program has three parts as follows: Declarations %% translation rules
%% supporting C routines
Step 3: Rules Section: The rules section defines the rules that parse the input stream. Each rule
consists of a grammar production and the associated semantic action.
Step 4: Programs Section: The programs section contains the following subroutines. Because
these subroutines are included in this file, it is not necessary to use the yacc library when
processing this file.
Main- The required main program that calls the yyparse subroutine to start the program.
yywrap - The wrap-up subroutine that returns a value of 1 when the end of input occurs. The
calc.lex file contains include statements for standard input and output, as well as for the
y.tab.h file, which the yacc program generates from the grammar file information if we use
the -d flag with the yacc command. The y.tab.h file contains definitions for the tokens that
the parser program uses.
Step 5:calc.lex contains the rules to generate these tokens from the input stream.
9
PROGRAM CODE
%{
#include "ex3a.tab.h"
%}
%%
. return yytext[0];
\n return 0;
%%
int yywrap()
return 1;
%{
#include<stdio.h>
%}
%token A
10
%token ID
%%
statement:A'='E
| E{
$$=$1;
};
E:E'+'ID
|E'-'ID
|E'*'ID
|E'/'ID
|ID
%%
main()
do
yyparse();
}while(!feof(yyin));
yyerror(char*s)
11
OUTPUT:
[root@localhost]# ./a.out
x=a+b;
Identifier is x
Operator is EQUAL
Identifier is a
Operator is PLUS
Identifier is b
RESULT:
12
Ex.No:3b Program to recognize a valid variable which starts with a letter
followed by any number of letters or digits.
AIM
To write a program for recognizing a valid variable which starts with a letter followed by any
number of letters or digits.
ALGORITHM
Step 1: A Yacc source program has three parts as follows: Declarations %% translation rules
%% supporting C routines
Step 3: Rules Section: The rules section defines the rules that parse the input stream. Each rule
consists of a grammar production and the associated semantic action.
Step 4: Programs Section: The programs section contains the following subroutines. Because
these subroutines are included in this file, it is not necessary to use the yacc library when
processing this file.
Main- The required main program that calls the yyparse subroutine to start the program.
yywrap - The wrap-up subroutine that returns a value of 1 when the end of input occurs. The
calc.lex file contains include statements for standard input and output, as well as for the
y.tab.h file, which the yacc program generates from the grammar file information if we use
the -d flag with the yacc command. The y.tab.h file contains definitions for the tokens that
the parser program uses.
Step 5:calc.lex contains the rules to generate these tokens from the input stream.
13
PROGRAM CODE
%{
#include "ex3b.tab.h"
%}
%%
. return yytext[0];
\n return 0;
int yywrap()
return 1;
%{
#include<stdio.h>
%}
%%
D:T L
L:L ID
14
| ID
T:INT
| FLOAT
| DOUBLE
%%
main()
do
yyparse();
}while(!feof(yyin));
yyerror(char*s)
15
OUTPUT:
[root@localhost]# ./a.out
int a,b;
Identifier is a
Identifier is b
RESULT:
16
Ex.No:3c Program to recognize valid control structure syntax of the C
language (for loop, while loop, if-else, if-else-if, switch-case, etc.)
AIM
To write a program to recognize valid control structure syntax of the C language (for loop, while
loop, if-else, if-else-if, switch-case, etc.).
ALGORITHM
Step 2: Tokenize the input code. Split the code into individual tokens (keywords, identifiers,
operators, etc.) for easier processing.
For Loop:
Check for tokens that match the pattern: for (initialization; condition; increment) {/* code */}
Recursively check the code inside the loop using the same steps.
While Loop:
Check for tokens that match the pattern: while (condition) {/* code */}
If-Else Statements:
Check for tokens that match the pattern: if (condition) {/* code */} else {/* code */}
Ensure proper parentheses and braces for both if and else parts.
Else-If Ladder:
17
Ensure proper parentheses and braces for each if and else block.
Switch-Case Statements:
switch (variable) { case constant1: /* code */ break; case constant2: /* code */ break; ... default:
/* code */ break; }
Recursively check the code inside each case and default block.
Step 4: If the code passes all syntax checks, output a message indicating that the control
structures are valid. Otherwise, indicate the specific error encountered during the parsing
process.
PROGRAM:
%{
#include<stdio.h>
#include "ex3c.tab.h"
%}
%%
18
"break" { return BREAK; }
[\t\n] ;
. ;
%%
int yywrap()
return 1;
%{
#include<stdio.h>
int yylex();
%}
%%
program: statement
| program statement
19
;
statement:if_statement
|while_loop
|switch_case_statement
|for_loop
expression_opt CLOSE_BRACE
20
;
expression_opt:/*empty*/
|expression
|expression SEMICOLON
expression:
%%
fprintf(stderr,"Error=%s\n", s);
return 1;
int main() {
if(yyparse()==0){
21
printf("Parsing completed successfully\n");
else{
return 0;
OUTPUT:
for(i=0;i<10;i++)
a=c+b
if(a<b)
a=a+b
else
a=a-b
RESULT:
22
Ex.No:3d Implement an Arithmetic Calculator using LEX and YACC
AIM
To write a program for implementing a calculator for computing the given expression using
semantic rules of the YACC tool and LEX.
ALGORITHM
1. A Yacc source program has three parts as follows: Declarations %% translation rules %%
supporting C routines
2. Declarations Section:
This section contains entries that:
3. Rules Section: The rules section defines the rules that parse the input stream. Each rule
consists of a grammar production and the associated semantic action.
4. Programs Section: The programs section contains the following subroutines. Because
these subroutines are included in this file, it is not necessary to use the yacc library when
processing this file.
Main- The required main program that calls the yyparse subroutine to start the program.
yywrap - The wrap-up subroutine that returns a value of 1 when the end of input occurs.
The calc.lex file contains include statements for standard input and output, as well as for
the y.tab.h file, which the yacc program generates from the grammar file information if we
use the -d flag with the yacc command. The y.tab.h file contains definitions for the tokens
that the parser program uses.
5. calc.lex contains the rules to generate these tokens from the input stream.
23
PROGRAM CODE
%{
#include <stdio.h>
#include "ex3d.tab.h"
%}
%%
[0-9]+ {
yylval = atoi(yytext);
return NUMBER;
[ \t] ;
[\n] return 0;
. {return yytext[0];}
%%
int yywrap()
return 1;
%{
#include <stdio.h>
int flag = 0;
%}
24
%token NUMBER
%%
ArithmeticExpression: E {
return 0;
};
E: E '+' E { $$ = $1 + $3; }
| E '-' E { $$ = $1 - $3; }
| E '*' E { $$ = $1 * $3; }
| E '/' E { $$ = $1 / $3; }
| E '%' E { $$ = $1 % $3; }
| NUMBER { $$ = $1; }
%%
int main()
yyparse();
if (flag == 0) {
25
}
return 0;
flag = 1;
OUTPUT
RESULT:
Thus the program for implementing a calculator for computing the given expression
using semantic rules of the YACC tool and LEX was executed successfully.
26
Generate three address code for a simple program using LEX
Ex.No: 4
and YACC
AIM:
To write a program to generate three-address code for a simple program using LEX and YACC.
ALGORITHM:
Define lexical rules using Lex to tokenize the input code. Specify regular expressions for
identifiers, constants, operators, and keywords. Lex generates tokens for recognized patterns.
Define grammar rules using Yacc for expressions, statements, assignments, and control
structures. Attach actions to productions for generating three-address code. Use semantic actions
to handle grammar rules.
Create a symbol table to store identifiers, their types, and memory locations. Update the symbol
table during the parsing process. Assign memory addresses to variables during declarations.
Integrate intermediate code generation logic within Yacc actions. When parsing expressions or
statements, generate corresponding three-address code instructions. Store generated code in an
intermediate code representation.
Step 5: After parsing the input code, the intermediate code will be generated based on the
specified rules. Store or process the generated three-address code for further optimization or
translation to machine code if necessary.
27
PROGRAM:
%{
#include<stdio.h>
#include "ex4.tab.h"
%}
%%
CS3501_CD
[\t] ;
\n {return EOL;}
. {fprintf(stderr,"Error:Invalid Character\n");}
%%
int yywrap(){
return 1;
%{
#include<stdio.h>
#include<stdlib.h>
int temp_count=0;
28
void yyerror(const char*s){
fprintf(stderr,"Error:%s\n",s);
%}
%%
program:lines
lines:lines line
| line
line:expr EOL
printf("Result:t%d\n",$1);
expr:NUM{
$$=$1;
$$=$2;
29
| expr '+' expr
printf("t%d=%d+%d\n",++temp_count,$1,$3);
$$=temp_count;
printf("t%d=%d-%d\n",++temp_count,$1,$3);
$$=temp_count;
printf("t%d=%d*%d\n",++temp_count,$1,$3);
$$=temp_count;
if($3==0)
{yyerror("Division by zero");
$$=0;}
else{
printf("t%d=%d/%d\n",++temp_count,$1,$3);
$$=temp_count;
30
;
%%
int main()
yyparse();
return 0;
OUTPUT:
2*10/2+5-1
t1=2*10
t2=1/2
t3=2+5
t4=3-1
Result:t4
RESULT:
Thus the program to generate three-address code for a simple program using LEX and YACC
was executed successfully.
31
Ex.No:5 Implement Type Checking Using LEX And YACC
AIM:
To write a program to implement type checking using Lex and Yacc.
ALGORITHM:
Specify the grammar rules for the programming language, including expressions, statements,
declarations, and data types. Incorporate type information where applicable.
Implement lexical analysis using Lex to tokenize the input code. Define regular expressions for
identifiers, keywords, operators, constants, and other language constructs. Lex generates tokens
for recognized patterns.
Create a symbol table data structure to store identifiers, their declared types, and other relevant
information during the parsing process. Initialize the symbol table.
Define Yacc grammar rules based on the language's syntax. Include actions within Yacc rules to
handle type checking logic.
During parsing, populate the symbol table with identifier names and their declared types.
Implement type checking logic within Yacc actions. Perform type comparisons and validations
according to the language's rules. For example:
32
Step 6: Error Handling:
Implement error handling mechanisms within Yacc actions for type mismatch errors. Generate
meaningful error messages when type mismatches are detected, indicating the source of the
error.
PROGRAM:
lex part:
%{
#include "type.tab.h"
%}
%%
[0-9]+ {
yylval = atoi(yytext);
return INTEGER;
[0-9]+"."[0-9]* {
yylval = atof(yytext);
return FLOAT;
[a-zA-Z]+ {
yylval= yytext;
return CHAR;
33
%%
int yywrap() {
return 1;
Yacc part:
%{
#include <stdio.h>
%}
%%
CS3501_CD
program:
/* empty */
| program line
line:
statement EOL {
if ($1 == INTEGER) {
printf("Type: INTEGER\n");
34
} else if ($1 == FLOAT) {
printf("Type: FLOAT\n");
printf("Type: CHAR/STRING\n");
} else {
printf("Invalid type\n");
statement:
expression {
$$ = $1;
expression:
INTEGER {
$$ = INTEGER;
| FLOAT {
$$ = FLOAT;
| CHAR {
$$ = CHAR;
35
;
%%
int main() {
yyparse();
return 0;
OUTPUT:
123
Type:INTEGER
123.897
Type:FLOAT
God
Type:CHAR/STRING
df24
RESULT:
Thus the program to implement type checking using Lex and Yacc was written and executed
successfully.
36
Ex.No:6 Implement simple code optimization techniques (Constant folding,
Strength reduction and Algebraic transformation)
AIM
ALGORITHM
1. Write a factorial program using a for loop and a do-while loop to illustrate the
optimization technique.
2. In for loop variable initialization is activated first and the condition is checked next. If the
condition is true the corresponding statements are executed and specified increment / decrement
operation is performed.
4. In do-while loop the variable is initialized and the statements are executed then the condition
checking and increment / decrement operation is performed.
5. When comparing the for and do-while loops for optimization, do-while is better because the
statements are executed first and only then is the condition checked. So, during the statement
execution itself we can detect a problem with the result, and there is no need to wait for the
result of the specified condition.
6. Finally, when considering code optimization in loops, do-while is best with respect to
performance.
PROGRAM CODE
#include<stdio.h>
#include<string.h>
struct op
char l;
char r[20];
op[10],pr[10];
37
void main()
int a,i,k,j,n,z=0,m,q;
char *p,*l;
char temp,t;
char *tem;
scanf("%d",&n);
for(i=0;i<n;i++)
printf("left: ");
scanf(" %c",&op[i].l);
printf("right: ");
scanf(" %s",&op[i].r);
printf("Intermediate Code
") ;
for(i=0;i<n;i++)
printf("%c=",op[i].l);
printf("%s
",op[i].r);
38
for(i=0;i<n-1;i++)
temp=op[i].l;
for(j=0;j<n;j++)
p=strchr(op[j].r,temp);
if(p)
pr[z].l=op[i].l;
strcpy(pr[z].r,op[i].
r);
z++;
pr[z].l=op[n-1].l;
strcpy(pr[z].r,op[n-1].r);
z++;
printf("
");
for(k=0;k<z;k++)
printf("%c =",pr[k].l);
printf("%s
39
",pr[k].r);
for(m=0;m<z;m++)
tem=pr[m].r;
for(j=m+1;j<z;j++)
p=strstr(tem,pr[j].r);
if(p)
t=pr[j].l;
pr[j].l=pr[m].l;
for(i=0;i<z;i++)
l=strchr(pr[i].r,t) ;
if(l)
a=l-pr[i].r;
printf("pos: %d",a);
pr[i].r[a]=pr[m].l;
}}}}}
for(i=0;i<z;i++)
printf("%c =",pr[i].l);
40
printf("%s",pr[i].r);
for(i=0;i<z;i++)
for(j=i+1;j<z;j++)
q=strcmp(pr[i].r,pr[j].r);
if((pr[i].l==pr[j].l)&&!q)
pr[i].l='';
printf("Optimized Code");
for(i=0;i<z;i++)
if(pr[i].l!='')
printf("%c=",pr[i].l);
printf("%s",pr[i].r);
41
OUTPUT
RESULT:
42
Implement back-end of the compiler for which the three address
Ex.No:7 code is given as input and the 8086 assembly language code is
produced as output.
AIM
ALGORITHM
2. Get the three variables from the statements and store them in the text file k.txt.
3. Compile the program and give the path of the source file.
PROGRAM CODE
#include <stdio.h>
#include <stdio.h>
#include<conio.h>
#include <string.h>
void main() {
int i = 0;
clrscr();
do
scanf("%s", icode[i]);
43
} while (strcmp(icode[i++], "exit") != 0);
printf("
printf("
************************");
i = 0;
do {
strcpy(str, icode[i]);
switch (str[3]) {
case '+':
break;
case '-':
break;
case '*':
break;
case '/':
break;
printf("
printf("
44
%s%c,R%d", opr, str[4], i);
printf("
getch();
45
OUTPUT
a=a*b
c=f*h
g=a*h
f=Q+w
t=q-j
exit
target code generation
************************
Mov a,R0
MUL b,R0
Mov R0,a
Mov f,R1
MUL h,R1
Mov R1,c
Mov a,R2
MUL h,R2
Mov R2,g
Mov Q,R3
ADD w,R3
Mov R3,f
Mov q,R4
SUB j,R4
Mov R4,t
RESULT:
Thus the C program to implement the back end of the compiler, for which the three-address code
is given as input and the 8086 assembly language code is produced as output, was successfully
executed.
46