Report
Report
CE468
Introduction to Compilers and Translation Engineering
Compiler for a subset of Python-like programming
language
Name ID
Page 1 of
15
Language Alphabet
Digits 0, 1, 2, ..., 9
Statement Types
<statement> → <assignment>
| <function_call>
| <if_statement>
| <for_statement>
| <def_statement>
| <class_statement>
| <return_statement>
| <with_statement>
| <print_statement>
Assignment
<assignment> → <identifier> "=" <expression>
| <identifier> "[" <expression> "]" "=" <expression> # For list indexing
Function Call
<function_call> → <identifier> "(" [<arguments>] ")"
If Statement
<if_statement> → "if" <expression> ":" <statements>
["elif" <expression> ":" <statements>]
["else:" <statements>]
Page
2
For Statement
<for_statement> → "for" <identifier> "in" <expression> ":" <statements>
| "for" <identifier> "in" <expression> ":" <statements> "else:" <statements>
Function Definition
<def_statement> → "def" <identifier> "(" [<parameters>] ")" ":"
<statements>
Class Definition
<class_statement> → "class" <identifier> [ "(" [<parameters>] ")" ] ":"
<statements>
Return Statement
<return_statement> → "return" [<expression>]
Print Statement
<print_statement> → "print" "(" [<expression>] ")"
Expressions
<expression> → <identifier>
| <literal>
| <expression> <operator> <expression>
| "(" <expression> ")"
| <list_comprehension>
| <dict_comprehension>
Parameters
<parameters> → <identifier> ["," <parameters>]
| <empty>
Arguments
<arguments> → <expression> ["," <arguments>]
| <empty>
Literals
<literal> → <integer> | <float> | <string> | "True" | "False" | "None"
Operators
<operator> → "+" | "-" | "*" | "/" | "%" | "==" | "!=" | ">" | "<" | ">=" | "<="
Lexer Code
/* Named patterns referenced as {name} by the rules section.
 * NOTE(review): the rules also use {digits}, {integer} and {num}; their
 * definitions are not part of this excerpt and must exist alongside these. */

/* A quoted literal on a single line. The body excludes the closing quote so
 * that two literals on one line ("a" + "b") are scanned as two tokens instead
 * of one greedy match from the first quote to the last. */
string        (\"[^\"\n]*\"|\'[^\'\n]*\')
divideequal   \/=
plusequal     \+=
minusequal    \-=
mulequal      \*=
equal         =
plus          \+
minus         \-
divide        \/
mul           \*
special_start \(
special_end   \)
alpha         [a-zA-Z]
%{
#include <stdio.h>
#include <string.h>

/* The scanner stores strdup(yytext) in yylval, so the semantic value type
 * must be char* here as well. It has to be defined before y.tab.h is read,
 * otherwise the header falls back to its default (int) type. */
#define YYSTYPE char*
#include "y.tab.h"

extern YYSTYPE yylval;
int t = 1;
int n = 0;
%}
%%
    /* Token Definitions */
\t                      { printf("indent "); return INDENT; }       /* Indentation */
#.*\n                   { printf("comment "); return COMMENT; }     /* Single-line comment */
(''')(.*\n)*(.*)(''')   { printf("comment "); return COMMENT; }     /* Multi-line comment */
class                   { printf("class "); return CLASS; }         /* Class keyword */
def                     { printf("function "); return FUNCTION; }   /* Function keyword */
if                      { printf("if "); return IF; }               /* If keyword */
else                    { printf("else "); return ELSE; }           /* Else keyword */
elif                    { printf("elseif "); return ELSEIF; }       /* Elif keyword */
for                     { printf("for "); return FOR; }             /* For loop keyword */
    /* WHILE is declared by the parser and used by its while_loop rule, so the
       keyword must be recognised here; without this rule it scans as ID. */
while                   { printf("while "); return WHILE; }         /* While loop keyword */
True                    { printf("True "); yylval = strdup(yytext); return T; }   /* True literal */
False                   { printf("False "); yylval = strdup(yytext); return F; }  /* False literal */

    /* Arithmetic Operators */
{plus}                  { printf("plus "); yylval = strdup(yytext); return PLUS; }
{minus}                 { printf("minus "); yylval = strdup(yytext); return MINUS; }
{mul}                   { printf("mul "); yylval = strdup(yytext); return MUL; }
{divide}                { printf("divide "); yylval = strdup(yytext); return DIVIDE; }

    /* Assignment Operators */
{equal}                 { printf("equal "); yylval = strdup(yytext); return EQUAL; }
{plusequal}             { printf("plusequal "); return PLUSEQUAL; }
{divideequal}           { printf("divideequal "); return DIVIDEEQUAL; }
{mulequal}              { printf("mulequal "); return MULEQUAL; }
{minusequal}            { printf("minusequal "); return MINUSEQUAL; }

    /* Special Characters */
{special_start}         { printf("special_start "); return SPECIAL_START; }
{special_end}           { printf("special_end "); return SPECIAL_END; }
{string}                { printf("string "); yylval = strdup(yytext); return STRING; }
{integer}               { printf("int "); yylval = strdup(yytext); return INTEGER; }
{num}                   { printf("num "); yylval = strdup(yytext); return NUM; }
\n                      { printf("\n"); return NL; }

    /* Logical Operators */
or                      { printf("or "); return OR; }
and                     { printf("and "); return AND; }
not                     { printf("not "); return NOT; }
in                      { printf("in "); return IN; }
not\ in                 { printf("not in "); return NOTIN; }

    /* Comparison Operators */
\<                      { printf("lessthan "); return LESSTHAN; }
\<=                     { printf("lessthanequal "); return LESSTHANEQUAL; }
\>=                     { printf("greaterthanequal "); return GREATERTHANEQUAL; }
\>                      { printf("greaterthan "); return GREATERTHAN; }
\==                     { printf("doubleequal "); return DOUBLEEQUAL; }
\!=                     { printf("notequal "); return NOTEQUAL; }

    /* Miscellaneous Tokens */
:                       { printf("colon "); return COLON; }
\[                      { printf("sq bracket start "); return SQUAREBRACKET_START; }
\]                      { printf("sq bracket end "); return SQUAREBRACKET_END; }
\%                      { printf("mod "); return MOD; }
sep                     { printf("sep "); return SEP; }
end                     { printf("end "); return END; }
print                   { printf("print "); return PRINT; }
range                   { printf("range "); return RANGE; }
list                    { printf("list "); return LIST; }

    /* Identifiers */
[a-zA-Z_][_a-zA-Z0-9]*  { printf("ID "); yylval = strdup(yytext); return ID; }

    /* Unrecognized Characters: echoed and dropped (no token is returned).
       NOTE(review): this also matches a ',' or digits directly followed by an
       identifier, which then never reach the parser - confirm that is intended. */
[0-9;!,@#]*/(({alpha}|"_")({alpha}|{digits}|"_")*) { printf("%s\t", yytext); }

    /* Quotes */
\'                      { printf("single quote "); return SINGLEQUOTE; }
\"                      { printf("double quote "); return DOUBLEQUOTE; }

    /* Exponential Operator */
\*\*                    { printf("exp "); return EXPONENTIAL; }

    /* Comma */
,                       { printf("comma "); return COMMA; }
%%
/* End-of-input hook for the scanner: always returns 1. */
int yywrap() {
    const int no_more_input = 1;
    return no_more_input;
}
Page
5
Parser Code
%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <ctype.h>

#define null 0
#define COUNT 5
/* Semantic value type of every grammar symbol. */
#define YYSTYPE char*
// Global Variables
int size = 0;           // Size of symbol table
int v = 1;              // Error flag
int g = 0;              // Scope
int abc = 0;            // Temporary value
int aeval = 0;          // Arithmetic Expression evaluation
int lineno = 1;
extern int yylineno;
int flag = 0;           // Operator identification

// Labels
// Sized for the prefix plus any decimal int plus the terminator: the code
// generators build names such as "l12" here, which a 2-byte buffer cannot hold.
char label[16] = "l";
int l_ = 0;             // Label count
char l[100] = {'\0'};   // Labels storage

char st[100][10];       // Stack for Intermediate Code Generation (ICG)
int top = 0;            // Top of stack
int i_ = 0;             // Count of temporary variables in ICG
char i[100] = {'\0'};   // Temporary variables (t1, t2, ...)
// Same sizing rationale as `label`: names such as "t12" are built in place.
char temp[16] = "t";    // Temp variable prefix
char ICG[10000] = "";   // Intermediate code generation
char try1[5][10];
char try[5][10];
int relexp = 0;
int x = 0;
int x1 = 0;
int error = 0;
// Symbol Table Structure
// One entry of the singly linked symbol table (chained through `next`).
struct symbtab {
    char label[20];
    char type[20];
    int value;
    char scope[20];
    int lineno;
    struct symbtab *next;
};
Page
6
// Abstract Syntax Tree Structure
// Binary tree node: a name plus optional left and right children.
typedef struct AbstractSyntaxTree {
    char *name;
    struct AbstractSyntaxTree *left;
    struct AbstractSyntaxTree *right;
} node;
// External Declarations
struct symbtab *first, *last, *temp1, *temp2;
extern FILE *yyin;

// The generated parser calls yylex(); declare it so the call is not an
// implicit function declaration.
int yylex();

node* buildTree(char *, node *, node *);
void printTree(node *);
void push(char*);
void code_generation(int val);
void code_generation_assign();
void code_generation_1();
void code_generation_2();
void code_generation_3();
void insert(char* l, char* t, int v, char* s, int ln);
struct symbtab* search(char lab[]);
void display();
int yyerror();
node *nodes;
%}
// Token Declarations
%token DIGITS ID PLUS MINUS MUL DIVIDE NUM STRING INTEGER SPECIAL NL
%token SPACE KEYWORD SPECIAL_START SPECIAL_END IF ELSE WHILE SEP END OR AND
%token NOT IN NOTIN T F COLON SQUAREBRACKET_START SQUAREBRACKET_END MOD
%token EQUAL PLUSEQUAL PRINT RANGE SINGLEQUOTE DOUBLEQUOTE EXPONENTIAL
%token COMMA INDENT LIST DIVIDEEQUAL MINUSEQUAL MULEQUAL LESSTHAN
%token LESSTHANEQUAL GREATERTHAN GREATERTHANEQUAL DOUBLEEQUAL
%token NOTEQUAL FUNCTION LAMBDA
/* Returned by the scanner's comment, class, elif and for rules; they must be
   declared here so the scanner can see them through y.tab.h. */
%token COMMENT CLASS ELSEIF FOR
%%
/* Start rule: once the whole input has been reduced to S, store the rule's
   value in the global `nodes` (main later hands `nodes` to printTree).
   NOTE(review): `$$` is read here without being assigned in this action;
   presumably this relies on the parser pre-loading `$$` with `$1` - confirm. */
P : S { nodes = $$; }
;
/* One or more INDENT tokens. Each one sets g = 1, the value that
   code_generation_assign records as "local" scope. */
In : INDENT    { g = 1; }
   | In INDENT { g = 1; }
   ;
/* ID <assign-op> E1: push the target name, emit the assignment code, and
   build the tree node <op>(ID, E1). */
Assignment : ID assign E1
        {
            push($1);
            code_generation_assign();
            $$ = buildTree($2, buildTree($1, NULL, NULL), $3);
        }
    ;
/* Assignment operators; the value is the operator's spelling, which the
   Assignment rule uses as the tree node name. */
assign : DIVIDEEQUAL { $$ = "/="; }
       | MULEQUAL    { $$ = "*="; }
       | MINUSEQUAL  { $$ = "-="; }
       | EQUAL       { $$ = "="; }
       | PLUSEQUAL   { $$ = "+="; }
       ;
/* Left-recursive chain of OP1 operators over E2 operands: emit one
   three-address instruction per operator and build the tree node. */
E1 : E1 OP1 E2
        {
            code_generation(aeval);
            $$ = buildTree($2, $1, $3);
        }
   | E2
   ;
/* Left-recursive chain of OP2 operators over E3 operands: emit one
   three-address instruction per operator and build the tree node. */
E2 : E2 OP2 E3
        {
            code_generation(aeval);
            $$ = buildTree($2, $1, $3);
        }
   | E3
   ;
Page
8
error = 1;
} else {
abc = t_ptr->value;
aeval = (flag == 0) ? abc : (flag == 1 ? aeval + abc : (flag == 2 ? aeval -
abc : (flag == 3 ? aeval * abc
: aeval / abc)));
push($1);
$$ = buildTree($1, NULL, NULL);
}
}
}
;
/* A compound statement is an if/else, a lone if, or a while loop, each
   followed by a line break. */
Compound : if_else LB
         | only_if LB
         | while_loop LB
         ;
Page
9
/* while <condition> ':' <newline> <body>.
   After the body is parsed: emit "goto <label popped from try1>", then
   code_generation_2() emits the label popped from `try`, and x is decremented
   once more. The tree node is while(condition, body). */
while_loop : WHILE condition COLON LB IND
        {
            strcat(ICG, "goto ");
            strcat(ICG, try1[--x1]);
            strcat(ICG, "\n");
            code_generation_2();
            x--;
            $$ = buildTree("while", $2, $5);
        }
    ;
Page1
0
| IN
| NOTIN
;
/* Thin wrapper rules around single tokens or non-terminals. */
bool  : T
      | F
      ;

opor  : OR ;

opand : AND ;

opnot : NOT ;

IND   : IS { $$ = $1; }
      ;

X     : STRING
      | ID
      | NUM
      | INTEGER
      ;

LB    : NL ;
%%
int yyerror() {
printf("\n==============================SYNTAX ERROR: at
line number %d
==============================\n", yylineno - 1);
error = 1;
v = 0;
return 0;
}
Page1
1
strcpy(n->scope, s);
} else {
struct symbtab *p = malloc(sizeof(struct
symbtab)); strcpy(p->label, l);
strcpy(p-
>type, t); p-
>value = v;
p->lineno = ln;
strcpy(p-
>scope, s); p-
>next = null;
if (size ==
0) { first
= p; last
= p;
} else {
last->next =
p; last = p;
}
size++;
}
}
void display() {
struct symbtab *p = first;
printf("\n===================SYMBOL TABLE
before
Optimizations========================\n");
printf("LABEL\tTYPE\t\tVALUE\tSCOPE\
tLINENO\n"); for (; p != NULL; p = p-
>next) {
printf("%s\t%s\t%d\t%s\t%d\n", p->label, p->type, p->value, p->scope, p-
>lineno);
}
}
/*
 * Look up a symbol by name.
 *
 * Walks the symbol table from `first` and returns the first entry whose label
 * equals `lab`, or NULL when no entry matches.
 */
struct symbtab* search(char lab[]) {
    struct symbtab *entry;

    for (entry = first; entry != NULL; entry = entry->next) {
        if (!strcmp(entry->label, lab))
            return entry;
    }
    return NULL;
}
Page1
2
if (tree->left != NULL || tree->right !=
NULL) { printf("(");
}
printf(" %s ", tree-
>name); if (tree-
>left != NULL) {
printTree(tree->left);
}
if (tree->right !=
NULL) {
printTree(tree-
>right);
}
if (tree->left != NULL || tree->right !=
NULL) { printf(")");
}
}
/*
 * Push a string onto the intermediate-code stack `st`.
 *
 * The slot index is pre-incremented, so slot 0 is never written. Every write
 * is bounded: a full stack is a fatal error (there is no slot left to write),
 * and an operand longer than one slot is truncated with a warning instead of
 * spilling into the neighbouring slot.
 */
void push(char *argu) {
    const int slots = (int)(sizeof st / sizeof st[0]);
    const int slot_size = (int)sizeof st[0];

    if (top + 1 >= slots) {
        fprintf(stderr, "ICG stack overflow: more than %d entries\n", slots - 1);
        exit(EXIT_FAILURE);
    }
    ++top;
    if (snprintf(st[top], sizeof st[top], "%s", argu) >= slot_size) {
        fprintf(stderr, "warning: operand '%s' truncated to %d characters\n",
                argu, slot_size - 1);
    }
}
/*
 * Emit one three-address instruction for a binary operation.
 *
 * Takes the top three stack entries (st[top - 2], st[top - 1], st[top]),
 * appends "t<N>=<a><b><c>\n" to ICG, records t<N> in the symbol table with
 * value `val`, and replaces the three entries with the single name t<N>.
 *
 * The temporary's name is built in a local buffer sized for any int, rather
 * than by appending digits to the global `temp` prefix buffer, so the name
 * can never overrun that buffer.
 */
void code_generation(int val) {
    char name[16];  /* "t" + decimal int + terminator */

    snprintf(name, sizeof name, "t%d", i_);

    strcat(ICG, name);
    strcat(ICG, "=");
    strcat(ICG, st[top - 2]);
    strcat(ICG, st[top - 1]);
    strcat(ICG, st[top]);
    strcat(ICG, "\n");

    insert(name, "identifier", val, "-", yylineno - 1);

    /* Pop the operands and operator, leaving the result name on the stack. */
    top -= 2;
    snprintf(st[top], sizeof st[top], "%s", name);
    i_++;
}
/*
 * Emit the code for an assignment.
 *
 * st[top] is the target name and st[top - 1] the source operand. The source's
 * value is taken from the symbol table when it is a known symbol, otherwise
 * from atoi() of its text. The target is recorded as "local" when g == 1 and
 * "global" otherwise, "<target>=<source>\n" is appended to ICG, and two stack
 * entries are popped.
 */
void code_generation_assign() {
    char *scope_name = (g == 1) ? "local" : "global";
    struct symbtab *known = search(st[top - 1]);
    int val = (known != NULL) ? known->value : atoi(st[top - 1]);

    insert(st[top], "identifier", val, scope_name, yylineno - 1);

    sprintf(ICG + strlen(ICG), "%s=%s\n", st[top], st[top - 1]);
    top -= 2;
}
void code_generation_1()
{ strcpy(label, "l");
sprintf(l , "%d",
l_); strcat(label, l
); strcat(ICG, "if
not "); strcat(ICG,
st[top]);
strcat(ICG, " goto
"); strcat(ICG,
label); strcat(ICG,
"\n");
strcpy(try[x],
label); l_++;
x++;
}
/* Pop the most recently saved label from `try` and append "<label> : " to ICG. */
void code_generation_2() {
    x -= 1;
    strcat(strcat(ICG, try[x]), " : ");
}
/*
 * Append "<label> : " for the current contents of `label` to ICG, then build
 * the name "l<l_>" in `label` and save it in try[x], advancing x.
 *
 * NOTE(review): l_ is not advanced here, so the next label created elsewhere
 * from l_ would get the same number - confirm this is intended.
 */
void code_generation_3() {
    sprintf(ICG + strlen(ICG), "%s : ", label);

    sprintf(l, "%d", l_);
    strcpy(label, "l");
    strcat(label, l);

    strcpy(try[x++], label);
}
FILE* file =
fopen(argv[1], "r"); if
(file == NULL) {
printf("The file '%s' could not be found.\n",
argv[1]); return 1;
}
yyin = file;
yyparse();
if (!error) {
printf("\n==============Abstract Syntax
Tree===============\n"); printTree(nodes);
Page1
4
printf("\nCompilation ended successfully. You just compiled a Python source
code!\n");
} else {
printf("\nCompilation failed due to errors.\n");
}
fclose(fil
e);
return 0;
}
Page1
5