0% found this document useful (0 votes)
16 views20 pages

Report

The document outlines a report on a compiler designed for a subset of a Python-like programming language, detailing its language alphabet, context-free grammar, lexer, and parser code. It includes specifications for various programming constructs such as statements, expressions, and grammar rules. The report is part of a course project for the Fall 2024 semester by students Dima Bn Rezq and Shoug Alkanderi.
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
16 views20 pages

Report

The document outlines a report on a compiler designed for a subset of a Python-like programming language, detailing its language alphabet, context-free grammar, lexer, and parser code. It includes specifications for various programming constructs such as statements, expressions, and grammar rules. The report is part of a course project for the Fall 2024 semester by students Dima Bn Rezq and Shoug Alkanderi.
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 20

Report

CE468
Introduction to Compilers and Translation Engineering
Compiler for a subset of Python-like programming
language

Name ID

Dima Bn Rezq 58514


Shoug Alkanderi 54686

Semester: Fall 2024

Page 1 of
15
Language Alphabet

Lowercase letters a, b, c, ..., z

Uppercase letters A, B, C, ..., Z

Digits 0, 1, 2, ..., 9

Symbols !, #, $, %, &, (, ), *, +, ,, -, ., /, :, ;, <, <=, =, >, >=, ?, [, ], {, },


^, |, ~, @, ", ', `, ==
!=
Keywords and, as, class, def, else, False, for, if, in, not, or, print, True,
import, alias, __init__, self

Context Free Grammar (CFG)

<program> → <imports> <statements>


<imports> → <import> <imports>
| <empty>
<import> → "import" <module> ["as" <alias>]
| "from" <module> "import" <identifier> ["as" <alias>]
<module> → <identifier> ["." <module>]
<alias> → <identifier>
Statements
<statements> → <statement> <statements>
| <empty>

Statement Types
<statement> → <assignment>
| <function_call>
| <if_statement>
| <for_statement>
| <def_statement>
| <class_statement>
| <return_statement>
| <with_statement>
| <print_statement>
Assignment
<assignment> → <identifier> "=" <expression>
| <identifier> "[" <expression> "]" "=" <expression> # For list indexing
Function Call
<function_call> → <identifier> "(" [<arguments>] ")"
If Statement
<if_statement> → "if" <expression> ":" <statements>
["elif" <expression> ":" <statements>]
["else:" <statements>]

Page
2
For Statement
<for_statement> → "for" <identifier> "in" <expression> ":" <statements>
| "for" <identifier> "in" <expression> ":" <statements> "else:" <statements>
Function Definition
<def_statement> → "def" <identifier> "(" [<parameters>] ")" ":"
<statements>
Class Definition
<class_statement> → "class" <identifier> [ "(" [<parameters>] ")" ] ":"
<statements>
Return Statement
<return_statement> → "return" [<expression>]
Print Statement
<print_statement> → "print" "(" [<expression>] ")"
Expressions
<expression> → <identifier>
| <literal>
| <expression> <operator> <expression>
| "(" <expression> ")"
| <list_comprehension>
| <dict_comprehension>

Parameters
<parameters> → <identifier> ["," <parameters>]
| <empty>
Arguments
<arguments> → <expression> ["," <arguments>]
| <empty>
Literals
<literal> → <integer> | <float> | <string> | "True" | "False" | "None"
Operators
<operator> → "+" | "-" | "*" | "/" | "%" | "==" | "!=" | ">" | "<" | ">=" | "<="

Lexer Code

%{
#include <stdio.h>
#include <string.h>

/*
 * Semantic values are heap-allocated C strings. YYSTYPE has to be defined
 * before y.tab.h is included so that the header's declaration of yylval
 * uses char* instead of the default int.
 */
#ifndef YYSTYPE
#define YYSTYPE char*
#endif
#include "y.tab.h"

extern YYSTYPE yylval;
int t = 1;
int n = 0;
%}

%option yylineno

/* Token rules and regular expressions for the lexer */
num             [0-9]+(\.[0-9]+)?([Ee][+-]?[0-9]+)?
digits          [0-9]
integer         [0-9]+
string          (\"[^\"\n]*\"|\'[^\'\n]*\')
divideequal     \/=
plusequal       \+=
minusequal      \-=
mulequal        \*=
equal           =
plus            \+
minus           \-
divide          \/
mul             \*
special_start   \(
special_end     \)
alpha           [a-zA-Z]

%%
    /* Every rule prints a trace word for the token it recognised. */

    /* Indentation: one INDENT token per tab character. */
\t              { printf("indent "); return INDENT; }

    /* Comments are discarded here because the grammar has no rule that
       accepts a COMMENT token. A comment-only line is consumed together
       with its newline; a trailing comment leaves the newline for NL. */
^[ \t]*#.*\n                { printf("comment "); }
#.*                         { printf("comment "); }
(''')(.*\n)*(.*)(''')       { printf("comment "); }

    /* Blanks between tokens carry no meaning. */
[ ]+            { /* skip */ }

    /* Keywords */
class           { printf("class "); return CLASS; }
def             { printf("function "); return FUNCTION; }
if              { printf("if "); return IF; }
else            { printf("else "); return ELSE; }
elif            { printf("elseif "); return ELSEIF; }
for             { printf("for "); return FOR; }
while           { printf("while "); return WHILE; }
True            { printf("True "); yylval = strdup(yytext); return T; }
False           { printf("False "); yylval = strdup(yytext); return F; }

    /* Arithmetic Operators */
{plus}          { printf("plus "); yylval = strdup(yytext); return PLUS; }
{minus}         { printf("minus "); yylval = strdup(yytext); return MINUS; }
{mul}           { printf("mul "); yylval = strdup(yytext); return MUL; }
{divide}        { printf("divide "); yylval = strdup(yytext); return DIVIDE; }

    /* Assignment Operators */
{equal}         { printf("equal "); yylval = strdup(yytext); return EQUAL; }
{plusequal}     { printf("plusequal "); return PLUSEQUAL; }
{divideequal}   { printf("divideequal "); return DIVIDEEQUAL; }
{mulequal}      { printf("mulequal "); return MULEQUAL; }
{minusequal}    { printf("minusequal "); return MINUSEQUAL; }

    /* Special Characters */
{special_start} { printf("special_start "); return SPECIAL_START; }
{special_end}   { printf("special_end "); return SPECIAL_END; }
{string}        { printf("string "); yylval = strdup(yytext); return STRING; }
{integer}       { printf("int "); yylval = strdup(yytext); return INTEGER; }
{num}           { printf("num "); yylval = strdup(yytext); return NUM; }
\n              { printf("\n"); return NL; }

    /* Logical Operators ("not in" wins over "not" by longest match) */
or              { printf("or "); return OR; }
and             { printf("and "); return AND; }
not             { printf("not "); return NOT; }
in              { printf("in "); return IN; }
not\ in         { printf("not in "); return NOTIN; }

    /* Comparison Operators */
\<              { printf("lessthan "); return LESSTHAN; }
\<=             { printf("lessthanequal "); return LESSTHANEQUAL; }
\>=             { printf("greaterthanequal "); return GREATERTHANEQUAL; }
\>              { printf("greaterthan "); return GREATERTHAN; }
\==             { printf("doubleequal "); return DOUBLEEQUAL; }
\!=             { printf("notequal "); return NOTEQUAL; }

    /* Miscellaneous Tokens */
:               { printf("colon "); return COLON; }
\[              { printf("sq bracket start "); return SQUAREBRACKET_START; }
\]              { printf("sq bracket end "); return SQUAREBRACKET_END; }
\%              { printf("mod "); return MOD; }
sep             { printf("sep "); return SEP; }
end             { printf("end "); return END; }
print           { printf("print "); return PRINT; }
range           { printf("range "); return RANGE; }
list            { printf("list "); return LIST; }

    /* Identifiers (listed after the keywords so that keywords win ties) */
[a-zA-Z_][_a-zA-Z0-9]*  { printf("ID "); yylval = strdup(yytext); return ID; }

    /* Unrecognized characters glued to the front of an identifier are
       echoed and dropped. '+' instead of '*' keeps the rule from ever
       matching empty text. */
[0-9;!,@#]+/(({alpha}|"_")({alpha}|{digits}|"_")*)  { printf("%s\t", yytext); }

    /* Quotes */
\'              { printf("single quote "); return SINGLEQUOTE; }
\"              { printf("double quote "); return DOUBLEQUOTE; }

    /* Exponential Operator ("**" wins over "*" by longest match) */
\*\*            { printf("exp "); return EXPONENTIAL; }

    /* Comma */
,               { printf("comma "); return COMMA; }

%%

/* Called by the scanner at end of input; returning 1 means no further input. */
int yywrap(void) {
    return 1;
}

Page
5
Parser Code

%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <ctype.h>

#define null 0
#define COUNT 5
#define YYSTYPE char*

/* Global variables */
int size = 0;   /* Size of symbol table */
int v = 1;      /* Error flag */
int g = 0;      /* Scope */
int abc = 0;    /* Temporary value */
int aeval = 0;  /* Arithmetic expression evaluation */
int lineno = 1;
extern int yylineno;

/* Operator identification */
int flag = 0;

/*
 * Label and temporary names are built as a one-letter prefix followed by a
 * decimal counter, so the buffers must hold more than the prefix alone.
 */
char label[20] = "l";    /* Current label ("l<n>") */
int l_ = 0;              /* Label count */
char l[100] = {'\0'};    /* Label counter rendered as text */
char st[100][10];        /* Stack for Intermediate Code Generation (ICG) */
int top = 0;             /* Top of stack */
int i_ = 0;              /* Count of temporary variables in ICG */
char i[100] = {'\0'};    /* Temporary counter rendered as text */
char temp[20] = "t";     /* Current temporary name ("t<n>") */
char ICG[10000] = "";    /* Intermediate code generated so far */
char try1[5][10];
char try[5][10];
int relexp = 0;
int x = 0;
int x1 = 0;
int error = 0;

/* Symbol table entry; entries form a singly linked list. */
struct symbtab {
    char label[20];
    char type[20];
    int value;
    char scope[20];
    int lineno;
    struct symbtab *next;
};

/* Abstract syntax tree node. */
typedef struct AbstractSyntaxTree {
    char *name;
    struct AbstractSyntaxTree *left;
    struct AbstractSyntaxTree *right;
} node;

/* External declarations */
struct symbtab *first, *last, *temp1, *temp2;
struct symbtab *t_ptr;   /* Scratch pointer used by grammar actions */
extern FILE *yyin;
int yylex(void);
node* buildTree(char *, node *, node *);
void printTree(node *);
void push(char*);
void code_generation(int val);
void code_generation_assign();
void code_generation_1();
void code_generation_2();
void code_generation_3();
void insert(char* l, char* t, int v, char* s, int ln);
struct symbtab* search(char lab[]);
void display();
int yyerror();
node *nodes;

%}

/* Token Declarations.
   CLASS, FOR, ELSEIF and COMMENT are declared because the lexer returns
   them, even though no grammar rule consumes them yet. */
%token DIGITS ID PLUS MINUS MUL DIVIDE NUM STRING INTEGER SPECIAL NL
%token SPACE KEYWORD SPECIAL_START SPECIAL_END IF ELSE WHILE SEP END OR AND
%token NOT IN NOTIN T F COLON SQUAREBRACKET_START SQUAREBRACKET_END MOD
%token EQUAL PLUSEQUAL PRINT RANGE SINGLEQUOTE DOUBLEQUOTE EXPONENTIAL
%token COMMA INDENT LIST DIVIDEEQUAL MINUSEQUAL MULEQUAL LESSTHAN
%token LESSTHANEQUAL GREATERTHAN GREATERTHANEQUAL DOUBLEEQUAL
%token NOTEQUAL FUNCTION LAMBDA
%token CLASS FOR ELSEIF COMMENT

%%
/* Start symbol: remember the root of the syntax tree for main(). */
P : S { nodes = $1; }
  ;

/* Top-level statement sequence, built as a right-leaning "SEQ" chain that
   ends in a "NULL" leaf. */
S : Simple S    { $$ = buildTree("SEQ", $1, $2); }
  | Compound S  { $$ = buildTree("SEQ", $1, $2); }
  |             { $$ = buildTree("NULL", NULL, NULL); }
  ;

/* Indented statement sequence (body of a compound statement). */
IS : In Simple IS    { $$ = buildTree("SEQ", $2, $3); }
   | In Compound IS  { $$ = buildTree("SEQ", $2, $3); }
   |                 { $$ = buildTree("NULL", NULL, NULL); }
   ;

/* One or more INDENT tokens; seeing any marks the scope as local (g = 1). */
In : INDENT     { g = 1; }
   | In INDENT  { g = 1; }
   ;

Simple : Assignment LB  { aeval = 0; $$ = $1; }
       | Print LB
       ;

/* The target name is pushed after the right-hand side, so the stack holds
   value then target when code_generation_assign() runs. */
Assignment : ID assign E1
             {
                 push($1);
                 code_generation_assign();
                 $$ = buildTree($2, buildTree($1, NULL, NULL), $3);
             }
           ;

assign : DIVIDEEQUAL  { $$ = "/="; }
       | MULEQUAL     { $$ = "*="; }
       | MINUSEQUAL   { $$ = "-="; }
       | EQUAL        { $$ = "="; }
       | PLUSEQUAL    { $$ = "+="; }
       ;

/* Additive level */
E1 : E1 OP1 E2
     {
         code_generation(aeval);
         $$ = buildTree($2, $1, $3);
     }
   | E2
   ;

/* Multiplicative level */
E2 : E2 OP2 E3
     {
         code_generation(aeval);
         $$ = buildTree($2, $1, $3);
     }
   | E3
   ;

/* Exponentiation, right-recursive. $3 is the mid-rule action, so the
   right operand is $4. */
E3 : E4 EXPONENTIAL { push("**"); } E3
     {
         code_generation(aeval);
         $$ = buildTree("**", $1, $4);
     }
   | E4
   ;

/* Operands. Each alternative carries its own action: literals are folded
   into aeval directly, identifiers are looked up in the symbol table first,
   and a parenthesised expression passes its inner tree through. `flag`
   records the most recent operator (0 none, 1 +, 2 -, 3 *, otherwise /). */
E4 : Literal
     {
         abc = atoi($1);
         aeval = (flag == 0) ? abc
               : (flag == 1) ? aeval + abc
               : (flag == 2) ? aeval - abc
               : (flag == 3) ? aeval * abc
               : (abc != 0)  ? aeval / abc   /* avoid dividing by zero */
               : 0;
         push($1);
         $$ = buildTree($1, NULL, NULL);
     }
   | ID
     {
         struct symbtab *sym = search($1);
         if (sym == NULL) {
             printf("\n=====================-ERROR: variable %s undeclared===========================\n", $1);
             error = 1;
         } else {
             abc = sym->value;
             aeval = (flag == 0) ? abc
                   : (flag == 1) ? aeval + abc
                   : (flag == 2) ? aeval - abc
                   : (flag == 3) ? aeval * abc
                   : (abc != 0)  ? aeval / abc   /* avoid dividing by zero */
                   : 0;
         }
         /* Push and build a leaf even for an undeclared name so the operand
            stack and the tree stay well-formed for the rest of the parse. */
         push($1);
         $$ = buildTree($1, NULL, NULL);
     }
   | SPECIAL_START E1 SPECIAL_END  { $$ = $2; }
   ;

/* Numeric literal text as delivered by the lexer. */
Literal : INTEGER
        | NUM
        ;

/* Additive operators: record the operator in `flag` and push its text. */
OP1 : PLUS   { flag = 1; push("+"); $$ = "+"; }
    | MINUS  { flag = 2; push("-"); $$ = "-"; }
    ;

/* Multiplicative operators. */
OP2 : MUL     { flag = 3; push("*"); $$ = "*"; }
    | DIVIDE  { flag = 4; push("/"); $$ = "/"; }
    ;

Compound : if_else LB
         | only_if LB
         | while_loop LB
         ;

/* if without else. code_generation_2() already pops the pending label
   (it uses try[--x]); popping a second time here would drive x negative,
   so no extra decrement is performed. */
only_if : IF condition COLON LB IND
          {
              strcpy(try1[x1], label);
              code_generation_2();
              x1++;
              $$ = buildTree("IF", $2, $5);
          }
        ;

/* if with else. The mid-rule action emits the jump over the else branch;
   it is $6, so else_1 is $7. Two entries of try[] are released at the end:
   one by code_generation_2() and one by the explicit x--, matching the
   pushes done by code_generation_1() and code_generation_3(). */
if_else : IF condition COLON LB IND
          {
              char label_temp[20];
              snprintf(l, sizeof l, "%d", l_);
              snprintf(label_temp, sizeof label_temp, "l%d", l_);
              strcat(ICG, "goto ");
              strcat(ICG, label_temp);
              strcpy(try1[x1], label_temp);
              x1++;
              strcat(ICG, "\n");
          }
          else_1
          {
              code_generation_2();
              x--;
              l_++;
              $$ = buildTree("SEQ", buildTree("IF", $2, $5), $7);
          }
        ;

/* $2 is the mid-rule action, so the indented body is $5. */
else_1 : ELSE { code_generation_3(); } COLON LB IND
         { $$ = buildTree("ELSE", $5, NULL); }
       ;

/* while loop: jump back to the label saved in try1[], then emit the exit
   label. As in only_if, code_generation_2() performs the only pop of try[]. */
while_loop : WHILE condition COLON LB IND
             {
                 strcat(ICG, "goto ");
                 strcat(ICG, try1[--x1]);
                 strcat(ICG, "\n");
                 code_generation_2();
                 $$ = buildTree("while", $2, $5);
             }
           ;

condition : cond  { $$ = $1; }
          ;

/* Boolean structure: or < and < not < primary. */
cond : cond opor cond1  { $$ = buildTree("or", $1, $3); }
     | cond1
     ;

cond1 : cond1 opand cond2  { $$ = buildTree("and", $1, $3); }
      | cond2
      ;

cond2 : opnot cond2  { $$ = buildTree("not", $2, NULL); }
      | cond3
      ;

cond3 : SPECIAL_START cond SPECIAL_END  { $$ = $2; }
      | relexp  { code_generation_1(); }
      | bool
      ;

/* Relational expression. The label name is built in a local buffer that is
   large enough for the prefix plus the counter; the copy into the shared
   `label` buffer is bounded by that buffer's declared size. */
relexp : relexp relop E1
         {
             char lbl[16];
             snprintf(l, sizeof l, "%d", l_);
             snprintf(lbl, sizeof lbl, "l%d", l_);
             snprintf(label, sizeof label, "%s", lbl);
             strcat(ICG, lbl);
             strcat(ICG, " : ");
             strcpy(try1[x1], lbl);
             l_++;
             x1++;
             code_generation(aeval);
             $$ = buildTree($2, $1, $3);
         }
       | ID       { push($1); $$ = buildTree($1, NULL, NULL); }
       | NUM      { push($1); $$ = buildTree($1, NULL, NULL); }
       | INTEGER  { push($1); $$ = buildTree($1, NULL, NULL); }
       ;

/* The lexer sets no semantic value for IN / NOTIN, so the operator text is
   supplied here and pushed like the comparison operators. */
relop : compare  { $$ = $1; }
      | IN       { $$ = "in"; push("in"); }
      | NOTIN    { $$ = "not in"; push("not in"); }
      ;

compare : LESSTHAN          { $$ = "<"; push("<"); }
        | LESSTHANEQUAL     { $$ = "<="; push("<="); }
        | GREATERTHAN       { $$ = ">"; push(">"); }
        | GREATERTHANEQUAL  { $$ = ">="; push(">="); }
        | DOUBLEEQUAL       { $$ = "=="; push("=="); }
        | NOTEQUAL          { $$ = "!="; push("!="); }
        ;

/* Wrap the literal text in a leaf so that cond3 always yields a tree node. */
bool : T  { $$ = buildTree($1, NULL, NULL); }
     | F  { $$ = buildTree($1, NULL, NULL); }
     ;

opor : OR
     ;

opand : AND
      ;

opnot : NOT
      ;

IND : IS  { $$ = $1; }
    ;

/* print(...) with optional sep= / end= arguments. Each form yields a leaf
   so the enclosing statement sequence links a real tree node. */
Print : PRINT SPECIAL_START toprint SPECIAL_END
        { $$ = buildTree("print", NULL, NULL); }
      | PRINT SPECIAL_START toprint SEP EQUAL STRING END EQUAL STRING SPECIAL_END
        { $$ = buildTree("print", NULL, NULL); }
      | PRINT SPECIAL_START toprint SEP EQUAL STRING SPECIAL_END
        { $$ = buildTree("print", NULL, NULL); }
      | PRINT SPECIAL_START toprint END EQUAL STRING SPECIAL_END
        { $$ = buildTree("print", NULL, NULL); }
      ;

toprint : X
        | X COMMA toprint
        ;

X : STRING
  | ID
  | NUM
  | INTEGER
  ;

LB : NL
   ;

%%

int yyerror() {
printf("\n==============================SYNTAX ERROR: at
line number %d
==============================\n", yylineno - 1);
error = 1;
v = 0;
return 0;
}

/*
 * Add a symbol to the table, or refresh it when the label already exists.
 *
 *   l  - symbol name (lookup key)
 *   t  - type string; stored only when a new entry is created
 *   v  - value to record
 *   s  - scope string
 *   ln - line number to record
 *
 * An existing entry keeps its type; its value, line number and scope are
 * overwritten. A new entry is appended at the tail of the first/last list
 * and `size` is incremented.
 *
 * Strings longer than the fixed-size fields are truncated rather than
 * written past the end of the field. The lookup uses the same truncated key,
 * so an over-long name always resolves to the same entry. The process exits
 * if memory for a new entry cannot be allocated.
 */
void insert(char* l, char* t, int v, char* s, int ln) {
    char key[sizeof ((struct symbtab *)0)->label];
    snprintf(key, sizeof key, "%s", l);

    struct symbtab *n = search(key);
    if (n != NULL) {
        n->value = v;
        n->lineno = ln;
        snprintf(n->scope, sizeof n->scope, "%s", s);
        return;
    }

    struct symbtab *p = malloc(sizeof *p);
    if (p == NULL) {
        fprintf(stderr, "symbol table: out of memory\n");
        exit(EXIT_FAILURE);
    }
    snprintf(p->label, sizeof p->label, "%s", key);
    snprintf(p->type, sizeof p->type, "%s", t);
    p->value = v;
    p->lineno = ln;
    snprintf(p->scope, sizeof p->scope, "%s", s);
    p->next = NULL;

    if (size == 0) {
        first = p;
    } else {
        last->next = p;
    }
    last = p;
    size++;
}

void display() {
struct symbtab *p = first;
printf("\n===================SYMBOL TABLE
before
Optimizations========================\n");
printf("LABEL\tTYPE\t\tVALUE\tSCOPE\
tLINENO\n"); for (; p != NULL; p = p-
>next) {
printf("%s\t%s\t%d\t%s\t%d\n", p->label, p->type, p->value, p->scope, p-
>lineno);
}
}

/*
 * Look up a symbol by label.
 *
 * Walks the list starting at `first` and returns the first entry whose
 * label compares equal to `lab`, or NULL when there is none.
 */
struct symbtab *search(char lab[]) {
    for (struct symbtab *entry = first; entry != NULL; entry = entry->next) {
        if (strcmp(entry->label, lab) == 0) {
            return entry;
        }
    }
    return NULL;
}

/*
 * Allocate a syntax-tree node.
 *
 *   op    - node name; copied, so the caller keeps ownership of its string.
 *           A NULL name is stored as an empty string.
 *   left  - left child, may be NULL
 *   right - right child, may be NULL
 *
 * Returns the new node. The process exits if memory cannot be allocated.
 */
node* buildTree(char *op, node *left, node *right) {
    node *fresh = malloc(sizeof *fresh);
    if (fresh == NULL) {
        fprintf(stderr, "syntax tree: out of memory\n");
        exit(EXIT_FAILURE);
    }

    fresh->name = strdup(op != NULL ? op : "");
    if (fresh->name == NULL) {
        fprintf(stderr, "syntax tree: out of memory\n");
        exit(EXIT_FAILURE);
    }
    fresh->left = left;
    fresh->right = right;
    return fresh;
}
/*
 * Print a syntax tree in prefix form to stdout.
 *
 * A node with at least one child is wrapped in parentheses: "(" , the name
 * surrounded by spaces, the left subtree, the right subtree, ")". A leaf
 * prints only its name surrounded by spaces. A NULL tree prints nothing.
 */
void printTree(node *tree) {
    if (tree == NULL) {
        return;
    }

    int has_children = (tree->left != NULL || tree->right != NULL);

    if (has_children) {
        printf("(");
    }
    printf(" %s ", tree->name);
    if (tree->left != NULL) {
        printTree(tree->left);
    }
    if (tree->right != NULL) {
        printTree(tree->right);
    }
    if (has_children) {
        printf(")");
    }
}

/*
 * Push a string onto the code-generation stack `st`.
 *
 * `top` is advanced first and the text is stored in the new top slot, so
 * slot 0 is never written by a push. Text longer than a slot is truncated.
 * The process exits when the stack is full, because continuing would write
 * outside the array.
 */
void push(char *argu) {
    int capacity = (int)(sizeof st / sizeof st[0]);

    if (top + 1 >= capacity) {
        fprintf(stderr, "code generation stack overflow\n");
        exit(EXIT_FAILURE);
    }
    ++top;
    snprintf(st[top], sizeof st[top], "%s", argu);
}

/*
 * Emit one three-address instruction for a binary operation.
 *
 * Expects the stack to hold, from st[top - 2] to st[top]: left operand,
 * operator, right operand. Appends "t<n>=<left><op><right>\n" to ICG,
 * records the temporary in the symbol table with value `val` and scope "-",
 * then replaces the three stack entries with the temporary's name and
 * advances the temporary counter.
 *
 * The temporary's name is built in a local buffer sized for the prefix plus
 * the counter, and the append to ICG is bounded by the space left in it.
 * Nothing is emitted when fewer than three slots are addressable.
 */
void code_generation(int val) {
    char name[16];
    size_t used;

    if (top < 2) {
        return;
    }

    snprintf(name, sizeof name, "t%d", i_);

    used = strlen(ICG);
    snprintf(ICG + used, sizeof ICG - used, "%s=%s%s%s\n",
             name, st[top - 2], st[top - 1], st[top]);

    insert(name, "identifier", val, "-", yylineno - 1);

    top -= 2;
    snprintf(st[top], sizeof st[top], "%s", name);
    i_++;
}

/*
 * Emit the code for an assignment.
 *
 * Expects st[top] to hold the target name and st[top - 1] the source
 * operand. If the source is a known symbol its recorded value is used;
 * otherwise the source text is converted with atoi(). The target is
 * inserted into the symbol table as "local" when g == 1 and "global"
 * otherwise, "<target>=<source>\n" is appended to ICG, and both entries
 * are popped.
 *
 * Nothing happens when the stack does not hold two addressable entries,
 * and `top` is never lowered below 0.
 */
void code_generation_assign() {
    char *target;
    char *source;
    struct symbtab *known;
    int val;
    size_t used;

    if (top < 1) {
        return;
    }
    target = st[top];
    source = st[top - 1];

    known = search(source);
    val = (known != NULL) ? known->value : atoi(source);
    insert(target, "identifier", val, (g == 1) ? "local" : "global", yylineno - 1);

    used = strlen(ICG);
    snprintf(ICG + used, sizeof ICG - used, "%s=%s\n", target, source);

    top = (top >= 2) ? top - 2 : 0;
}

void code_generation_1()
{ strcpy(label, "l");
sprintf(l , "%d",
l_); strcat(label, l
); strcat(ICG, "if
not "); strcat(ICG,
st[top]);
strcat(ICG, " goto
"); strcat(ICG,
label); strcat(ICG,
"\n");

strcpy(try[x],
label); l_++;
x++;
}

/*
 * Pop the most recently saved label from try[] and emit it as a jump
 * target ("<label> : ") at the end of ICG.
 *
 * Does nothing when no label is pending, and emits nothing for a slot
 * outside the array, instead of indexing out of bounds.
 */
void code_generation_2() {
    size_t used;

    if (x <= 0) {
        return;
    }
    --x;
    if (x >= (int)(sizeof try / sizeof try[0])) {
        return;
    }

    used = strlen(ICG);
    snprintf(ICG + used, sizeof ICG - used, "%s : ", try[x]);
}

/*
 * Emit the label that starts an else branch and save the label for its end.
 *
 * Appends "<label> : " to ICG using the current contents of `label`, then
 * rebuilds `label` as "l<l_>" (the counter is not advanced here), stores it
 * in try[x] and increments x.
 *
 * The new label is built in a local buffer sized for the prefix plus the
 * counter; the copy into the shared `label` buffer is bounded by that
 * buffer's declared size. The store into try[] is skipped when x is outside
 * the array.
 */
void code_generation_3() {
    char target[16];
    size_t used = strlen(ICG);

    snprintf(ICG + used, sizeof ICG - used, "%s : ", label);

    snprintf(l, sizeof l, "%d", l_);
    snprintf(target, sizeof target, "l%d", l_);
    snprintf(label, sizeof label, "%s", target);

    if (x >= 0 && x < (int)(sizeof try / sizeof try[0])) {
        snprintf(try[x], sizeof try[x], "%s", target);
    }
    x++;
}

int main(int argc, char


*argv[]) { if (argc != 2)
{
printf("Wrong command. Please provide the input
file path.\n"); return 1;
}

FILE* file =
fopen(argv[1], "r"); if
(file == NULL) {
printf("The file '%s' could not be found.\n",
argv[1]); return 1;
}

yyin = file;
yyparse();

if (!error) {
printf("\n==============Abstract Syntax
Tree===============\n"); printTree(nodes);

Page1
4
printf("\nCompilation ended successfully. You just compiled a Python source
code!\n");
} else {
printf("\nCompilation failed due to errors.\n");
}

fclose(fil
e);
return 0;
}

Page1
5

You might also like