0% found this document useful (0 votes)
96 views48 pages

Compiler

The document describes developing an operator precedence parsing table for a programming language. It includes code to define terminals, populate the table with operator precedence values, implement a stack-based parser, and test strings using the table. The table is used to shift or reduce tokens based on the precedence of the current stack and input values. The code demonstrates constructing an operator precedence parsing table and using it for stack-based parsing.

Uploaded by

bcikygupta
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
96 views48 pages

Compiler

The document describes developing an operator precedence parsing table for a programming language. It includes code to define terminals, populate the table with operator precedence values, implement a stack-based parser, and test strings using the table. The table is used to shift or reduce tokens based on the precedence of the current stack and input values. The code demonstrates constructing an operator precedence parsing table and using it for stack-based parsing.

Uploaded by

bcikygupta
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 48

Develop A Lexical Analyzer to Recognize A Few Patterns in C

#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
bool isDelimiter(char ch)
{
if (ch == ' ' || ch == '+' || ch == '-' || ch == '*' ||
ch == '/' || ch == ',' || ch == ';' || ch == '>' ||
ch == '<' || ch == '=' || ch == '(' || ch == ')' ||
ch == '[' || ch == ']' || ch == '{' || ch == '}')
return (true);
return (false);
}
bool isOperator(char ch)
{
if (ch == '+' || ch == '-' || ch == '*' ||
ch == '/' || ch == '>' || ch == '<' ||
ch == '=')
return (true);
return (false);
}
bool validIdentifier(char* str)
{
if (str[0] == '0' || str[0] == '1' || str[0] == '2' ||
str[0] == '3' || str[0] == '4' || str[0] == '5' ||
str[0] == '6' || str[0] == '7' || str[0] == '8' ||
str[0] == '9' || isDelimiter(str[0]) == true)
return (false);
return (true);
}
bool isKeyword(char* str)
{
if (!strcmp(str, "if") || !strcmp(str, "else") ||
!strcmp(str, "while") || !strcmp(str, "do") ||
!strcmp(str, "break") ||
!strcmp(str, "continue") || !strcmp(str, "int")
|| !strcmp(str, "double") || !strcmp(str, "float")
|| !strcmp(str, "return") || !strcmp(str, "char")
|| !strcmp(str, "case") || !strcmp(str, "char")
|| !strcmp(str, "sizeof") || !strcmp(str, "long")
|| !strcmp(str, "short") || !strcmp(str, "typedef")
|| !strcmp(str, "switch") || !strcmp(str, "unsigned")
|| !strcmp(str, "void") || !strcmp(str, "static")
|| !strcmp(str, "struct") || !strcmp(str, "goto"))
return (true);
return (false);
}
bool isInteger(char* str)
{
int i, len = strlen(str);
if (len == 0)
return (false);
for (i = 0; i<len; i++) {
if (str[i] != '0' && str[i] != '1' && str[i] != '2'
&& str[i] != '3' && str[i] != '4' && str[i] != '5'
&& str[i] != '6' && str[i] != '7' && str[i] != '8'
&& str[i] != '9' || (str[i] == '-' &&i> 0))
return (false);
}
return (true);
}
bool isRealNumber(char* str)
{
int i, len = strlen(str);
bool hasDecimal = false;
if (len == 0)
return (false);
for (i = 0; i<len; i++) {
if (str[i] != '0' && str[i] != '1' && str[i] != '2'
&& str[i] != '3' && str[i] != '4' && str[i] != '5'
&& str[i] != '6' && str[i] != '7' && str[i] != '8'
&& str[i] != '9' && str[i] != '.' ||
(str[i] == '-' &&i> 0))
return (false);
if (str[i] == '.')
hasDecimal = true;
}
return (hasDecimal);
}
char* subString(char* str, int left, int right)
{
int i;
char* subStr = (char*)malloc(sizeof(char) * (right - left + 2));
for (i = left; i<= right; i++)
subStr[i - left] = str[i];
subStr[right - left + 1] = '\0';
return (subStr);
}
void parse(char* str)
{
int left = 0, right = 0;
int len = strlen(str);

while (right <= len&& left <= right) {


if (isDelimiter(str[right]) == false)
right++;

if (isDelimiter(str[right]) == true && left == right) {


if (isOperator(str[right]) == true)
printf("'%c' IS AN OPERATOR\n", str[right]);

right++;
left = right;
} else if (isDelimiter(str[right]) == true &&left != right
|| (right == len&&left != right)) {
char* subStr = subString(str, left, right - 1);

if (isKeyword(subStr) == true)
printf("'%s' IS A KEYWORD\n", subStr);

else if (isInteger(subStr) == true)


printf("'%s' IS AN INTEGER\n", subStr);

else if (isRealNumber(subStr) == true)


printf("'%s' IS A REAL NUMBER\n", subStr);

else if (validIdentifier(subStr) == true


&&isDelimiter(str[right - 1]) == false)
printf("'%s' IS A VALID IDENTIFIER\n", subStr);

else if (validIdentifier(subStr) == false


&&isDelimiter(str[right - 1]) == false)
printf("'%s' IS NOT A VALID IDENTIFIER\n", subStr);
left = right;
}
}
return;
}
int main()
{
char str[100] = "int a = b + 1c; ";
parse(str);
return (0);
}
Write a Program parse using brute force technique of top down parsing
#include<stdio.h>
int main()
{
int a[30];
int min=10000,temp=0,i,lev,n,noofc,z;
printf("Enter Number");
scanf("%d",&n);
for(i=0;i<n;i++)
a[i]=0;
printf("Enter value of root");
scanf("%d",&a[0]);
for(i=1;i<=n/2;i++)
{
printf("please enter no of child of parent with value %d",a[i-1]);
scanf("%d",&noofc);
for(int j=1;j<=noofc;j++)
{
z=(i)*2+j-2;
printf("please enter value of child");
scanf("%d",&a[z]);
}
}
for(i=n-1;i>=n/2;i--)
{
temp=0;
for(int j=i+1;j>=1;j=j/2)
temp=temp+a[j-1];
if(temp<min)
min=temp;
printf("temp min is %d",temp);
printf("\n");
}
printf("min is %d",min);
return 0;
}

OUTPUT:
Construct LL(1) Parser
#include <iostream.h>
#include <conio.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
void main()
{
clrscr();
int i=0,j=0,k=0,m=0,n=0,o=0,o1=0,var=0,l=0,f=0,c=0,f1=0;
char str[30],str1[40]="E",temp[20],temp1[20],temp2[20],tt[20],t3[20];
strcpy(temp1,'\0');
strcpy(temp2,'\0');
char t[10];
char array[6][5][10] = {
"NT", "<id>","+","*",";",
"E", "Te","Error","Error","Error",
"e", "Error","+Te","Error","\0",
"T", "Vt","Error","Error","Error",
"t", "Error","\0","*Vt","\0",
"V", "<id>","Error","Error","Error"
};
cout<< "\n\tLL(1) PARSER TABLE \n";
for(i=0;i<6;i++)
{
for(j=0;j<5;j++)
{
cout.setf(ios::right);
cout.width(10);
cout<<array[i][j];
}
cout<<endl;
}
cout<<endl;
cout<< "\n\tENTER THE STRING :";
gets(str);
if(str[strlen(str)-1] != ';')
{
cout<< "END OF STRING MARKER SHOULD BE ';'";
getch();
exit(1);
}
cout<< "\n\tCHECKING VALIDATION OF THE STRING ";
cout<<"\n\t" << str1;
i=0;

while(i<strlen(str))
{
again:
if(str[i] == ' ' &&i<strlen(str))
{
cout<< "\n\tSPACES IS NOT ALLOWED IN SOURSE STRING ";
getch();
exit(1);
}
temp[k]=str[i];
temp[k+1]='\0';
f1=0;
again1:
if(i>=strlen(str))
{
getch();
exit(1);
}
for(int l=1;l<=4;l++)
{
if(strcmp(temp,array[0][l])==0)
{
f1=1;
m=0,o=0,var=0,o1=0;
strcpy(temp1,'\0');
strcpy(temp2,'\0');
int len=strlen(str1);
while(m<strlen(str1) && m<strlen(str))
{
if(str1[m]==str[m])
{
var=m+1;
temp2[o1]=str1[m];
m++;
o1++;
}
else
{
if((m+1)<strlen(str1))
{
m++;
temp1[o]=str1[m];
o++;
}
else
m++;
}

}
temp2[o1] = '\0';
temp1[o] = '\0';
t[0] = str1[var];
t[1] = '\0';
for(n=1;n<=5;n++)
{
if(strcmp(array[n][0],t)==0)
break;
}
strcpy(str1,temp2);
strcat(str1,array[n][l]);
strcat(str1,temp1);
cout<< "\n\t" <<str1;
getch();

if(strcmp(array[n][l],'\0')==0)
{
if(i==(strlen(str)-1))
{
int len=strlen(str1);
str1[len-1]='\0';
cout<< "\n\t"<<str1;
cout<< "\n\n\tENTERED STRING IS VALID";
getch();
exit(1);
}
strcpy(temp1,'\0');
strcpy(temp2,'\0');
strcpy(t,'\0');
goto again1;
}
if(strcmp(array[n][l],"Error")==0)
{
cout<< "\n\tERROR IN YOUR SOURCE STRING";
getch();
exit(1);
}
strcpy(tt,'\0');
strcpy(tt,array[n][l]);
strcpy(t3,'\0');
f=0;
for(c=0;c<strlen(tt);c++)
{
t3[c]=tt[c];
t3[c+1]='\0';
if(strcmp(t3,temp)==0)
{
f=0;
break;
}
else
f=1;
}

if(f==0)
{
strcpy(temp,'\0');
strcpy(temp1,'\0');
strcpy(temp2,'\0');
strcpy(t,'\0');
i++;
k=0;
goto again;
}
else
{
strcpy(temp1,'\0');
strcpy(temp2,'\0');
strcpy(t,'\0');
goto again1;
}
}
}
i++;
k++;
}
if(f1==0)
cout<< "\nENTERED STRING IS INVALID";
else
cout<< "\n\n\tENTERED STRING IS VALID";
getch();
}

OUTPUT:
Develop an Operator Precedence Table
#include<stdio.h>
#include<conio.h>
#include<string.h>
void main(){
char stack[20],ip[20],opt[10][10][1],ter[10];
int i,j,k,n,top=0,col,row;
clrscr();
for(i=0;i<10;i++)
{
stack[i]=NULL;
ip[i]=NULL;
for(j=0;j<10;j++)
{
opt[i][j][1]=NULL;
}
}
printf("Enter the no.of terminals :\n");
scanf("%d",&n);
printf("\nEnter the terminals :\n");
scanf("%s",&ter);
printf("\nEnter the table values :\n");
for(i=0;i<n;i++)
{
for(j=0;j<n;j++)
{
printf("Enter the value for %c %c:",ter[i],ter[j]);
scanf("%s",opt[i][j]);
}
}
printf("\n**** OPERATOR PRECEDENCE TABLE ****\n");
for(i=0;i<n;i++)
{
printf("\t%c",ter[i]);
}
printf("\n");
for(i=0;i<n;i++){printf("\n%c",ter[i]);
for(j=0;j<n;j++){printf("\t%c",opt[i][j][0]);}}
stack[top]='$';
printf("\nEnter the input string:");
scanf("%s",ip);
i=0;
printf("\nSTACK\t\t\tINPUT STRING\t\t\tACTION\n");
printf("\n%s\t\t\t%s\t\t\t",stack,ip);
while(i<=strlen(ip))
{
for(k=0;k<n;k++)
{
if(stack[top]==ter[k])
col=k;
if(ip[i]==ter[k])
row=k;
}
if((stack[top]=='$')&&(ip[i]=='$')){
printf("String is accepted\n");
break;}
else if((opt[col][row][0]=='<') ||(opt[col][row][0]=='='))
{ stack[++top]=opt[col][row][0];
stack[++top]=ip[i];
printf("Shift %c",ip[i]);
i++;
}
else{
if(opt[col][row][0]=='>')
{
while(stack[top]!='<'){--top;}
top=top-1;
printf("Reduce");
}
else
{
printf("\nString is not accepted");
break;
}
}
printf("\n");
for(k=0;k<=top;k++)
{
printf("%c",stack[k]);
}
printf("\t\t\t");
for(k=i;k<strlen(ip);k++){
printf("%c",ip[k]);
}
printf("\t\t\t");
}
getch();
}

OUTPUT:
Develop a recursive descent parser
#include<stdio.h>
#include<ctype.h>
#include<string.h>
void Tprime();
void Eprime();
void E();
void check();
void T();
char expression[10];
int count, flag;
int main()
{
count = 0;
flag = 0;
printf("\nEnter an Algebraic Expression:\t");
scanf("%s", expression);
E();
if((strlen(expression) == count) && (flag == 0))
{
printf("\nThe Expression %s is Valid\n", expression);
}
else
{
printf("\nThe Expression %s is Invalid\n", expression);
}
}
void E()
{
T();
Eprime();
}
void T()
{
check();
Tprime();
}
void Tprime()
{
if(expression[count] == '*')
{
count++;
check();
Tprime();
}
}
void check()
{
if(isalnum(expression[count]))
{
count++;
}
else if(expression[count] == '(')
{
count++;
E();
if(expression[count] == ')')
{
count++;
}
else
{
flag = 1;
}
}
else
{
flag = 1;
}
}
void Eprime()
{
if(expression[count] == '+')
{
count++;
T();
Eprime();
}
}

Write a program to simulate Heap Storage Allocation Technique


#include<stdio.h>
#include<stdlib.h>
#define TRUE 1
#define FALSE 0
typedef struct Heap
{
int data;
struct Heap *next;
}
node;
node *create();
void main()
{
int choice,val;
char ans;
node *head;
void display(node *);
node *search(node *,int);
node *insert(node *);
void dele(node **);
head=NULL;
do
{
printf("\nprogram to perform various operations on heap using dynamic memory management");
printf("\n1.create");
printf("\n2.display");
printf("\n3.insert an element in a list");
printf("\n4.delete an element from list");
printf("\n5.quit");
printf("\nenter your chioce(1-5)");
scanf("%d",&choice);
switch(choice)
{
case 1:head=create();
break;
case 2:display(head);
break;
case 3:head=insert(head);
break;
case 4:dele(&head);
break;
case 5:exit(0);
default:
printf("invalid choice,try again");
}
}
while(choice!=5);
}
node* create()
{
node *temp,*New,*head;
int val,flag;
char ans='y';
node *get_node();
temp=NULL;
flag=TRUE;
do
{
printf("\n enter the element:");
scanf("%d",&val);
New=get_node();
if(New==NULL)
printf("\nmemory is not allocated");
New->data=val;
if(flag==TRUE)
{
head=New;
temp=head;
flag=FALSE;
}
else
{
temp->next=New;
temp=New;
}
printf("\ndo you want to enter more elements?(y/n)");
scanf("%s",ans);
}
while(ans=='y');
printf("\nthe list is created\n");
return head;
}
node *get_node()
{
node *temp;
temp=(node*)malloc(sizeof(node));
temp->next=NULL;
return temp;
}
void display(node *head)
{
node *temp;
temp=head;
if(temp==NULL)
{
printf("\nthe list is empty\n");
return;
}
while(temp!=NULL)
{
printf("%d->",temp->data);
temp=temp->next;
}
printf("NULL");
}
node *search(node *head,int key)
{
node *temp;
int found;
temp=head;
if(temp==NULL)
{
printf("the linked list is empty\n");
return NULL;
}
found=FALSE;
while(temp!=NULL && found==FALSE)
{
if(temp->data!=key)
temp=temp->next;
else
found=TRUE;
}
if(found==TRUE)
{
printf("\nthe element is present in the list\n");
return temp;
}
else
{
printf("the element is not present in the list\n");
return NULL;
}
}
node *insert(node *head)
{
int choice;
node *insert_head(node *);
void insert_after(node *);
void insert_last(node *);
printf("n1.insert a node as a head node");
printf("n2.insert a node as a head node");
printf("n3.insert a node at intermediate position in t6he list");
printf("\nenter your choice for insertion of node:");
scanf("%d",&choice);
switch(choice)
{
case 1:head=insert_head(head);
break;
case 2:insert_last(head);
break;
case 3:insert_after(head);
break;
}
return head;
}
node *insert_head(node *head)
{
node *New,*temp;
New=get_node();
printf("\nEnter the element which you want to insert");
scanf("%d",&New->data);
if(head==NULL)
head=New;
else
{
temp=head;
New->next=temp;
head=New;
}
return head;
}
void insert_last(node *head)
{
node *New,*temp;
New=get_node();
printf("\nenter the element which you want to insert");
scanf("%d",&New->data);
if(head==NULL)
head=New;
else
{
temp=head;
while(temp->next!=NULL)
temp=temp->next;
temp->next=New;
New->next=NULL;
}
}
void insert_after(node *head)
{
int key;
node *New,*temp;
New=get_node();
printf("\nenter the elements which you want to insert");
scanf("%d",&New->data);
if(head==NULL)
{
head=New;
}
else
{
printf("\enter the element which you want to insert the node");
scanf("%d",&key);
temp=head;
do
{
if(temp->data==key)
{
New->next-temp->next;
temp->next=New;
return;
}
else
temp=temp->next;
}
while(temp!=NULL);
}
}
node *get_prev(node *head,intval)
{
node *temp,*prev;
int flag;
temp=head;
if(temp==NULL)
return NULL;
flag=FALSE;
prev=NULL;
while(temp!=NULL && ! flag)
{
if(temp->data!=val)
{
prev=temp;
temp=temp->next;
}
else
flag=TRUE;
}
if(flag)
return prev;
else
return NULL;
}
void dele(node **head)
{
node *temp,*prev;
int key;
temp=*head;
if(temp==NULL)
{
printf("\nthe list is empty\n");
return;
}
printf("\nenter the element you want to delete:");
scanf("%d",&key);
temp=search(*head,key);
if(temp!=NULL)
{
prev=get_prev(*head,key);
if(prev!=NULL)
{
prev->next=temp->next;
free(temp);
}
else
{
*head=temp->next;
free(temp);
}
printf("\nthe element is deleted\n");

}
}
OUTPUT:

Generate Lexical Analyzer using LEX


%{
int COMMENT=0;
%}
identifier [a-zA-Z][a-zA-Z0-9]*
%%
#.* {printf("\n%s is a preprocessor directive",yytext);}
int |
float |
char |
double |
while |
for |
struct |
typedef |
do |
if |
break |
continue |
void |
switch |
return |
else |
goto {printf("\n\t%s is a keyword",yytext);}
"/*" {COMMENT=1;}{printf("\n\t %s is a COMMENT",yytext);}
{identifier}\( {if(!COMMENT)printf("\nFUNCTION \n\t%s",yytext);}
\{ {if(!COMMENT)printf("\n BLOCK BEGINS");}
\} {if(!COMMENT)printf("BLOCK ENDS ");}
{identifier}(\[[0-9]*\])? {if(!COMMENT) printf("\n %s IDENTIFIER",yytext);}
\".*\" {if(!COMMENT)printf("\n\t %s is a STRING",yytext);}
[0-9]+ {if(!COMMENT) printf("\n %s is a NUMBER ",yytext);}
\)(\:)? {if(!COMMENT)printf("\n\t");ECHO;printf("\n");}
\( ECHO;
= {if(!COMMENT)printf("\n\t %s is an ASSIGNMENT OPERATOR",yytext);}
\<= |
\>= |
\< |
== |
\> {if(!COMMENT) printf("\n\t%s is a RELATIONAL OPERATOR",yytext);}
%%
int main(int argc, char **argv)
{
FILE *file;
file=fopen("var.c","r");
if(!file)
{
printf("could not open the file");
exit(0);
}
yyin=file;
yylex();
printf("\n");
return(0);
}
int yywrap()
{
return(1);
}

OUTPUT:
YACC program to recognize a valid arithmetic expression that uses operators
+, -, * and /.
LEX
%{
#include"y.tab.h"externyylval;
%}
%%
[0-9]+ {yylval=atoi(yytext); return NUMBER;}
[a-zA-Z]+ {return ID;}
[\t]+ ;
\n {return 0;}
. {return yytext[0];}
%%
YACC
%{
#include<stdio.h>
%}
%token NUMBER ID
%left '+' '-'
%left '*' '/'
%%
expr: expr '+' expr
|expr '-' expr
|expr '*' expr
|expr '/' expr
|'-'NUMBER|
'-'ID
|'('expr')'
|NUMBER
|ID;
%%
main()
{
printf("Enter the expression\n");
yyparse();
printf("\nExpression is valid\n");
exit(0);
}
int yyerror(char *s)
{
printf("\nExpression is invalid");
exit(0);
}
LEX
%{
#include"y.tab.h"externyylval;
%}
%%
[0-9]+ {yylval=atoi(yytext); return DIGIT;}
[a-zA-Z]+ {return LETTER;}
[\t] ;
\n return 0;
.{return yytext[0];}
%%
YACC
%{
#include<stdio.h>
%}
%token LETTER DIGIT
%%
variable: LETTER|LETTER rest
;
rest: LETTER rest
|DIGIT rest
|LETTER
|DIGIT
;
%%
main()
{
yyparse();
printf("The string is a valid variable\n");
}
int yyerror(char *s)
{
printf("this is not a valid variable\n");
exit(0);
}
USING LEX TOOL:
%{
#include<stdio.h>
#include"y.tab.h"
int c;
extern int yylaval;
%}
%%
"";
[a-z]
{
c=yytext[0];
yylaval=c-'a';
return(LETTER);
}
[0-9]
{
c=yytext[0];
yylaval=c-'0';
return(DIGIT);
}
[a-z0-9/b]
{
c=yytext[0];
return(c);
}
USING YACC TOOL:
%{
#include<stdio.h>
int regs[26];
int base;
%}
%start list
%token DIGIT LETTER
%left'|'
%left'&'
%left'+''-'
%left'*''/''%'
%left UMINUS
%%
list:
|
list stat'\n'
|list error'\n'
{
yyerror();
};
stat:expr
{
printf("%d\n",$1);
}
|
LETTER'='expr
{
regs[$1]=$3;
};
expr:'('expr')'
{
$$=$2;
}
|
expr'*'expr
{
$$=$1*$3;
}
expr'/'expr
{
$$=$1/$3;
}
|
expr'%'expr
{
$$=$1%$3;
}
|
expr'+'expr
{
$$=$1+$3;
}
|
expr'-'expr
{
$$=$1-$3;
}
|
expr'&'expr
{
$$=$1&$3;
}
|
expr'|'expr
{
$$=$1|$3;
}
|
'-'expr%prec UMINUS
{
$$=-$2;
}
|
LETTER
{
$$=regs[$1];
}
|
number;
number:DIGIT
{
$$=$1;
base=($1==0)?8:10;
}

number DIGIT
{
$$=base*$1+$2;
}
%%
main()
{
return(yyparse());
}
yyerror(s)
char*s;
{
fprintf(stderr,"%s\n",s);
}
yywrap()
{
return(1);
}

Program for implementation of Code Optimization Technique in for and do-while loop using
C++

Program:
Before:
Using for:
#include<iostream.h>
#include <conio.h>
int main()
{
int i, n;
int fact=1;
cout<<"\nEnter a number: ";
cin>>n;
for(i=n ; i>=1 ; i--)
fact = fact * i;
cout<<"The factoral value is: "<<fact;
getch();
return 0;
}
After:
Using do-while:
#include<iostream.h>
#include<conio.h>
void main()
{
clrscr();
int n,f;
f=1;
cout<<"Enter the number:\n";
cin>>n;
do
{
f=f*n;
n--;
}while(n>0);
cout<<"The factorial value is:"<<f;
getch();
}
OUTPUT:

Criteria for evaluation:


USING for LOOP
Enter the number: 5
The factorial value is: 120
USING do-while LOOP
Enter the number: 3
The factorial value is: 6
Study on an object oriented complier.
A compiler takes a program in a source language, creates some internal representation while
checking the syntax of the program, performs semantic checks, and finally generates something
that can be executed to produce the intended effect of the program. The obvious candidate for
object technology in a compiler is the symbol table: a mapping from user-defined names to their
properties as expressed in the program. It turns out however, that compiler implementation can
benefit from object technology in many more areas.
It turns out that OOP can be applied productively in every phase of a compiler implementation
and it delivers the expected benefits: objects enforce information hiding and state
encapsulation, methods help to develop by divide and conquer, all work is carried out by
messages which can be debugged by instrumenting their methods. Most importantly, classes
encourage code reuse between projects and inheritance allows code reuse within a project and
modifications from one project to another. As an added benefit, modern class libraries contain
many pre-fabricated algorithms and data structures.

The diagram shows the major components of a typical compiler: lex collates source characters
into words. symtab manages a table mapping these words to symbol descriptions which are
passed around during the rest of the compilation process. syn checks the symbol sequence
against a grammar and usually constructs a tree representing the source. sem checks the tree for
semantic correctness and might modify it to account for implicit operations. Finally, gen
produces an image of the source that can be executed in some run environment.
symtab manages a container for descriptions of mostly user-defined symbols: lex assembles a
word form the source and hands it as a key to symtab to locate a description or create a new
one. Symbol table and descriptions are obvious candidates for OOP within a compiler: the table
is a container object where each description object is held. The descriptions share at least the
ability to be located by key. If a base class is used to hold the key, inheritance helps to
encapsulate and share the lookup mechanism while keeping it separate from the information
constituting the actual description.
Using OOP a compiler can transform a program source into a tree of persistent objects as an
image. Execution is accomplished by sending a message to the root node of that tree which
results in partial traversal as the message is passed along the tree. Specifically in Java it is very simple
to make objects persistent. This results in a cheap, platform-independent image format and images can
be executed wherever a Java machine is available. Moreover, the execution message can be reused in
the compiler, e.g., when constant expressions need to be evaluated or expressions should be partially
folded. This is a significant advantage as it ensures that identical mechanisms are used for evaluation
during compilation and execution.
Semantic analysis decides if a syntactically acceptable program is meaningful. For a large part it is
concerned with the interaction of various data types in expressions: during a postorder traversal of the
parse tree, result types are computed for each part of an expression and stored in the nodes for the
benefit of code generation.
A compiler converts a sentence written in some language, i.e., a program, into an executable image. A
compiler-compiler converts a sentence written in a language like EBNF, i.e., a grammar, into an image
which is itself a compiler. As discussed above, an image is a tree of objects which understand a
message for semantic analysis and another message for execution. Semantic analysis for a grammar
means to check if the grammar is suitable for parsing, e.g., because it fulfills a condition such as
LL(1). Using the image resulting from a grammar means at least to perform recognition, i.e., the
execution message must implement at least a parsing algorithm. When the parser recognizes a phrase,
it must either build an abstract syntax tree or it should execute some user action. A popular generator,
yacc, augments phrases with C statements. The phrases are specified in BNF, i.e., without an iteration
syntax, to simplify how the C statements get access to the symbols accepted by the phrase.
We have employed OOP to implement a parser generator oops which accepts a grammar written in
EBNF, checks that it is LL(1), and creates a recursive descent parser for it. oops compiles itself; it
was bootstrapped with jay, a version of yacc which we retargeted to Java . Both versions of oops
share the class library for the execution trees.

You might also like