0% found this document useful (0 votes)
28 views14 pages

CD Assessment 1

Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
28 views14 pages

CD Assessment 1

Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 14

BCSE307P – Compiler Design Lab

Winter Semester 2022-23


Assessment - 1

Name: SURJOSNATH GUHA THAKURTA


Registration No: 21BDS0177
Lab Class: VL2023240101246
Class Slot: L45 + L46
Faculty in Charge: NAVAMANI T M
Date: 21-01-2023

School of Computer Science and Engineering (SCOPE)


Vellore Institute of Technology
Vellore.
Table of Contents

Ex.No. Title Date Page No.


Assessment - 1
1 Lexical Analysis 21-05-23 3
2 Symbol Table 21-05-23 13

Signature of the student (Digital)


Aim : To write a C++ Program to simulate the lexical analysis phase of a compiler

Detailed Description of Exercise1


Lexical analysis is the first phase of a compiler. It is also known as linear analysis or
scanning. It reads the stream of characters making up the source program and groups the characters into
meaningful sequences called lexemes. It represents lexemes in the form of tokens. Write a C++
program to simulate the lexical analysis phase of a compiler that performs tokenization of an input
C program. The C language definition is as follows.
Keywords:
auto double int struct

break else long switch

case enum register typedef

char extern return union

const short float unsigned

continue for signed void

default goto sizeof volatile

do if static while

Variables are the names used to represent variables, arrays, and functions in the input program.
Literals are strings that use double quotes to store multiple characters.
Operators: {+,-,*,/,%,<,>,=,==,!=, >=, <=}.
Direct numerical values can be considered as Constants.
Special symbols or delimiters include the symbols , ; ( ) { }

Test case 1
Sample
Input:
Enter Program $ for termination:
void main()
{
int a=20;
printf(“%d”,a);
}
Sample Output:
Lexeme of variables: a
Lexeme of literals: “%d”
Lexeme of operator : =
Lexeme of constants : 20
Lexeme of keywords : int printf void main
Lexeme of special symbols or delimiters: , ; ( ) { }

Test case 2
Sample
Input:
Enter Program $ for termination:
void main()
{
int a[3],t1,t2;
t1=2; a[0]=1; a[1]=2; a[t1]=3;
t2=-(a[2]+t1*6)/(a[2]-t1);
if t2>5 then
print(t2);
else {
int t3;
t3=99;
t2=-25;
print(-t1+t2*t3); /* this is a comment on 2 lines */
} endif
}$

Sample Output:
Lexeme of variables: a[3] t1 t2 t3
Lexeme of literals: “Lexical Analysis of a compiler”
Lexeme of operator : - + * / > =
Lexeme of constants : 2 1 3 6 5 99 -25
Lexeme of keywords : int printf if then else endif void main
Lexeme of special symbols or delimiters: , ; ( ) { }

Code:
#include <bits/stdc++.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

using namespace std;

/**
 * Tells whether a lexeme is one of the reserved words the scanner knows.
 *
 * The table holds the 32 C keywords plus the four extra words this lab
 * treats as keywords ("printf", "main", "endif", "then"). Matching is an
 * exact, case-sensitive string comparison.
 *
 * @param buffer NUL-terminated lexeme to look up (not modified).
 * @return 1 if the lexeme is in the table, 0 otherwise.
 */
int isKeyword(char buffer[])
{
   static const char *const reservedWords[] = {
       "auto", "break", "case", "char", "const", "continue", "default",
       "do", "double", "else", "enum", "extern", "float", "for", "goto",
       "if", "int", "long", "register", "return", "short", "signed",
       "sizeof", "static", "struct", "switch", "typedef", "union",
       "unsigned", "void", "volatile", "while", "printf", "main",
       "endif", "then"};

   for (const char *candidate : reservedWords)
   {
      // First exact match settles the answer; no need to scan further.
      if (strcmp(candidate, buffer) == 0)
         return 1;
   }

   return 0;
}

int main()
{
   char ch, buffer[15], b[30], logical_op[] = "><", math_op[] = "+-*/=",
numer[] = ".0123456789", other[] = ",;\(){}[]'':";
   ifstream fin("input.txt");
   int mark[1000] = {0};
   int i, j = 0, kc = 0, ic = 0, lc = 0, mc = 0, nc = 0, oc = 0, aaa = 0;
   vector<string> k;
   vector<char> id;
   vector<char> lo;
   vector<char> ma;
   vector<string> nu;
   vector<char> ot;
   if (!fin.is_open())
   {
      cout << "error while opening the file\n";
      exit(0);
   }

   while (!fin.eof())
   {
      ch = fin.get();
      for (i = 0; i < 12; ++i)
      {
         if (ch == other[i])
         {
            int aa = ch;
            if (mark[aa] != 1)
            {
               ot.push_back(ch);
               mark[aa] = 1;
               ++oc;
            }
         }
      }

      for (i = 0; i < 5; ++i)


      {
         if (ch == math_op[i])
         {
            int aa = ch;
            if (mark[aa] != 1)
            {
               ma.push_back(ch);
               mark[aa] = 1;
               ++mc;
            }
         }
      }
      for (i = 0; i < 2; ++i)
      {
         if (ch == logical_op[i])
         {
            int aa = ch;
            if (mark[aa] != 1)
            {
               lo.push_back(ch);
               mark[aa] = 1;
               ++lc;
            }
         }
      }
      if (ch == '0' || ch == '1' || ch == '2' || ch == '3' || ch == '4'
|| ch == '5' || ch == '6' || ch == '7' || ch == '8' || ch == '9' || ch ==
'.' || ch == ' ' || ch == '\n' || ch == ';')
      {

         if (ch == '0' || ch == '1' || ch == '2' || ch == '3' || ch ==


'4' || ch == '5' || ch == '6' || ch == '7' || ch == '8' || ch == '9' ||
ch == '.')
            b[aaa++] = ch;
         if ((ch == ' ' || ch == '\n' || ch == ';') && (aaa != 0))
         {
            b[aaa] = '\0';
            aaa = 0;
            char arr[30];
            strcpy(arr, b);
            nu.push_back(arr);
            ++nc;
         }
      }

      if (isalnum(ch))
         buffer[j++] = ch;
      else if ((ch == ' ' || ch == '\n') && (j != 0))
      {
         buffer[j] = '\0';
         j = 0;
         if (isKeyword(buffer) == 1)
         {
            k.push_back(buffer);
            ++kc;
         }
         else
         {
            if (buffer[0] >= 97 && buffer[0] <= 122)
            {
               if (mark[buffer[0] - 'a'] != 1)
               {
                  id.push_back(buffer[0]);
                  ++ic;
                  mark[buffer[0] - 'a'] = 1;
               }
            }
         }
      }
   }

   fin.close();
   
   printf("Lexeme of keywords: ");
   for (int f = 0; f < kc; ++f)
   {
      if (f == kc - 1)
         cout << k[f] << "\n";
      else
         cout << k[f] << ", ";
   }
   printf("Lexeme of variables: ");
   for (int f = 0; f < ic; ++f)
   {
      if (f == ic - 1)
         cout << id[f] << "\n";
      else
         cout << id[f] << ", ";
   }
   printf("Lexeme of operators: ");
   for (int f = 0; f < mc; ++f)
   {
      if (f == mc - 1)
         cout << ma[f] << ", ";
      else
         cout << ma[f] << ", ";
   }

   for (int f = 0; f < lc; ++f)


   {
      if (f == lc - 1)
         cout << lo[f] << "\n";
      else
         cout << lo[f] << ", ";
   }

   printf("Lexeme of constants: ");


   std::sort(nu.begin(), nu.end());

   // Remove adjacent duplicate elements


   auto uniqueEnd = std::unique(nu.begin(), nu.end());

   // Erase the duplicate elements from the vector


   nu.erase(uniqueEnd, nu.end());

   // Print the modified vector


   for (const auto &str : nu)
   {
      std::cout << str << ", ";
   }
   printf("\nLexeme of special symbols or delimiters: ");
   for (int f = 0; f < oc; ++f)
   {
      if (f == oc - 1)
         cout << ot[f] << "\n";
      else
         cout << ot[f] << " ";
   }

   return 0;
}

Output:
Aim : To write a C++ Program to create a symbol table

Detailed Description of Exercise2


A symbol table is a data structure used by the compiler, where each identifier in a
program’s source code is stored along with information associated with it relating to its
declaration. It stores the identifier as well as its associated attributes like scope, type, line number
of occurrence, etc. Write a C++ program to construct a symbol table for an input C program.
Test case 1
Sample
Input:
Enter Program $ for termination:
void main()
{
int a=20;
printf(“%d”,a);
}
Sample Output:
Name Type Size Address

a int 4 2024

Test case 2
Sample Input:

Enter Program $ for termination:


void main()
{
int num1,num2,sum; scanf(“%d
%d”,&num1,&num2)
sum=num1+num2;
printf(“%d”,sum);
}$

Sample Output:
Name Type Size Address

num1 int 4 2024

num2 int 4 3012

sum int 4 3048

Code:
#include <algorithm>
#include <cwctype>
#include <iostream>
#include <iterator>
#include <regex>
#include <sstream>
#include <string>
#include <unordered_map>
#include <vector>

using namespace std;

// One row of the symbol table built and printed by main().
// Field order matters: main() fills a Symbol with positional brace
// initialisation {name, type, size, address}.
struct Symbol
{
    string name;    // identifier text; also used as the table key
    string type;    // type word that preceded the identifier on its line
    int size;       // storage size in bytes recorded for the variable
    int address;    // synthetic address assigned by main(), not a real memory location
};

// Splits `input` at every occurrence of `delimiter` and returns the pieces
// in order. Empty pieces between adjacent delimiters (and before a leading
// delimiter) are kept; an empty input, or the empty tail after a trailing
// delimiter, yields no piece.
vector<string> splitString(const string &input, char delimiter)
{
    vector<string> pieces;
    string::size_type start = 0;

    while (start < input.size())
    {
        const string::size_type cut = input.find(delimiter, start);
        if (cut == string::npos)
        {
            // No further delimiter: the remainder is the last piece.
            pieces.push_back(input.substr(start));
            break;
        }
        pieces.push_back(input.substr(start, cut - start));
        start = cut + 1;
    }

    return pieces;
}

int main()
{
    string program;
    cout << "Enter Program $ for termination: \n";
    getline(cin, program, '$');

    char a[][10] =
       {"auto", "break", "case", "char", "const", "continue", "default",
        "do", "double", "else", "enum", "extern", "float", "for", "goto",
        "if", "int", "long", "register", "return", "short", "signed",
        "sizeof", "static", "struct", "switch", "typedef", "union",
        "unsigned", "void", "volatile", "while", "printf", "main", "endif", "then"};

    vector<string> lines = splitString(program, '\n');


    unordered_map<string, Symbol> symbolTable;

    for (const auto &line : lines)


    {
        vector<string> tokens = splitString(line, ' ');
        if (tokens.size() >= 2)
        {
            string type = tokens[0];
            string rest = tokens[1];

            bool exists = count(begin(a), end(a), rest) > 0;


            if( (regex_match(rest, regex("[a-zA-Z_][a-zA-Z0-9_]{0,31}"))) && !
exists)
            {
                vector<string> variables = splitString(rest, ',');
                for (const auto &variable : variables)
                {
                    vector<string> varTokens = splitString(variable, ';');
                    string varName = varTokens[0];
                    string varType = type;
                    int varSize = 4; // Assuming int data type is 4 bytes
                    // Generate unique address for each variable
                    int varAddress = symbolTable.size() * varSize + 2024;

                    Symbol symbol{varName, varType, varSize, varAddress};


                    symbolTable[varName] = symbol;
                }
            }
        }
    }

    // Print the symbol table


    cout << "\nName\tType\tSize\tAddress\n";
    for (const auto &symbolPair : symbolTable)
    {
        const Symbol &symbol = symbolPair.second;
        cout << symbol.name << "\t" << symbol.type << "\t" << symbol.size << "\t" <<
symbol.address << "\n";
    }
    return 0;
}
Output:

You might also like