0% found this document useful (0 votes)
15 views

Lab 3

This document describes a lab assignment to implement a lexical analyzer in C/C++/Java. The objectives are to implement lexical analysis and create a symbol table with three columns and a hash value column. The document provides source code for a program that identifies the tokens in a given code snippet, separates them, and outputs them in a table along with their hash values. It includes functions for removing comments, identifying different types of tokens like keywords, operators, and data types, and performing lexical analysis on the preprocessed code.

Uploaded by

Sarp M
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOC, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
15 views

Lab 3

This document describes a lab assignment to implement a lexical analyzer in C/C++/Java. The objectives are to implement lexical analysis and create a symbol table with three columns and a hash value column. The document provides source code for a program that identifies the tokens in a given code snippet, separates them, and outputs them in a table along with their hash values. It includes functions for removing comments, identifying different types of tokens like keywords, operators, and data types, and performing lexical analysis on the preprocessed code.

Uploaded by

Sarp M
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOC, PDF, TXT or read online on Scribd
You are on page 1/ 8

Department of

Computing
Compiler Construction
Class: BSCS

Lab 03: Tokens & Hash Values

Lab [03]: Tokens & Hash Values


Objectives
Implementation of Lexical Analysis in C/C++/Java
Tools/Software Requirement
gcc, g++, javac, GNU Make
Lab Tasks
1. Create a program that builds a symbol table with three columns.
Comments must not appear in the output. Add a hash value
column.

Source Code:
#include <iostream>
#include <fstream>
#include <windows.h>
#include <string>
#include <sstream>
#include <unordered_map>

using namespace std;

/**
 * Strips C/C++ comments from a program's source text.
 *
 * Both line comments and block comments are removed. The line break that
 * terminates a line comment is kept, and every closed block comment is
 * replaced by a single space, so the tokens on either side of a comment
 * are never glued together (e.g. "a/ * x * /b" must not become "ab").
 *
 * Limitation: string and character literals are not tracked, so comment
 * markers that appear inside a literal are treated as real comments.
 *
 * @param userInput  Raw source text.
 * @return The text with all comments removed.
 */
std::string removeComments(std::string userInput)
{
    const std::size_t n = userInput.length();
    std::string output;
    output.reserve(n);

    // Track whether the scan is currently inside a line or block comment.
    bool singleLineFlag = false;
    bool multiLineFlag = false;

    for (std::size_t i = 0; i < n; i++)
    {
        const char current = userInput[i];
        // Look ahead one character without reading past the end.
        const char next = (i + 1 < n) ? userInput[i + 1] : '\0';

        if (singleLineFlag)
        {
            if (current == '\n')
            {
                singleLineFlag = false;
                // The newline is not part of the comment: keep it so the
                // following line stays separate from the preceding code.
                output += current;
            }
        }
        else if (multiLineFlag)
        {
            if (current == '*' && next == '/')
            {
                multiLineFlag = false;
                ++i; // consume the '/' as well
                // A comment acts as whitespace between tokens.
                output += ' ';
            }
        }
        else if (current == '/' && next == '/')
        {
            singleLineFlag = true;
            ++i; // consume the second '/'
        }
        else if (current == '/' && next == '*')
        {
            multiLineFlag = true;
            ++i; // consume the '*'
        }
        else
        {
            // Ordinary character outside any comment.
            output += current;
        }
    }
    return output;
}

/**
 * Loads the whole content of a file into a string.
 *
 * If the file cannot be opened, an error message is written to standard
 * error and the program terminates with EXIT_FAILURE.
 *
 * @param path  Path of the file to read.
 * @return The file content.
 */
std::string readFileIntoString(const std::string &path)
{
    std::ifstream source(path);
    if (!source.is_open())
    {
        std::cerr << "Could not open the file - '" << path << "'" << std::endl;
        exit(EXIT_FAILURE);
    }

    // Drain the file's stream buffer into an in-memory stream in one go.
    std::ostringstream buffer;
    buffer << source.rdbuf();
    return buffer.str();
}

/**
 * Writes a string to standard output one character at a time.
 *
 * @param str  Text to print.
 */
void printFunc(std::string str)
{
    for (std::string::size_type pos = 0; pos < str.size(); ++pos)
    {
        std::cout << str[pos];
    }
}

// Character classes used by the token checkers.
std::string digit("0123456789");
std::string alphabets("_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");

// Words reported as keywords by identifierChecker.
std::string keywords[] = {
    "break", "char", "main", "#include", "const",
    "continue", "default", "double", "else", "enum",
    "extern", "float", "for", "goto", "if",
    "int", "long", "return", "short", "static",
    "struct", "switch", "void", "while", "printf"};

// Single-character token tables searched by tokenMatcher.
char arthmetic[] = {'+', '-', '/', '*', '%'};
char relational_ops[] = {'>', '<'};
char punctuators[] = {'[', ']', '{', '}', '=', '.', ';', ':', ',', '(', ')'};

// Element counts of the tables above, derived from the arrays themselves.
int arithmeticLength = static_cast<int>(sizeof(arthmetic) / sizeof(arthmetic[0]));
int rlationalOperatorLength = static_cast<int>(sizeof(relational_ops) / sizeof(relational_ops[0]));
int punctuatorLength = static_cast<int>(sizeof(punctuators) / sizeof(punctuators[0]));

/**
 * Computes the hash value shown in the symbol table for a token.
 *
 * @param s  Token text.
 * @return The value produced by std::hash<std::string> for s.
 */
size_t string_Hashing(std::string s)
{
    return std::hash<std::string>{}(s);
}

/**
 * Tests whether a word is an unsigned decimal integer literal.
 *
 * @param str  Candidate token.
 * @return 1 when str is non-empty and consists only of the digits 0-9,
 *         -1 otherwise (including for an empty string).
 */
int numChecker(std::string str)
{
    // An empty word is not a number; returning here also guarantees that
    // every path through the function yields a value.
    if (str.empty())
    {
        return -1;
    }

    for (const char ch : str)
    {
        if (ch < '0' || ch > '9')
        {
            return -1;
        }
    }
    return 1;
}

int identifierChecker(string str)


{
for (int j = 0; j < 25; j++)
{
if (keywords[j].compare(str) == 0)
{
return 2;
}
}

int i = 0;
while (i != str.length())
{
if (alphabets.find(str[i]) == -1 && digit.find(str[i]) == -1)
{
return -1;
}
else if (str.length() == 0 || digit.find(str[0]) != -1)
{
return -1;
}
else
{
if (i == str.length() - 1)
{
return 1;
}
}
++i;
}
}

/**
 * Tests whether a word looks like a decimal number with at most one '.'.
 *
 * Words made only of digits are accepted as well, so callers that need to
 * tell integers from floats must test for integers first.
 *
 * @param str  Candidate token.
 * @return 1 when str contains only digits and at most one '.', with at
 *         least one digit; -1 otherwise (including for an empty string
 *         and for a lone ".").
 */
int floatingPointChecker(std::string str)
{
    bool seenDot = false;
    bool seenDigit = false;

    for (const char ch : str)
    {
        if (ch == '.')
        {
            if (seenDot)
            {
                return -1; // a second decimal point
            }
            seenDot = true;
        }
        else if (ch >= '0' && ch <= '9')
        {
            seenDigit = true;
        }
        else
        {
            return -1; // any other character disqualifies the word
        }
    }

    // Rejects "" and "." while guaranteeing a return value on every path.
    return seenDigit ? 1 : -1;
}

/**
 * Looks up a word in a table of single-character tokens.
 *
 * @param dict  Table of candidate characters.
 * @param str   Word to look up.
 * @param len   Number of entries of dict to examine.
 * @return 1 when str equals the one-character string formed from one of
 *         the first len entries of dict, 0 otherwise.
 */
int tokenMatcher(char dict[], std::string str, int len)
{
    // Scratch C string: slot 0 holds the current entry, slot 1 stays NUL.
    char candidate[2] = {'\0', '\0'};

    int index = 0;
    while (index < len)
    {
        candidate[0] = dict[index];
        if (str.compare(candidate) == 0)
        {
            return 1;
        }
        ++index;
    }
    return 0;
}

/**
 * Classifies one word and prints its symbol-table row.
 *
 * The categories are tried in this order: relational operator, keyword,
 * punctuator, arithmetic operator, identifier, integer, float. A row holds
 * the category, the word and the word's hash value. Words that fit no
 * category, and empty words, produce no output.
 *
 * @param word  Token text to classify.
 */
void tokenChecker(std::string word)
{
    // Nothing to classify; also keeps empty input away from the checkers.
    if (word.empty())
    {
        return;
    }

    // Every row shares the same layout after the category label.
    const auto printRow = [&word](const char *label) {
        std::cout << label << word << "\t\t|\t " << string_Hashing(word) << std::endl;
    };

    if (tokenMatcher(relational_ops, word, rlationalOperatorLength) == 1)
    {
        printRow("\tRelational Operator \t|\t");
        return;
    }

    // One call answers both the keyword (2) and the identifier (1) question.
    const int identifierKind = identifierChecker(word);

    if (identifierKind == 2)
    {
        printRow("\tKeyword \t\t|\t");
    }
    else if (tokenMatcher(punctuators, word, punctuatorLength) == 1)
    {
        printRow("\tPunctuator \t\t|\t");
    }
    else if (tokenMatcher(arthmetic, word, arithmeticLength) == 1)
    {
        printRow("\tArithmetic Operator \t|\t");
    }
    else if (identifierKind == 1)
    {
        printRow("\tIdentifier \t\t|\t");
    }
    else if (numChecker(word) == 1)
    {
        printRow("\tInteger \t\t|\t");
    }
    else if (floatingPointChecker(word) == 1)
    {
        printRow("\tFloat \t\t|\t");
    }
}

/**
 * Splits comment-free source text into words and reports each one through
 * tokenChecker.
 *
 * Characters are accumulated into a word until a delimiter is met. The
 * pending word is then reported, followed by the delimiter itself unless
 * it is whitespace. A word still pending at the end of the input is
 * reported too.
 *
 * Every '+' and '-' is a delimiter of its own, so "++" and "--" are
 * reported as two single-character operators and the word in front of
 * them is never lost.
 *
 * @param str  Source text to tokenize.
 */
void lexicalAnalyzer(std::string str)
{
    // Whitespace separates words but is never reported as a token.
    static const std::string whitespace = " \t\n\r\v\f";
    // These characters end the current word and are reported on their own.
    static const std::string delimiters = "[]{}()=\";,+-/*%><";

    std::string word;

    for (const char x : str)
    {
        const bool isWhitespace = whitespace.find(x) != std::string::npos;
        const bool isDelimiter = delimiters.find(x) != std::string::npos;

        if (!isWhitespace && !isDelimiter)
        {
            word += x;
            continue;
        }

        // Report the word that the delimiter just closed, if any.
        if (!word.empty())
        {
            tokenChecker(word);
            word.clear();
        }
        if (isDelimiter)
        {
            tokenChecker(std::string(1, x));
        }
    }

    // Input that does not end in a delimiter still has a pending word.
    if (!word.empty())
    {
        tokenChecker(word);
    }
}

int main(int argc, char *argv[])


{
HANDLE Colour = GetStdHandle(STD_OUTPUT_HANDLE);
CONSOLE_SCREEN_BUFFER_INFO Change;
WORD x;
GetConsoleScreenBufferInfo(Colour, &Change);
x = Change.wAttributes;

if (argc < 2)
{
cerr << "Error: File name argument not given :`{" << endl;
exit(1);
}

string file_Content = readFileIntoString(argv[1]);


string plain_Text = removeComments(file_Content);

// SetConsoleTextAttribute(Colour, FOREGROUND_RED);
// cout << "Content of file given as input: " << endl;
// SetConsoleTextAttribute(Colour, x);
// printFunc(file_Content);

// SetConsoleTextAttribute(Colour, FOREGROUND_BLUE);
// cout << "Content of file without comments: " << endl;
// SetConsoleTextAttribute(Colour, x);
// printFunc(plain_Text);

SetConsoleTextAttribute(Colour, FOREGROUND_GREEN);
cout << "\nLexical Analyzer Output: " << endl;
SetConsoleTextAttribute(Colour, x);
cout << "\tToken Name \t\t|\t Token value\t|\t Hash Value\n\n";
lexicalAnalyzer(plain_Text);

return 0;
}

You might also like