0% found this document useful (0 votes)
12 views

compiler lab2

The document outlines an experiment focused on implementing a lexical analyzer for a simple programming language, which includes tokenizing keywords, numbers, identifiers, operators, punctuators, and boolean literals. It specifies design constraints, assumptions, and the output format, including a symbol table and validation messages for processed statements. The provided code demonstrates the implementation of the lexical analyzer and its functionality to process input from a file and generate a token stream.

Uploaded by

dpsvn.gaur12217
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
12 views

compiler lab2

The document outlines an experiment focused on implementing a lexical analyzer for a simple programming language, which includes tokenizing keywords, numbers, identifiers, operators, punctuators, and boolean literals. It specifies design constraints, assumptions, and the output format, including a symbol table and validation messages for processed statements. The provided code demonstrates the implementation of the lexical analyzer and its functionality to process input from a file and generate a token stream.

Uploaded by

dpsvn.gaur12217
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 17

EXPERIMENT- 2

Lexical Analysis and Symbol Table Generation

M SANGEETHGOWTHAM
22BLC1386

AIM

Implement a lexical analyzer capable of tokenizing a simple programming language with


constructs like keywords, numbers, identifiers, operators, punctuators, and boolean
literals.

Classify tokens based on their types such as:

● Keywords
● Numbers (integer values)
● Identifiers (variables and function names)
● Operators
● Punctuators
● Boolean literals

DESIGN CONSTRAINTS

Implement a lexical analyzer capable of tokenizing a simple programming language with


constructs like keywords, numbers, identifiers, operators, punctuators, and boolean
literals.

Classify tokens based on their types such as:

● Keywords (e.g., if, else, int, return)


● Numbers (integer values)
● Identifiers (variables and function names)
● Operators (e.g., +, -, =)
● Punctuators (e.g., ;)
● Boolean literals (e.g., true, false)

Output Format:
● The tokenized representation of each statement will be printed.
● The symbol table will be displayed at the end of the process, listing all unique
identifiers encountered with their respective types.
● A validation message (Valid statement. or Invalid statement.) will be
shown for each processed statement.

Assumptions:

● The language syntax is simplified for the purposes of this lab and does not
include complex constructs like loops, conditionals, or functions.
● Only simple assignments are considered valid statements for this project.

Constraints on Token Length:

● Identifiers are assumed to be no longer than 50 characters.


● Numbers are assumed to be positive integers (no floating-point numbers or
negative values).
● Only single-character operators and basic punctuators (such as ;) are
considered.

CODE
#include <iostream>

#include <fstream>

#include <sstream>

#include <vector>

#include <map>

#include <string>

#include <stdexcept>

#include <cctype>

#include <iomanip>
using namespace std;

// One symbol-table entry: the declared type of a variable and its
// current value (stored as double so "int" and "float" share one slot).
struct SymbolInfo {

string type; // declared type keyword, e.g. "int" or "float"

double value; // current numeric value of the variable

};

// One lexical-table entry: the token's spelling plus its classification
// (type: Keyword/Operator/Literal/Identifier; category: Reserved/Symbol/
// Constant/Variable). Keyed by an integer id in the lexical table.
struct LexicalEntry {

string name; // token text as it appears in the source

string type; // token class, e.g. "Keyword", "Operator", "Identifier", "Literal"

string category; // sub-classification, e.g. "Reserved", "Symbol", "Constant", "Variable"

};

// Splits `st` on the delimiter `split`, drops empty fields (so runs of
// consecutive delimiters collapse), and appends the resulting token list
// as one new row of `compiled_stream` — one row per source line.
// NOTE(fix): the extracted original lost its closing braces; structure
// restored so the row is pushed exactly once, after the split loop.
// `st` is now taken by const reference to avoid a copy per line.
void vectorise(const string& st, char split,
               vector<vector<string>>& compiled_stream) {
    stringstream ss(st);
    vector<string> split_vector;
    string token;
    while (getline(ss, token, split)) {
        if (!token.empty()) {
            split_vector.push_back(token);
        }
    }
    compiled_stream.push_back(split_vector);
}

// Returns true if `str` is a well-formed decimal literal: an optional
// leading '-', at least one digit, and at most one '.'.
// NOTE(fix): the extracted original lost its braces AND accepted '-'
// anywhere in the string ("5-3", "--", "-" were all "numeric") and
// digit-free strings like "." — any of which would make the later
// stod() call throw. Both defects are corrected here.
bool isNumeric(const string& str) {
    if (str.empty()) return false;
    size_t start = (str[0] == '-') ? 1 : 0; // sign only allowed up front
    bool hasDecimal = false;
    bool hasDigit = false;
    for (size_t i = start; i < str.size(); ++i) {
        char c = str[i];
        if (c == '.') {
            if (hasDecimal) return false;   // second '.' is malformed
            hasDecimal = true;
        } else if (isdigit(static_cast<unsigned char>(c))) {
            hasDigit = true;
        } else {
            return false;                    // any other character
        }
    }
    return hasDigit; // rejects "", "-", ".", "-." etc.
}

// Resolves a right-hand-side expression to a value: a numeric literal is
// parsed with stod, an identifier is looked up in the symbol table, and
// anything else is rejected.
// NOTE(fix): the extracted original lost the closing braces of both the
// if/else chain and the function; structure restored.
//
// Throws runtime_error("Invalid expression: ...") for an unknown token.
double evaluateExpression(const string& expression,
                          const map<string, SymbolInfo>& symbolTable) {
    if (isNumeric(expression)) {
        return stod(expression);                     // literal constant
    }
    if (symbolTable.count(expression)) {
        return symbolTable.at(expression).value;     // declared variable
    }
    throw runtime_error("Invalid expression: " + expression);
}

// Builds the lexical table: pre-seeds the reserved keywords and the
// single-character operators, then assigns each distinct source token a
// fresh id, classified as Literal (numeric) or Identifier (anything else).
// NOTE(fix): the extracted original lost its closing braces; structure
// restored. The per-token linear scan of the whole table (accidental
// O(n^2)) is replaced by a `seen` index with O(log n) lookups.
void generateLexicalTable(const vector<vector<string>>& compiled_stream,
                          map<int, LexicalEntry>& lexicalTable) {
    int nextId = 1;
    map<string, bool> seen; // spellings already assigned an id

    auto add = [&](const string& name, const string& type,
                   const string& category) {
        lexicalTable[nextId++] = {name, type, category};
        seen[name] = true;
    };

    // Keywords
    add("int", "Keyword", "Reserved");
    add("float", "Keyword", "Reserved");
    // Operators
    add("=", "Operator", "Symbol");
    add("+", "Operator", "Symbol");
    add("-", "Operator", "Symbol");

    for (const auto& tokens : compiled_stream) {
        for (const auto& token : tokens) {
            if (seen.count(token)) continue; // already in the table
            if (isNumeric(token)) {
                add(token, "Literal", "Constant");
            } else {
                add(token, "Identifier", "Variable");
            }
        }
    }
}
void processTokens(const vector<vector<string>>&
compiled_stream, const map<int, LexicalEntry>& lexicalTable,
map<string, SymbolInfo>& symbolTable, vector<string>&
tokenStream) {

for (const auto& tokens : compiled_stream) {

string lineTokenStream;

for(const auto& token : tokens){

for (const auto& pair : lexicalTable){

if (pair.second.name == token){

if(pair.second.type == "Keyword"){

lineTokenStream += "<" + to_string(pair.first) + ">";

else if(pair.second.type == "Identifier"){

lineTokenStream += "<id" + to_string(pair.first) + ">";

else if(pair.second.type == "Operator"){


lineTokenStream += "<op" + to_string(pair.first) +
">";

else if (pair.second.type == "Literal"){

lineTokenStream += "<" + to_string(pair.first) + ">";

break;

tokenStream.push_back(lineTokenStream);

if (tokens.size() >= 3) {

if ((tokens[0] == "int" || tokens[0] == "float") && tokens[2] ==


"=" ) {
string type = tokens[0];

string name = tokens[1];

string value_str = tokens[3];

if (symbolTable.count(name)) {

throw runtime_error("Variable " + name + " already


declared.");

} else if (!isNumeric(value_str)) {

throw runtime_error("Initial value must be an integer or


float: " + value_str);

} else {

double value = stod(value_str);

symbolTable[name] = { type, value };

} else if(tokens.size() >= 3 && tokens[1] == "="){

string name = tokens[0];


if(symbolTable.count(name)){

string expression = tokens[2];

try {

double newValue = evaluateExpression(expression,


symbolTable);

symbolTable[name].value = newValue;

} catch(const runtime_error& err){

throw runtime_error("Error calculating the value: " +


std::string(err.what()) );

} else {

throw runtime_error("Variable " + name + " not declared


before being used.");

} else {
if (!tokens.empty()) {

throw runtime_error("Invalid syntax found near token " +


tokens[0] + ".");

// Driver: reads tokenise.txt line by line, tokenizes it, then prints the
// lexical table, the symbol table, and the per-line token stream.
// Returns 1 if the input file is missing or a lexical/semantic error is
// raised while processing; 0 on success.
// NOTE(fix): the extracted original lost the closing braces of the read
// loop, both print loops, and the try/catch; structure restored.
int main() {
    vector<vector<string>> compiled_stream;
    string txt;

    ifstream MyReadFile("tokenise.txt");
    if (!MyReadFile.is_open()) {
        cerr << "Error: Could not open file tokenise.txt\n";
        return 1;
    }
    while (getline(MyReadFile, txt)) {
        vectorise(txt, ' ', compiled_stream);
    }

    map<int, LexicalEntry> lexicalTable;
    generateLexicalTable(compiled_stream, lexicalTable);

    cout << "Lexical Table:\n";
    cout << setw(5) << "ID" << setw(10) << "Name" << setw(15)
         << "Type" << setw(15) << "Category" << endl;
    for (const auto& pair : lexicalTable) {
        cout << setw(5) << pair.first
             << setw(10) << pair.second.name
             << setw(15) << pair.second.type
             << setw(15) << pair.second.category << endl;
    }
    cout << endl;

    map<string, SymbolInfo> symbolTable;
    vector<string> tokenStream;
    try {
        processTokens(compiled_stream, lexicalTable, symbolTable,
                      tokenStream);
    } catch (const runtime_error& error) {
        cerr << "Error: " << error.what() << endl;
        return 1;
    }

    cout << "Symbol Table:\n";
    cout << "Id_name\tType\tValue\n";
    for (const auto& pair : symbolTable) {
        cout << pair.first << "\t" << pair.second.type << "\t"
             << pair.second.value << endl;
    }
    cout << endl;

    cout << "Token Stream (Line by Line):\n";
    for (const auto& line : tokenStream) {
        cout << line << endl;
    }
    cout << endl;

    return 0;
}
INPUT FILE

OUTPUT

You might also like