0% found this document useful (0 votes)
17 views

Compiler

Uploaded by

abdullah009amjad
Copyright
© © All Rights Reserved
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
17 views

Compiler

Uploaded by

abdullah009amjad
Copyright
© © All Rights Reserved
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 7

Shaheed Zulfikar Ali Bhutto Institute of Science & Technology

COMPUTER SCIENCE DEPARTMENT

Total Marks: 04

Obtained Marks:

Compiler Construction
Assignment # 01

Last date of Submission: 07 Oct 2024

Submitted To: Mr. Muneeb Muzammil

Student Name:

Reg Number:

Compiler Construction BS(CS)-5B SZABIST-ISB


Shaheed Zulfikar Ali Bhutto Institute of Science & Technology

COMPUTER SCIENCE DEPARTMENT

Instructions: Copied or shown assignments will be marked zero. Late submissions are not
entertained in any case.

Create a lexical analyzer to tokenize a basic programming language named C--. The lexer
should identify various components such as identifiers, keywords, constants (both integers
and floating-point numbers), and operators all following specific token definitions.
Implement a C++ or Java program to properly parse input programs, while ignoring whitespace
and managing comments. The output should display the identified tokens and their
corresponding lexemes. Provide the source code, and demonstrate the compilation process
along with the program's execution.
Use the following sample input file (input.txt):
int main() {
float x = 10.5;
printf("x is 10\n");
return 0;
}

Note:
1. Change the filename to your ID, e.g. 2073105.doc
2. Upload the .doc on Google Classroom.
3. Submit a hard copy (single-sided) in class.
4. Make sure that the output screen does not have black or colored background.
5. Poor indentation and wrong format will result in deduction of marks.

Solution

Compiler Construction BS(CS)-5B SZABIST-ISB


Shaheed Zulfikar Ali Bhutto Institute of Science & Technology
CODE:
#include <iostream>

#include <fstream>

#include <cctype>

#include <string>

#include <unordered_set>

using namespace std;

// Reserved words of the C-- language. isKeyword() looks lexemes up in this
// set so the tokenizer can tell keywords apart from ordinary identifiers.

unordered_set<string> keywords = {"int", "float", "return", "printf"};

// Returns true when `ch` is a C-- delimiter: whitespace (space, tab, newline)
// or one of the punctuation characters ; , ( ) { }.
bool isDelimiter(char ch) {
    switch (ch) {
        case ' ':
        case '\t':
        case '\n':
        case ';':
        case ',':
        case '(':
        case ')':
        case '{':
        case '}':
            return true;
        default:
            return false;
    }
}

// Returns true when `ch` is one of the single-character operators
// recognised by the lexer: + - * / = >
// NOTE(review): '<' is not in the set although '>' is - confirm against the
// C-- token definitions before adding it.
bool isOperator(char ch) {
    switch (ch) {
        case '+':
        case '-':
        case '*':
        case '/':
        case '=':
        case '>':
            return true;
        default:
            return false;
    }
}

// Function to check if the string is a keyword

bool isKeyword(string str) {

return keywords.find(str) != keywords.end();

// Returns true when `str` is a non-empty sequence of decimal digits.
// An empty string is not an integer literal, so it yields false.
bool isInteger(const std::string& str) {
    if (str.empty()) {
        return false;
    }
    for (const char ch : str) {
        // <cctype> classifiers are undefined for negative char values,
        // so convert to unsigned char first.
        if (!std::isdigit(static_cast<unsigned char>(ch))) {
            return false;
        }
    }
    return true;
}

Compiler Construction BS(CS)-5B SZABIST-ISB


Shaheed Zulfikar Ali Bhutto Institute of Science & Technology

// Returns true when `str` is a floating-point literal: decimal digits with
// exactly one '.' and at least one digit. Forms such as "10." and ".5" are
// accepted; a lone "." or a string without a decimal point is not.
bool isFloat(const std::string& str) {
    bool decimal_point = false;
    bool has_digit = false;
    for (const char ch : str) {
        if (ch == '.') {
            if (decimal_point) {
                return false;  // a second '.' makes the literal malformed
            }
            decimal_point = true;
        } else if (std::isdigit(static_cast<unsigned char>(ch))) {
            has_digit = true;
        } else {
            return false;
        }
    }
    return decimal_point && has_digit;
}

// Function to tokenize the input string

void tokenize(string code) {

int i = 0;

while (i < code.length()) {

// Skip whitespace

if (isspace(code[i])) {

i++;

continue;

// Handling comments

if (code[i] == '/' && code[i + 1] == '/') {

while (i < code.length() && code[i] != '\n') i++; // Skip until the end of the line

continue;

} else if (code[i] == '/' && code[i + 1] == '*') {

i += 2; // Skip '/*'

while (i < code.length() && !(code[i] == '*' && code[i + 1] == '/')) i++;

i += 2; // Skip '*/'

continue;

Compiler Construction BS(CS)-5B SZABIST-ISB


Shaheed Zulfikar Ali Bhutto Institute of Science & Technology
}

// Handling keywords and identifiers

if (isalpha(code[i])) {

string buffer;

while (i < code.length() && (isalnum(code[i]) || code[i] == '_')) {

buffer += code[i];

i++;

if (isKeyword(buffer)) {

cout << "Keyword: " << buffer << endl;

} else {

cout << "Identifier: " << buffer << endl;

continue;

// Handling numbers (integer and float)

if (isdigit(code[i])) {

string buffer;

bool is_float = false;

while (i < code.length() && (isdigit(code[i]) || code[i] == '.')) {

if (code[i] == '.') is_float = true;

buffer += code[i];

i++;

if (is_float) {

cout << "Float: " << buffer << endl;

} else {

cout << "Integer: " << buffer << endl;

continue;

Compiler Construction BS(CS)-5B SZABIST-ISB


Shaheed Zulfikar Ali Bhutto Institute of Science & Technology
// Handling string literals

if (code[i] == '"') {

string buffer;

i++; // Skip the opening quote

while (i < code.length() && code[i] != '"') {

buffer += code[i];

i++;

i++; // Skip the closing quote

cout << "String Literal: \"" << buffer << "\"" << endl;

continue;

// Handling operators

if (isOperator(code[i])) {

cout << "Operator: " << code[i] << endl;

i++;

continue;

// Handling delimiters

if (isDelimiter(code[i])) {

if (code[i] != ' ' && code[i] != '\n' && code[i] != '\t') {

cout << "Delimiter: " << code[i] << endl;

i++;

// Entry point: reads the whole of "input.txt" from the current directory,
// then prints its tokens via tokenize(). Returns 1 if the file cannot be
// opened, 0 otherwise.
int main() {
    std::ifstream inputFile("input.txt");
    if (!inputFile.is_open()) {
        std::cout << "Could not open the file!" << std::endl;
        return 1;
    }

    // getline strips the line terminator; re-append '\n' so line comments
    // and token boundaries are still delimited in the joined buffer.
    std::string code;
    std::string line;
    while (std::getline(inputFile, line)) {
        code += line;
        code += '\n';
    }
    inputFile.close();

    std::cout << "Tokenizing the input program:\n";
    tokenize(code);
    return 0;
}

Steps to Compile and Run

1. Save the code in a file named lexer.cpp.


2. Create a sample input file named input.txt with the following content:

int main() {
float x = 10.5;
printf("x is 10\n");
return 0;
}

3. Compile the program using a C++ compiler like g++:

g++ lexer.cpp -o lexer

4. Run the compiled program:

./lexer

Compiler Construction BS(CS)-5B SZABIST-ISB

You might also like