0% found this document useful (0 votes)
5 views4 pages

Mid Term Project

The document contains a C++ program for a tokenizer that processes a line of C++ code and identifies various tokens such as keywords, operators, punctuation, comments, and literals. It defines a structure for tokens and includes functions to check for keywords, operators, punctuation, and to tokenize the input code. The main function prompts the user for input and displays the identified tokens.

Uploaded by

sajidtusan
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
0% found this document useful (0 votes)
5 views4 pages

Mid Term Project

The document contains a C++ program for a tokenizer that processes a line of C++ code and identifies various tokens such as keywords, operators, punctuation, comments, and literals. It defines a structure for tokens and includes functions to check for keywords, operators, punctuation, and to tokenize the input code. The main function prompts the user for input and displays the identified tokens.

Uploaded by

sajidtusan
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
You are on page 1/ 4

Mid-Term Project

Student ID: 41230100703


Student Name: Sajid Hossain Tusan
Course Name: Compiler Design
Course Code: CSE3272
Code:
#include <iostream>
#include <vector>
#include <string>
#include <cctype>

using namespace std;

// One lexical token: the matched text plus the category label assigned to it.
// Members must stay in this order: tokens are created with aggregate
// initialization of the form {value, type}.
struct Token {
    std::string value;  // token text exactly as it was read from the input
    std::string type;   // human-readable category, printed next to the value
};

// Words reported as keywords. This is a deliberately small subset of the
// C++ reserved words, not the full list.
const std::string keywordList[] = {
    "int",    "float", "if",     "else",
    "while",  "for",   "return", "char",
    "double", "bool",  "void",   "class",
};

// Entry count of keywordList, computed from the array itself so it always
// matches the initializer above.
const int keywordCount = sizeof keywordList / sizeof *keywordList;

// Check if a word is a keyword


bool isKeyword(const string &word) {
for (int i = 0; i < keywordCount; i++) {
if (word == keywordList[i]) return true;
}
return false;
}

// Returns true when `ch` is a single-character operator symbol.
//
// Covers the arithmetic (+ - * / %), assignment/relational (= < >), logical
// (!) and bitwise (& | ^ ~) operator characters. '%', '!', '&', '|', '^' and
// '~' were previously missing, which made expressions such as `a % b` or
// `!done` produce "Unknown" tokens.
bool isOperator(char ch) {
    switch (ch) {
        case '+': case '-': case '*': case '/': case '%':
        case '=': case '<': case '>': case '!':
        case '&': case '|': case '^': case '~':
            return true;
        default:
            return false;
    }
}

// Returns true when `ch` is one of the punctuation characters
// ';', ',', ':' or '.'; every other character yields false.
bool isPunctuation(char ch) {
    switch (ch) {
        case ';':
        case ',':
        case ':':
        case '.':
            return true;
        default:
            return false;
    }
}

// Returns true when `ch` is a bracket character: round '(' ')', curly
// '{' '}' or square '[' ']'.
bool isParenthesis(char ch) {
    // Explicit char list (not a string literal) so '\0' never matches.
    static const char kBrackets[] = {'(', ')', '{', '}', '[', ']'};
    for (char bracket : kBrackets) {
        if (bracket == ch) {
            return true;
        }
    }
    return false;
}

// Returns true when `s` is non-empty and consists solely of decimal digits.
// Signs, decimal points and exponents are not accepted.
bool isNumber(const std::string &s) {
    if (s.empty()) {
        return false;
    }
    for (char ch : s) {
        // std::isdigit has undefined behavior for negative arguments other
        // than EOF; plain char may be signed, so convert to unsigned char.
        if (!std::isdigit(static_cast<unsigned char>(ch))) {
            return false;
        }
    }
    return true;
}
// Main tokenizer function.
//
// Scans `code` from left to right and returns the tokens found, in input
// order. Whitespace separates tokens and is never emitted. At each position
// the first matching rule wins:
//   '#' ...            -> "Preprocessor Directive" (through end of line)
//   "//" ...           -> "Single-line Comment"    (through end of line)
//   "/*" ... "*/"      -> "Multi-line Comment"
//   '"' ... '"'        -> "String Literal"
//   '\'' ... '\''      -> "Character Literal"
//   isOperator(ch)     -> "Operator"     (one character per token)
//   isPunctuation(ch)  -> "Punctuation"
//   isParenthesis(ch)  -> "Parenthesis"
//   [A-Za-z0-9_]+      -> "Keyword", "Number" or "Identifier"
//   anything else      -> "Unknown"      (one character per token)
//
// An unterminated comment or literal extends to the end of the input and is
// reported as-is, without a fabricated terminator.
std::vector<Token> tokenize(const std::string &code) {
    std::vector<Token> tokens;
    const std::size_t length = code.length();
    std::size_t i = 0;  // unsigned index: avoids signed/unsigned comparisons with length

    // The <cctype> classifiers have undefined behavior for negative values,
    // and plain char may be signed, so always go through unsigned char.
    const auto isSpace = [](char c) {
        return std::isspace(static_cast<unsigned char>(c)) != 0;
    };
    const auto isWordChar = [](char c) {
        return std::isalnum(static_cast<unsigned char>(c)) != 0 || c == '_';
    };

    // Consumes a literal whose opening quote is at code[i] and returns its
    // text, including the closing quote when one is present.
    const auto readQuoted = [&](char quote) {
        std::string text(1, code[i++]);  // opening quote
        while (i < length && code[i] != quote) {
            // Copy a backslash together with the character it escapes so an
            // escaped quote (\" or \') does not end the literal early.
            if (code[i] == '\\' && i + 1 < length) {
                text += code[i++];
            }
            text += code[i++];
        }
        if (i < length) {
            text += code[i++];  // closing quote
        }
        return text;
    };

    while (i < length) {
        const char ch = code[i];

        // Skip spaces
        if (isSpace(ch)) {
            ++i;
            continue;
        }

        // Handle preprocessor directives: everything up to the end of the line
        if (ch == '#') {
            std::string directive;
            while (i < length && code[i] != '\n') {
                directive += code[i++];
            }
            tokens.push_back({directive, "Preprocessor Directive"});
            continue;
        }

        // Handle comments
        if (ch == '/' && i + 1 < length) {
            if (code[i + 1] == '/') {
                // A single-line comment ends at the newline, not at the end
                // of the whole input.
                std::string comment;
                while (i < length && code[i] != '\n') {
                    comment += code[i++];
                }
                tokens.push_back({comment, "Single-line Comment"});
                continue;
            }
            if (code[i + 1] == '*') {
                // Search after the opening "/*" so that "/*/" is not treated
                // as a complete comment.
                const std::size_t close = code.find("*/", i + 2);
                const std::size_t end =
                    (close == std::string::npos) ? length : close + 2;
                tokens.push_back({code.substr(i, end - i), "Multi-line Comment"});
                i = end;
                continue;
            }
        }

        // Handle string literals
        if (ch == '"') {
            tokens.push_back({readQuoted('"'), "String Literal"});
            continue;
        }

        // Handle character literals
        if (ch == '\'') {
            tokens.push_back({readQuoted('\''), "Character Literal"});
            continue;
        }

        // Handle operators
        if (isOperator(ch)) {
            tokens.push_back({std::string(1, ch), "Operator"});
            ++i;
            continue;
        }

        // Handle punctuation
        if (isPunctuation(ch)) {
            tokens.push_back({std::string(1, ch), "Punctuation"});
            ++i;
            continue;
        }

        // Handle parentheses
        if (isParenthesis(ch)) {
            tokens.push_back({std::string(1, ch), "Parenthesis"});
            ++i;
            continue;
        }

        // Handle identifiers, keywords, and numbers
        if (isWordChar(ch)) {
            std::string word;
            while (i < length && isWordChar(code[i])) {
                word += code[i++];
            }

            if (isKeyword(word)) {
                tokens.push_back({word, "Keyword"});
            } else if (isNumber(word)) {
                tokens.push_back({word, "Number"});
            } else {
                tokens.push_back({word, "Identifier"});
            }
            continue;
        }

        // Unknown character
        tokens.push_back({std::string(1, ch), "Unknown"});
        ++i;
    }

    return tokens;
}

// Print tokens
void displayTokens(const vector<Token> &tokens) {
cout << "\nTokens Found:\n";
for (const auto &t : tokens) {
cout << t.value << " -> " << t.type << endl;
}
}

int main() {
string line;
cout << "Enter a line of C++ code:\n";
getline(cin, line);

vector<Token> tokens = tokenize(line);


displayTokens(tokens);

return 0;
}

You might also like