0% found this document useful (0 votes)
11 views7 pages

CC Project Code

Uploaded by

muhamanibutt
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
11 views7 pages

CC Project Code

Uploaded by

muhamanibutt
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 7

#include <iostream>

#include <string>
#include <fstream>
#include <vector>
#include <sstream>
#include <unordered_set>
#include <string>
#include <fstream>
#include <unordered_map>
#include <regex>

using namespace std;

// --- Global scanner state shared by every phase of the pipeline ---

// Running token count. NOTE(review): never updated or read anywhere in this
// file — appears unused.
int totalTokens = 0;
// Raw input lines, filled by load_data() or load_data_from_string().
vector<string> linesFromDataFile;
// NOTE(review): declared but never written or read in this file — appears unused.
vector<vector<string>> tokenizedData;
// Lines after comment removal and whitespace collapsing (process_lines()).
vector<string> cleanedLines;
// Flat list of all tokens produced by tokenize_lines().
vector<string> tokenizedLines;
// Token sets accumulated by fa_scanner(), one per lexical category.
unordered_set<string> found_operators = {};
unordered_set<string> found_punctuators = {};
unordered_set<string> found_keywords = {};
unordered_set<string> found_identifiers = {};
unordered_set<string> found_constants = {};
unordered_set<string> found_literals = {};
unordered_set<string> found_special_characters = {};
// NOTE(review): never inserted into — invalid tokens go to `errors` instead.
unordered_set<string> found_invalid_tokens = {};

// Tokens that matched no category; written to errors.txt by fa_scanner().
vector<string> errors;
// Built-in sample source program, scanned when file input is disabled in main().
string local_data;

// Splits the embedded sample program (global `local_data`) into lines and
// appends them to the global `linesFromDataFile`, echoing them to stdout.
void load_data_from_string()
{
    stringstream reader(local_data);
    string current;

    while (getline(reader, current)) {
        linesFromDataFile.push_back(current);
    }

    cout << "\nStored lines from local_data:";
    for (const auto& stored : linesFromDataFile) {
        cout << stored << endl;
    }
}
// Reads the input program from "Sourcecode.txt" into the global
// `linesFromDataFile`, echoing the stored lines to stdout.
// On open failure it prints an error and leaves the vector untouched.
void load_data()
{
    ifstream fin("Sourcecode.txt");
    string line;

    if (!fin)
    {
        // Bug fix: the old message blamed "data.txt" even though this
        // function opens Sourcecode.txt.
        cout << "Error opening Sourcecode.txt!" << endl;
        return;
    }
    while (getline(fin, line))
    {
        linesFromDataFile.push_back(line);
    }
    fin.close();

    // Same filename fix as above: report the file we actually read.
    cout << "\nStored lines from Sourcecode.txt:\n";
    for (const auto& stored : linesFromDataFile)
    {
        cout << stored << endl;
    }
    cout << endl << endl;
}
// Strips // and /* */ comments from a single line, collapses runs of spaces
// to one, and trims leading/trailing spaces. Quote characters toggle an
// in-string flag so comment markers inside "..." are kept verbatim.
// NOTE: operates line-by-line, so a /* */ comment cannot span lines
// (inComment is local) — unchanged from the original design.
//
// Bug fixes vs. the original:
//  1. On entering "/*" the '*' is now skipped, so "/*/" no longer closes
//     the comment immediately.
//  2. Characters inside a block comment are fully ignored: a '"' no longer
//     toggles inString or leaks into the output, and a "//" inside
//     /* ... */ (e.g. a URL) no longer truncates the line.
string remove_comments_and_spaces(const string& line) {
    string cleanedLine;
    bool inComment = false;
    bool inString = false;

    for (size_t i = 0; i < line.length(); ++i)
    {
        if (inComment) {
            // Inside /* ... */: only the closing "*/" matters.
            if (line[i] == '*' && (i + 1) < line.length() && line[i + 1] == '/') {
                inComment = false;
                ++i; // consume the '/'
            }
            continue;
        }

        if (line[i] == '\"') {
            inString = !inString;
            cleanedLine += line[i];
            continue;
        }

        if (!inString && line[i] == '/' && (i + 1) < line.length() && line[i + 1] == '/')
        { // line comment: drop the rest of the line
            break;
        }
        if (!inString && line[i] == '/' && (i + 1) < line.length() && line[i + 1] == '*')
        { // block comment opens
            inComment = true;
            ++i; // consume the '*' so "/*/" does not close immediately
            continue;
        }

        // Collapse consecutive spaces; never emit a leading space.
        if (line[i] != ' ' || (cleanedLine.length() > 0 && cleanedLine.back() != ' ')) {
            cleanedLine += line[i];
        }
    }

    // Trim leading/trailing spaces; empty result means the line was all
    // comment/whitespace.
    size_t start = cleanedLine.find_first_not_of(' ');
    size_t end = cleanedLine.find_last_not_of(' ');
    return (start == string::npos) ? "" : cleanedLine.substr(start, end - start + 1);
}
void process_lines(const vector<string>& linesFromDataFile)
{
for (const auto& line : linesFromDataFile)
{
string cleanedLine = remove_comments_and_spaces(line);
if (!cleanedLine.empty())
{
cleanedLines.push_back(cleanedLine);
}
}
}
// Splits each cleaned line into tokens, appending them to the global
// `tokenizedLines`. Whitespace separates tokens; the single characters
// ; ( ) { } [ ] < > # both separate tokens and are emitted as tokens
// themselves. Finally prints all tokens to stdout.
//
// Fixes vs. the original: removed the unused `tokenRegex` local, and
// isspace() now receives an unsigned char (passing a negative plain char
// is undefined behavior).
void tokenize_lines(const vector<string>& cleanedLines) {
    for (const auto& line : cleanedLines) {
        string currentToken;
        for (size_t i = 0; i < line.size(); ++i) {
            const char ch = line[i];
            const bool isSeparator =
                ch == ';' || ch == '(' || ch == ')' || ch == '{' || ch == '}' ||
                ch == '[' || ch == ']' || ch == '<' || ch == '>' || ch == '#';

            if (isspace(static_cast<unsigned char>(ch)) || isSeparator) {
                // Flush any token accumulated so far.
                if (!currentToken.empty()) {
                    tokenizedLines.push_back(currentToken);
                    currentToken.clear();
                }
                // Separator characters are tokens in their own right.
                if (isSeparator) {
                    tokenizedLines.push_back(string(1, ch));
                }
            }
            else {
                currentToken += ch;
            }
        }
        if (!currentToken.empty()) {
            tokenizedLines.push_back(currentToken);
        }
    }

    cout << "Tokens in line: " << endl;
    for (const auto& tok : tokenizedLines) {
        cout << " < " << tok << " > ";
    }
    cout << endl << endl << endl;
}

// DFA check for this language's identifiers. Accepting state is 3, which is
// only reachable after an underscore — so a valid identifier must contain at
// least one '_' (e.g. "var_1", "_x"); any other character rejects outright.
// Categories: 0 = letter, 1 = digit, 2 = underscore, 3 = other.
bool identifier_scanner(const string& token) {
    static const int kTransitions[4][4] = {
        {2, 2, 1, -1},  // 0: start
        {3, 3, 3, -1},  // 1: leading underscore seen -> accepting next
        {2, 2, 3, -1},  // 2: letters/digits only so far
        {3, 3, 3, -1}   // 3: accepting (an underscore has appeared)
    };

    int current = 0;
    for (char ch : token) {
        int category;
        if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
            category = 0;
        } else if (ch >= '0' && ch <= '9') {
            category = 1;
        } else if (ch == '_') {
            category = 2;
        } else {
            category = 3;
        }

        current = kTransitions[current][category];
        if (current == -1) {
            return false;
        }
    }

    return current == 3;
}
// DFA check for numeric constants: optionally signed integers ("123", "+7"),
// decimals ("5.0"), and scientific notation ("3.43433E+13").
// Accepting states: 2 (integer digits), 4 (fraction digits), 6 (exponent digits).
// Categories: 0 = sign, 1 = digit, 2 = '.', 3 = e/E, 4 = other.
bool constant_scanner(const string& token) {
    static const int kTable[7][5] = {
        {1, 2, -1, -1, -1},   // 0: start            (sign | digit)
        {-1, 2, -1, -1, -1},  // 1: after sign       (digit)
        {-1, 2, 3, 5, -1},    // 2: integer part     (digit | '.' | e/E)
        {-1, 4, -1, -1, -1},  // 3: after '.'        (digit)
        {-1, 4, -1, 5, -1},   // 4: fraction part    (digit | e/E)
        {6, 6, -1, -1, -1},   // 5: after e/E        (sign | digit)
        {-1, 6, -1, -1, -1},  // 6: exponent digits  (digit)
    };

    int current = 0;
    for (char ch : token) {
        int category;
        if (ch == '+' || ch == '-') {
            category = 0;
        } else if (ch >= '0' && ch <= '9') {
            category = 1;
        } else if (ch == '.') {
            category = 2;
        } else if (ch == 'e' || ch == 'E') {
            category = 3;
        } else {
            category = 4;
        }

        current = kTable[current][category];
        if (current == -1) {
            return false;
        }
    }

    return current == 2 || current == 4 || current == 6;
}
// DFA check for this language's operators. Single-character accepts:
// * / % + - : . Two/three-character accepts include != <> << ++ -- >> && ||
// :: == =< => =+ and =:=. A lone =, !, <, >, & or | is rejected (per the
// transition table below — unchanged from the original design).
//
// Bug fix: the character was cast with static_cast<int>, so a plain char
// holding a byte > 127 (negative on most platforms) indexed the [256] table
// out of bounds (undefined behavior). Cast through unsigned char instead.
bool operator_scanner(const string& token) {
    enum State {
        START = 0,
        EXCLAMATION = 1,
        LESS_THAN = 2,
        EQUAL = 3,
        PLUS = 4,
        MINUS = 5,
        GREATER_THAN = 6,
        AND = 7,
        OR = 8,
        COLON = 9,
        FINAL = 10,
        FINAL_EQUAL = 11
    };

    enum Character {
        ASTERISK = '*',
        SLASH = '/',
        PERCENT = '%',
        EXCLAMATION_MARK = '!',
        LESS_THAN_MARK = '<',
        EQUAL_MARK = '=',
        PLUS_MARK = '+',
        MINUS_MARK = '-',
        GREATER_THAN_MARK = '>',
        AND_MARK = '&',
        OR_MARK = '|',
        COLON_MARK = ':',
        INVALID_CHAR = -1
    };

    // 0 in the table means "no transition" (reject).
    int transition_table[12][256] = { 0 };

    transition_table[START][ASTERISK] = FINAL;
    transition_table[START][SLASH] = FINAL;
    transition_table[START][PERCENT] = FINAL;
    transition_table[START][EXCLAMATION_MARK] = EXCLAMATION;
    transition_table[START][LESS_THAN_MARK] = LESS_THAN;
    transition_table[START][EQUAL_MARK] = EQUAL;
    transition_table[START][PLUS_MARK] = PLUS;
    transition_table[START][MINUS_MARK] = MINUS;
    transition_table[START][GREATER_THAN_MARK] = GREATER_THAN;
    transition_table[START][AND_MARK] = AND;
    transition_table[START][OR_MARK] = OR;
    transition_table[START][COLON_MARK] = COLON;

    transition_table[EXCLAMATION][EQUAL_MARK] = FINAL;
    transition_table[LESS_THAN][GREATER_THAN_MARK] = FINAL;
    transition_table[LESS_THAN][LESS_THAN_MARK] = FINAL;
    transition_table[EQUAL][COLON_MARK] = FINAL_EQUAL;
    transition_table[EQUAL][PLUS_MARK] = FINAL;
    transition_table[EQUAL][LESS_THAN_MARK] = FINAL;
    transition_table[EQUAL][EQUAL_MARK] = FINAL;
    transition_table[EQUAL][GREATER_THAN_MARK] = FINAL;
    transition_table[PLUS][PLUS_MARK] = FINAL;
    transition_table[MINUS][MINUS_MARK] = FINAL;
    transition_table[GREATER_THAN][GREATER_THAN_MARK] = FINAL;
    transition_table[AND][AND_MARK] = FINAL;
    transition_table[OR][OR_MARK] = FINAL;
    transition_table[COLON][COLON_MARK] = FINAL;
    transition_table[FINAL_EQUAL][EQUAL_MARK] = FINAL;

    int state = START;

    for (char ch : token) {
        // unsigned char keeps the index in [0, 255].
        int char_code = static_cast<unsigned char>(ch);

        if (transition_table[state][char_code] == 0) {
            return false;
        }

        state = transition_table[state][char_code];
    }

    // '+', '-' and ':' are accepting on their own; everything else must
    // have reached FINAL.
    return (state == FINAL || state == PLUS || state == MINUS || state == COLON);
}
// Accepts exactly one of the single-character punctuators:
// [ ] { } < > ( ) ; ,  — any other token (including multi-character ones)
// is rejected.
//
// Bug fix: the character was cast with static_cast<int>, so a plain char
// holding a negative value indexed the [256] table out of bounds (undefined
// behavior). Cast through unsigned char instead.
bool punctuator_scanner(const string& token) {
    enum State {
        START = 0,
        FINAL = 1
    };

    enum Character {
        OPEN_BRACKET = '[',
        CLOSE_BRACKET = ']',
        OPEN_CURLY = '{',
        CLOSE_CURLY = '}',
        LESS_THAN = '<',
        GREATER_THAN = '>',
        OPEN_PAREN = '(',
        CLOSE_PAREN = ')',
        SEMICOLON = ';',
        COMMA = ',',
        INVALID_CHAR = -1
    };

    // 0 = reject, FINAL = accept.
    int transition_table[2][256] = { 0 };

    transition_table[START][OPEN_BRACKET] = FINAL;
    transition_table[START][CLOSE_BRACKET] = FINAL;
    transition_table[START][OPEN_CURLY] = FINAL;
    transition_table[START][CLOSE_CURLY] = FINAL;
    transition_table[START][LESS_THAN] = FINAL;
    transition_table[START][GREATER_THAN] = FINAL;
    transition_table[START][OPEN_PAREN] = FINAL;
    transition_table[START][CLOSE_PAREN] = FINAL;
    transition_table[START][SEMICOLON] = FINAL;
    transition_table[START][COMMA] = FINAL;

    // Punctuators are always exactly one character.
    if (token.size() != 1) {
        return false;
    }

    // unsigned char keeps the index in [0, 255].
    int char_code = static_cast<unsigned char>(token[0]);
    return transition_table[START][char_code] == FINAL;
}
// Catch-all for single characters not claimed by any earlier scanner in
// fa_scanner() (keywords, identifiers, literals, constants, operators,
// punctuators are all tried first).
//
// Fix: the original built a 2x256 "transition table" but only ever read
// transition_table[START][1], which was unconditionally VALID — so the
// observable behavior was exactly `token.size() == 1`. The dead scaffolding
// is removed; behavior is unchanged.
bool special_character_scanner(const string& token) {
    return token.size() == 1;
}
// Returns true when the token is one of this language's keywords
// (C++ keywords plus project-specific ones like "loop"/"agar"/"magar" and
// common names such as "cout"/"cin"/"main").
//
// Fixes vs. the original: the per-character loop kept a `state` variable
// that was never updated (a dead state machine) — the effective behavior
// was "every character is alphabetic AND the token is in the set", which is
// preserved here. Also, isalpha() now receives an unsigned char (passing a
// negative plain char is undefined behavior).
bool keyword_scanner(const string& token) {
    static const unordered_set<string> keywords = {
        "loop", "agar", "magar", "asm", "else", "new", "this", "auto", "enum", "operator",
        "throw", "bool", "explicit", "private", "true", "break", "export", "protected",
        "try", "case", "extern", "public", "typedef", "catch", "false", "register",
        "typeid", "char", "float", "typename", "class", "for", "return", "union",
        "const", "friend", "short", "unsigned", "goto", "signed", "using", "continue",
        "if", "sizeof", "virtual", "default", "inline", "static", "void", "delete",
        "int", "volatile", "do", "long", "struct", "double", "mutable", "switch",
        "while", "namespace",

        "include", "iostream", "boolean", "std", "main", "string", "cout", "cin", "endl"
    };

    if (token.empty()) {
        return false;
    }

    // All keywords are purely alphabetic; reject anything else early.
    for (char ch : token) {
        if (!isalpha(static_cast<unsigned char>(ch))) {
            return false;
        }
    }

    return keywords.find(token) != keywords.end();
}
// DFA check for string ("...") and character ('...') literals. The token
// must start with a quote and end on the matching closing quote; an
// unterminated literal (e.g. "\"abc") is rejected because the DFA never
// reaches VALID.
//
// Bug fix: the table was indexed with a plain char — a byte > 127 is
// negative on most platforms and read out of bounds (undefined behavior).
// Index through unsigned char instead.
bool literal_scanner(const string& token) {

    enum State {
        START = 0,
        IN_STRING = 1,
        IN_CHAR = 2,
        INVALID = -1,
        VALID = 3
    };

    int transitionTable[4][256] = { 0 };

    transitionTable[START]['\"'] = IN_STRING;
    transitionTable[START]['\''] = IN_CHAR;
    transitionTable[IN_STRING]['\"'] = VALID;
    transitionTable[IN_CHAR]['\''] = VALID;

    // Defaults: any non-quote character is invalid from START but stays
    // inside an open string/char literal.
    for (int i = 0; i < 256; i++) {
        if (transitionTable[START][i] == 0) transitionTable[START][i] = INVALID;
        if (transitionTable[IN_STRING][i] == 0) transitionTable[IN_STRING][i] = IN_STRING;
        if (transitionTable[IN_CHAR][i] == 0) transitionTable[IN_CHAR][i] = IN_CHAR;
    }

    int state = START;

    for (char ch : token) {
        // unsigned char keeps the index in [0, 255].
        state = transitionTable[state][static_cast<unsigned char>(ch)];

        if (state == INVALID) {
            return false;
        }
    }

    return state == VALID;
}
// Streams one labelled token category to both stdout and the given file
// stream, matching the original output format exactly.
static void report_category(const string& label,
                            const unordered_set<string>& tokens,
                            ostream& fileOut) {
    cout << label;
    fileOut << label;
    for (const auto& tok : tokens) {
        cout << tok << " ";
        fileOut << tok << " ";
    }
    cout << endl;
    fileOut << endl;
}

// Classifies every token in the global `tokenizedLines` into the global
// category sets, then reports each category to stdout and token.txt, and
// unclassified tokens to stdout and errors.txt.
//
// The scanner order matters: keywords are tried before identifiers, literals
// before constants, etc. — the first scanner that accepts a token wins.
//
// Fix vs. the original: the seven near-identical print loops are collapsed
// into the report_category() helper above; output is unchanged.
void fa_scanner() {
    ofstream fout("errors.txt");
    ofstream fout_tokens("token.txt");

    for (const auto& token : tokenizedLines) {
        if (keyword_scanner(token)) {
            found_keywords.insert(token);
        }
        else if (identifier_scanner(token)) {
            found_identifiers.insert(token);
        }
        else if (literal_scanner(token)) {
            found_literals.insert(token);
        }
        else if (constant_scanner(token)) {
            found_constants.insert(token);
        }
        else if (operator_scanner(token)) {
            found_operators.insert(token);
        }
        else if (punctuator_scanner(token)) {
            found_punctuators.insert(token);
        }
        else if (special_character_scanner(token)) {
            found_special_characters.insert(token);
        }
        else {
            errors.push_back(token);
        }
    }

    report_category("Identifiers: ", found_identifiers, fout_tokens);
    report_category("Constants: ", found_constants, fout_tokens);
    report_category("Operators: ", found_operators, fout_tokens);
    report_category("Punctuators: ", found_punctuators, fout_tokens);
    report_category("keywords: ", found_keywords, fout_tokens);
    report_category("Literals: ", found_literals, fout_tokens);
    report_category("Special Characters: ", found_special_characters, fout_tokens);

    // Errors live in a vector (insertion order preserved) and go to
    // errors.txt rather than token.txt.
    cout << "Errors Invalid Characters: ";
    fout << "Errors Invalid Characters: ";
    for (const auto& error : errors) {
        cout << error << " ";
        fout << error << " ";
    }
    cout << endl;
    fout << endl;
}

// Entry point: loads the input program (from Sourcecode.txt by default, or
// from the embedded sample below), then runs the pipeline
// clean -> tokenize -> classify.
int main() {
    // Embedded sample program, used only when file input is disabled.
    local_data = R"(
# include < iostream >
# include < string >
# include < boolean >
using namespace std ;
int main ( )
{
//hello this is a comment
std :: int var_1 = 5.0 * 10 & 6 ;
double _var_2 = 3.43433E+13 ; //hello this is a comment
char @character = 'a' ;
int _va%r_3_ = var_1 + _var_2 ;
cout << var_1 + _var_2 << "HELLO" << endl ;
}
)";

    // Flip to false to scan the embedded sample instead of Sourcecode.txt.
    const bool readFromFile = true;

    if (readFromFile) {
        load_data();
    }
    else {
        load_data_from_string();
    }

    process_lines(linesFromDataFile);
    tokenize_lines(cleanedLines);
    fa_scanner();

    return 0;
}

You might also like