// CC Project Code
#include <string>
#include <fstream>
#include <vector>
#include <sstream>
#include <unordered_set>
#include <string>
#include <fstream>
#include <unordered_map>
#include <regex>
// ---- Shared state of the lexical analyser -----------------------------------
// Standard-library names are written with an explicit std:: prefix so these
// declarations do not depend on a using-directive being in effect.

// Token counter, starts at zero. Not updated anywhere in the visible code.
int totalTokens = 0;

// Raw input lines; load_data_from_string() appends one entry per line it reads.
std::vector<std::string> linesFromDataFile;

// NOTE(review): the next three containers are not referenced in the visible
// part of the file; presumably they hold per-line token lists, comment-free
// lines and tokenised lines respectively — confirm against their users.
std::vector<std::vector<std::string>> tokenizedData;
std::vector<std::string> cleanedLines;
std::vector<std::string> tokenizedLines;

// One set per token category. Nothing visible inserts into them; presumably
// each collects the distinct lexemes recognised for its category — confirm.
std::unordered_set<std::string> found_operators;
std::unordered_set<std::string> found_punctuators;
std::unordered_set<std::string> found_keywords;
std::unordered_set<std::string> found_identifiers;
std::unordered_set<std::string> found_constants;
std::unordered_set<std::string> found_literals;
std::unordered_set<std::string> found_special_characters;
std::unordered_set<std::string> found_invalid_tokens;

// Not referenced in the visible code; presumably collected error messages.
std::vector<std::string> errors;

// Program text to analyse: assigned in main() and wrapped in a stringstream by
// load_data_from_string().
std::string local_data;
// Intended to split the in-memory program text `local_data` into lines and
// append each one to the global `linesFromDataFile`.
//
// NOTE(review): this body does not compile as written — it reads like two
// routines spliced together with lines missing:
//   * `ss` is built from `local_data` but never read; every read goes through
//     `fin`, which is not declared anywhere in the visible code. Presumably the
//     reads should use `ss`, and the "Error opening data.txt!" check and
//     `fin.close()` are leftovers from a file-based loader — confirm.
//   * `cout`/`endl` are used although <iostream> is not among the visible
//     #include lines.
//   * Everything after `fin.close()` uses `i`, `inString`, `inComment` and
//     `cleanedLine` (none declared here) and `break`/`continue` with no
//     enclosing loop. It looks like the body of a per-character
//     comment-stripping loop whose header and declarations are not visible.
//     `inString` is only read, never assigned, in the visible statements.
void load_data_from_string()
{
stringstream ss(local_data);
string line;
// NOTE(review): `fin` is undeclared; `ss` above is presumably the intended stream.
if (!fin)
{
cout << "Error opening data.txt!" << endl;
return;
}
// Append every line read to the global list of raw input lines.
while(getline(fin, line))
{
linesFromDataFile.push_back(line);
}
fin.close();
// --- Comment-stripping fragment (the loop over `i` is not visible) ---
// Two slashes outside a string literal start a line comment: `break` abandons
// the rest of the line.
if (!inString && line[i] == '/' && (i + 1) < line.length() && line[i + 1] == '/')
{ // line comment
break;
}
// Slash followed by asterisk outside a string literal: enter block-comment mode.
// NOTE(review): `i` is not advanced past the asterisk here. Assuming the
// missing loop steps one character at a time, a slash-asterisk-slash sequence
// would be seen as opener and closer at once — confirm.
if (!inString && line[i] == '/' && (i + 1) < line.length() && line[i + 1] == '*')
{ // block comment opener
inComment = true;
continue;
}
// Asterisk followed by slash while inside a block comment: leave comment mode
// and step over the slash as well.
if (inComment && line[i] == '*' && (i + 1) < line.length() && line[i + 1] == '/')
{
inComment = false;
i++;
continue;
}
// Outside comments, copy the character to `cleanedLine`, except that a space
// is dropped when it would be the first character or would follow another space.
if (!inComment) {
if (line[i] != ' ' || (cleanedLine.length() > 0 && cleanedLine.back() != ' ')) {
cleanedLine += line[i];
}
}
}
// NOTE(review): orphaned tail of a table-driven scanner. The function header,
// the loop over the token and the definitions of `transition_table` and
// `char_type` are not visible, so this span does not compile on its own (it
// also closes two more braces than it opens).
int state = 0;
// Follow one transition; a table value of -1 rejects the token.
state = transition_table[state][char_type];
if (state == -1) {
return false;
}
}
// The token is accepted only if the automaton ends in state 3.
return state == 3;
}
/**
 * Table-driven recogniser for numeric constants.
 *
 * Accepted shape: optional sign, one or more digits, optional fraction
 * ('.' followed by at least one digit), optional exponent ('E' or 'e',
 * optional sign, at least one digit) — e.g. "10", "5.0", "3.43433E+13".
 * The whole token must match.
 *
 * @param token  candidate lexeme.
 * @return true if the token is a well-formed numeric constant; false
 *         otherwise, including for an empty token.
 *
 * NOTE(review): the previous body had no loop over the token, never computed
 * `char_type`, and had no final return. The meaning of the five table columns
 * and the set of accepting states were inferred from the shape of the table
 * itself — confirm against the language specification for this project.
 */
bool constant_scanner(const std::string& token) {
    // Input classes; each one is a column of the transition table.
    enum CharClass { SIGN = 0, DIGIT = 1, DOT = 2, EXPONENT = 3, OTHER = 4 };

    // Rows are states, -1 means "no transition" (reject).
    //   0 start             1 leading sign seen
    //   2 integer digits    (accepting)
    //   3 '.' seen          4 fraction digits   (accepting)
    //   5 exponent marker   6 exponent digits   (accepting)
    //   7 exponent sign seen
    static const int transition_table[8][5] = {
        // SIGN DIGIT DOT  EXP  OTHER
        {  1,   2,  -1,  -1,  -1 },
        { -1,   2,  -1,  -1,  -1 },
        { -1,   2,   3,   5,  -1 },
        { -1,   4,  -1,  -1,  -1 },
        { -1,   4,  -1,   5,  -1 },
        // A sign after the exponent marker goes to state 7 rather than straight
        // to the exponent-digit state, so "1E+" (no exponent digits) is rejected.
        {  7,   6,  -1,  -1,  -1 },
        { -1,   6,  -1,  -1,  -1 },
        // Eighth row: previously left zero-initialised, which would have sent
        // any input back to the start state.
        { -1,   6,  -1,  -1,  -1 },
    };

    int state = 0;
    for (const char c : token) {
        CharClass char_type = OTHER;
        if (c >= '0' && c <= '9') {
            char_type = DIGIT;
        } else if (c == '+' || c == '-') {
            char_type = SIGN;
        } else if (c == '.') {
            char_type = DOT;
        } else if (c == 'E' || c == 'e') {
            char_type = EXPONENT;
        }

        state = transition_table[state][char_type];
        if (state == -1) {
            return false;
        }
    }

    // Only states that have consumed at least one digit of their part accept.
    return state == 2 || state == 4 || state == 6;
}
// NOTE(review): fragment of an operator scanner. The enclosing function header,
// the state names used as row indices (START, FINAL, EXCLAMATION, LESS_THAN,
// EQUAL, PLUS, MINUS, GREATER_THAN, AND, OR, COLON, FINAL_EQUAL), the loop over
// the token and the variables `state` and `char_code` are not visible here.
// The assignment statements below are only valid inside a function body.

// Names for the operator characters; each enumerator's value is the character
// code, so it can be used directly as a column index into the 256-wide table.
// INVALID_CHAR is not used in the visible code.
enum Character {
ASTERISK = '*',
SLASH = '/',
PERCENT = '%',
EXCLAMATION_MARK = '!',
LESS_THAN_MARK = '<',
EQUAL_MARK = '=',
PLUS_MARK = '+',
MINUS_MARK = '-',
GREATER_THAN_MARK = '>',
AND_MARK = '&',
OR_MARK = '|',
COLON_MARK = ':',
INVALID_CHAR = -1
};
// transition_table[state][character code] -> next state. Every entry starts at
// zero, and the lookup further down treats zero as "no transition".
int transition_table[12][256] = { 0 };
// First character: '*', '/' and '%' go straight to FINAL.
transition_table[START][ASTERISK] = FINAL;
transition_table[START][SLASH] = FINAL;
transition_table[START][PERCENT] = FINAL;
// First character of a possibly longer operator: move to a per-character state.
transition_table[START][EXCLAMATION_MARK] = EXCLAMATION;
transition_table[START][LESS_THAN_MARK] = LESS_THAN;
transition_table[START][EQUAL_MARK] = EQUAL;
transition_table[START][PLUS_MARK] = PLUS;
transition_table[START][MINUS_MARK] = MINUS;
transition_table[START][GREATER_THAN_MARK] = GREATER_THAN;
transition_table[START][AND_MARK] = AND;
transition_table[START][OR_MARK] = OR;
transition_table[START][COLON_MARK] = COLON;
// Second character. Sequences that reach FINAL here: "!=", "<>", "<<".
transition_table[EXCLAMATION][EQUAL_MARK] = FINAL;
transition_table[LESS_THAN][GREATER_THAN_MARK] = FINAL;
transition_table[LESS_THAN][LESS_THAN_MARK] = FINAL;
// After '=': ':' leads to FINAL_EQUAL (which needs one more '=', see the last
// table entry); '+', '<', '=' and '>' reach FINAL, i.e. "=+", "=<", "==", "=>".
// NOTE(review): the table has "=<" and "=>" but no "<=" or ">=" entries —
// confirm this matches the operator set of the language being scanned.
transition_table[EQUAL][COLON_MARK] = FINAL_EQUAL;
transition_table[EQUAL][PLUS_MARK] = FINAL;
transition_table[EQUAL][LESS_THAN_MARK] = FINAL;
transition_table[EQUAL][EQUAL_MARK] = FINAL;
transition_table[EQUAL][GREATER_THAN_MARK] = FINAL;
// Doubled characters: "++", "--", ">>", "&&", "||", "::".
transition_table[PLUS][PLUS_MARK] = FINAL;
transition_table[MINUS][MINUS_MARK] = FINAL;
transition_table[GREATER_THAN][GREATER_THAN_MARK] = FINAL;
transition_table[AND][AND_MARK] = FINAL;
transition_table[OR][OR_MARK] = FINAL;
transition_table[COLON][COLON_MARK] = FINAL;
// Third character: "=:" followed by '=' reaches FINAL.
transition_table[FINAL_EQUAL][EQUAL_MARK] = FINAL;
// One step of the automaton: a zero entry means no transition is defined for
// this character in the current state, so the token is rejected.
// NOTE(review): if `char_code` is derived from a plain `char`, a negative value
// would index outside the table — confirm how it is computed.
if (transition_table[state][char_code] == 0) {
return false;
}
state = transition_table[state][char_code];
}
// NOTE(review): fragment of a punctuator scanner. The enclosing function header
// (the code reads a `token`), the state names START and FINAL, and the variable
// `state` are not visible here. The assignment statements below are only valid
// inside a function body.

// Names for the punctuator characters; each enumerator's value is the character
// code and is used as a column index. INVALID_CHAR is not used in the visible code.
enum Character {
OPEN_BRACKET = '[',
CLOSE_BRACKET = ']',
OPEN_CURLY = '{',
CLOSE_CURLY = '}',
LESS_THAN = '<',
GREATER_THAN = '>',
OPEN_PAREN = '(',
CLOSE_PAREN = ')',
SEMICOLON = ';',
COMMA = ',',
INVALID_CHAR = -1
};
// transition_table[state][character code] -> next state; all entries start at 0.
int transition_table[2][256] = { 0 };
// Each listed punctuator leads from START directly to FINAL.
transition_table[START][OPEN_BRACKET] = FINAL;
transition_table[START][CLOSE_BRACKET] = FINAL;
transition_table[START][OPEN_CURLY] = FINAL;
transition_table[START][CLOSE_CURLY] = FINAL;
transition_table[START][LESS_THAN] = FINAL;
transition_table[START][GREATER_THAN] = FINAL;
transition_table[START][OPEN_PAREN] = FINAL;
transition_table[START][CLOSE_PAREN] = FINAL;
transition_table[START][SEMICOLON] = FINAL;
transition_table[START][COMMA] = FINAL;
// A punctuator is exactly one character long; anything else is rejected.
if (token.size() != 1) {
return false;
}
char ch = token[0];
// NOTE(review): where plain `char` is signed, a byte >= 0x80 converts to a
// negative int and the lookup below would index outside the 256-wide row;
// converting through `unsigned char` first would avoid that.
int char_code = static_cast<int>(ch);
// Accept only if the single character has a transition to FINAL.
if (transition_table[state][char_code] == FINAL) {
return true;
}
else {
return false;
}
}
/**
 * Accepts any token that is exactly one character long.
 *
 * The character itself is never inspected, so this scanner only makes sense as
 * a catch-all. Presumably callers try the more specific scanners first —
 * confirm at the call site.
 *
 * @param token  candidate lexeme.
 * @return true if token.size() == 1, false otherwise (including empty tokens).
 */
bool special_character_scanner(const std::string& token) {
    enum State {
        START = 0,
        VALID = 1,
        INVALID = 2
    };

    // The only input the automaton distinguishes is "is the token a single
    // character?": column 0 = no, column 1 = yes. The previous 2x256 table was
    // only ever indexed at these two columns, and `state` was never declared.
    static const int transition_table[1][2] = { { INVALID, VALID } };

    const int input = (token.size() == 1) ? 1 : 0;
    const int state = transition_table[START][input];
    return state == VALID;
}
// Intended to decide whether `token` is a keyword.
//
// NOTE(review): the body is incomplete as it stands:
//   * `state` and `ch` are used but not declared, and there is no loop over
//     the characters of `token`;
//   * no list of keywords is visible, and VALID_KEYWORD / INVALID_KEYWORD are
//     never used;
//   * control can reach the end of this non-void function without a return;
//   * `isalpha` is declared in <cctype>, which is not among the visible
//     #include lines.
bool keyword_scanner(const string& token) {
enum State {
START = 0,
VALID_KEYWORD,
INVALID_KEYWORD
};
// An empty token is never a keyword.
if (token.empty()) {
return false;
}
// While still in the START state, a non-alphabetic character rejects the token.
// NOTE(review): if `ch` is a plain `char`, convert it to `unsigned char` before
// calling isalpha; a negative argument other than EOF is undefined behaviour.
if (state == START) {
if (!isalpha(ch)) {
return false;
}
}
}
// NOTE(review): fragment of a scanner for quoted literals. The enclosing
// function header, the loop over the token and the variables `state` and `ch`
// are not visible here; the assignment statements are only valid inside a
// function body.

// States of the automaton. INVALID is -1 and is never used as a row index in
// the visible code.
enum State {
START = 0,
IN_STRING = 1,
IN_CHAR = 2,
INVALID = -1,
VALID = 3
};
// transitionTable[state][character code] -> next state; all entries start at 0.
int transitionTable[4][256] = { 0 };
// An opening double or single quote enters the matching "inside" state, and
// the same quote character again moves to VALID.
transitionTable[START]['\"'] = IN_STRING;
transitionTable[START]['\''] = IN_CHAR;
transitionTable[IN_STRING]['\"'] = VALID;
transitionTable[IN_CHAR]['\''] = VALID;
// One step of the automaton.
// NOTE(review): only the four entries above are set in the visible code; every
// other entry is 0, which equals START rather than INVALID (-1). Unless further
// entries are filled in by code not shown, an ordinary character inside a
// literal sends the automaton back to START and the INVALID check below never
// fires — confirm. Also, if `ch` is a plain `char`, a negative value would
// index outside the 256-wide row.
state = transitionTable[state][ch];
if (state == INVALID) {
return false;
}
}
// Program entry point: copies a built-in sample program into the global
// `local_data` buffer and exits with status 0. No other function is called
// from here in the visible code.
int main() {
    // Sample input text. The raw-string lines are deliberately left unindented
    // because everything between the delimiters is part of the data.
    static const char kSampleProgram[] = R"(
# include < iostream >
# include < string >
# include < boolean >
using namespace std ;
int main ( )
{
//hello this is a comment
std :: int var_1 = 5.0 * 10 & 6 ;
double _var_2 = 3.43433E+13 ; //hello this is a comment
char @character = 'a' ;
int _va%r_3_ = var_1 + _var_2 ;
cout << var_1 + _var_2 << "HELLO" << endl ;
}
)";

    local_data = kSampleProgram;
    return 0;
}