0% found this document useful (0 votes)
16 views12 pages

Lab 3 and 4

The document is a report on a lexical analyzer implementation, detailing the structure and functions for token creation and management. It includes definitions for token types, functions for identifying different operators and keywords, and a symbol table for managing tokens. The report also provides an example input code and demonstrates the process of reading, tokenizing, and symbol table creation from the input code.

Uploaded by

shreyasinha3002
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as ODT, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
16 views12 pages

Lab 3 and 4

The document is a report on a lexical analyzer implementation, detailing the structure and functions for token creation and management. It includes definitions for token types, functions for identifying different operators and keywords, and a symbol table for managing tokens. The report also provides an example input code and demonstrates the process of reading, tokenizing, and symbol table creation from the input code.

Uploaded by

shreyasinha3002
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as ODT, PDF, TXT or read online on Scribd
You are on page 1/ 12

Lab 3&4 Report

David Jijo
C-52
220905478

Lexical Analyzer:

#token.h
#ifndef TOKEN_H
#define TOKEN_H

#include "utils.h"
/* Number of slots in the token table; the definition lives in main.c. */
extern int TOKEN_TABLE_SIZE;

/*
 * Lexical categories a token can belong to.
 * Numbering starts at 2 and increases by one per enumerator; values 0 and 1
 * are not assigned to any category.
 */
typedef enum TokTypes {
    TOK_EOF = 2,  /* 2: placeholder / end-of-table marker */
    IDENTIFIER,   /* 3 */
    NUMBER,       /* 4 */
    KEYWORD,      /* 5 */
    RELOP,        /* 6 */
    ARITHOP,      /* 7 */
    LOGICALOP,    /* 8 */
    ASSIGNOP,     /* 9 */
    PUNCTUATION   /* 10 */
} TokTypes;

/* One lexeme recognised by the lexical analyzer. */
typedef struct Token{


char token_name[1024]; /* lexeme text, stored as a NUL-terminated string */
int index; /* value passed by the creator; getTokens passes the token-table slot number */
int pos; /* offset into the input text recorded by getTokens; main passes -1 for placeholders */
TokTypes type; /* lexical category of the lexeme */
}Token;

/*
 * Allocates a Token on the heap and fills in its fields.
 *
 * name  - lexeme text; copied into token_name and truncated if it does not
 *         fit (a NULL name is stored as the empty string).
 * index - stored as-is in Token.index.
 * pos   - stored as-is in Token.pos.
 * type  - lexical category.
 *
 * Returns the new token, which the caller owns and must release with free(),
 * or NULL when memory could not be allocated.
 */
Token* createToken(const char* name, int index, int pos, TokTypes type) {
    Token* t = malloc(sizeof *t);
    if (t == NULL) {
        return NULL;
    }

    /* snprintf never writes past the buffer and always NUL-terminates,
     * unlike the unbounded strcpy this replaces. */
    snprintf(t->token_name, sizeof t->token_name, "%s", name != NULL ? name : "");
    t->index = index;
    t->pos = pos;
    t->type = type;

    return t;
}

/*
 * Returns the short printable name of a token's category
 * ("id", "num", "key", ...).
 *
 * The table is indexed directly by the TokTypes value; slots 0 and 1 hold the
 * filler name "xxx" because no category uses those values. A NULL token or a
 * type outside the table also yields "xxx" instead of reading out of bounds.
 * The returned string is a literal and must not be modified or freed.
 */
char* TokStringMap(Token* t){
    static char* const names[] = {
        "xxx",
        "xxx",
        "eof",
        "id",
        "num",
        "key",
        "relop",
        "arithop",
        "logicalop",
        "assignop",
        "punct",
    };
    const int count = (int)(sizeof names / sizeof names[0]);

    if (t == NULL) {
        return "xxx";
    }

    const int kind = (int)t->type;
    if (kind < 0 || kind >= count) {
        return "xxx";
    }
    return names[kind];
}

/*
 * Writes one token to stdout in the form
 *   <'lexeme',index,x pos, category> ;
 * or the text "token is null" when t is NULL.
 */
void printToken(Token* t){
    if (t == NULL) {
        printf("token is null");
        return;
    }

    printf("<'%s',%d,x %d, %s> ; ",
           t->token_name, t->index, t->pos, TokStringMap(t));
}

void getTokens(char* input, Token* allTokens[TOKEN_TABLE_SIZE]){


int pos = 0;

int left = 0;
int right = 0;

int len = strlen(input);

int i = 0;
char buff[TOKEN_TABLE_SIZE];
Token* t;

while (right <= len && left <= len){


if (right <= len && !isDelimiter(input[right])) {
right++;
} else {
if (left < right) {
strncpy(buff, input + left, right - left);
buff[right - left] = '\0';
pos = left;

if ( isInteger(buff) ){
allTokens[i] = createToken(buff, i, pos, NUMBER);
}else if ( isKeyword(buff) ){
allTokens[i] = createToken(buff, i, pos, KEYWORD);
}else{
allTokens[i] = createToken(buff, i, pos, IDENTIFIER);
}
i++;
left = right;
}

if (isDelimiter(input[right])) {

if (isRelationalOperator(&input[right]) > 0){


if (isRelationalOperator(&input[right]) == 1){
buff[0] = input[right];
buff[1] = '\0';
pos = right;
allTokens[i] = createToken(buff, i, pos, RELOP);
i++;
}else{
buff[0] = input[right];
buff[1] = input[right+1];
buff[2] = '\0';
pos = right+1;
allTokens[i] = createToken(buff, i, pos, RELOP);
right = right + 2;
left = right;
i++;
continue;
}

}else if (isLogicalOperator(&input[right]) > 0){

if (isLogicalOperator(&input[right]) == 1){
buff[0] = input[right];
buff[1] = '\0';
pos = right;
allTokens[i] = createToken(buff, i, pos, LOGICALOP);
i++;
}else{
buff[0] = input[right];
buff[1] = input[right+1];
buff[2] = '\0';
pos = right+1;
allTokens[i] = createToken(buff, i, pos, LOGICALOP);
right = right + 2;
left = right;
i++;
continue;
}

}else if (isAssignmentOperator(&input[right])) {
if (isAssignmentOperator(&input[right]) == 1){
buff[0] = input[right];
buff[1] = '\0';
pos = right;
allTokens[i] = createToken(buff, i, pos, ASSIGNOP);
i++;
}else{
buff[0] = input[right];
buff[1] = input[right+1];
buff[2] = '\0';
pos = right+1;
allTokens[i] = createToken(buff, i, pos, ASSIGNOP);
right = right + 2;
left = right;
i++;
continue;
}

}
else if (isArithmeticOperator(input[right])) {
buff[0] = input[right];
buff[1] = '\0';
pos = right;
allTokens[i] = createToken(buff, i, pos, ARITHOP);
i++;
}
else if (isPunctuation(input[right])) {
buff[0] = input[right];
buff[1] = '\0';
pos = right;
allTokens[i] = createToken(buff, i, pos, PUNCTUATION);
i++;
}
right++;
left = right;
}
}
}
}

#endif

#utils.h

#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Input file handle; main.c defines it and opens the source file through it. */
extern FILE *fa;

/* Not referenced by any function visible in this file. */
static char *identifier;


/* Not referenced by any function visible in this file. */
static double number;

/* Declared here but no definition is visible; isKeyword uses its own local table. */
extern char *keywords[7];

/*
 * Reports whether c is a delimiter: a character that marks the boundary
 * between lexemes and may itself be a lexeme.
 *
 * Blank space of any common kind (space, tab, carriage return, newline)
 * separates lexemes, as do the operator and punctuation characters below.
 */
bool isDelimiter(char c) {
    switch (c) {
    /* whitespace */
    case ' ':
    case '\t':
    case '\r':
    case '\n':
    /* arithmetic */
    case '+':
    case '-':
    case '*':
    case '/':
    case '%':
    /* separators */
    case ',':
    case ';':
    case ':':
    /* relational / assignment */
    case '>':
    case '<':
    case '=':
    case '!':
    /* brackets */
    case '(':
    case ')':
    case '[':
    case ']':
    case '{':
    case '}':
    /* logical / bitwise */
    case '|':
    case '&':
    case '^':
        return true;
    default:
        return false;
    }
}

/*
 * Reports whether c is one of the single-character operator symbols
 * + - * / > < = %.
 */
bool isOperator(char c){
    switch (c) {
    case '+':
    case '-':
    case '*':
    case '/':
    case '>':
    case '<':
    case '=':
    case '%':
        return true;
    default:
        return false;
    }
}

/*
 * Reports whether c is a punctuation character: comma, semicolon, colon or
 * any round, square or curly bracket.
 */
bool isPunctuation(char c){
    /* strchr also matches the terminating NUL, so exclude it explicitly. */
    if (c == '\0') {
        return false;
    }
    return strchr(",;()[]{}:", c) != NULL;
}

/*
 * Reports whether c is a single-character arithmetic operator:
 * + - * / %.
 *
 * '/' is included so that a division sign is classified as an operator;
 * isDelimiter and isOperator already list it.
 */
bool isArithmeticOperator(char c){
    switch (c) {
    case '+':
    case '-':
    case '*':
    case '/':
    case '%':
        return true;
    default:
        return false;
    }
}

/*
 * Checks whether the text at `input` begins with a relational operator.
 *
 * Returns the operator's length in characters: 2 for == != >= <=,
 * 1 for a lone > or <, and 0 when no relational operator starts here
 * (a lone = or ! is not relational).
 */
int isRelationalOperator(char* input) {
    switch (input[0]) {
    case '=':
    case '!':
        /* Only relational when followed by '='. */
        return (input[1] == '=') ? 2 : 0;
    case '>':
    case '<':
        return (input[1] == '=') ? 2 : 1;
    default:
        return 0;
    }
}

/*
 * Checks whether the text at `input` begins with a logical operator.
 *
 * Returns 1 for ^ or !, 2 for || or &&, and 0 otherwise
 * (a single | or & is not matched).
 */
int isLogicalOperator(char* input){
    switch (input[0]) {
    case '^':
    case '!':
        return 1;
    case '|':
        return (input[1] == '|') ? 2 : 0;
    case '&':
        return (input[1] == '&') ? 2 : 0;
    default:
        return 0;
    }
}

/*
 * Checks whether the text at `input` begins with an assignment operator.
 *
 * Returns 1 when the first character is '=', 2 for the compound forms
 * += -= *= /= %=, and 0 otherwise.
 */
int isAssignmentOperator(char* input) {
    switch (input[0]) {
    case '=':
        return 1;
    case '+':
    case '-':
    case '*':
    case '/':
    case '%':
        /* Compound assignment needs '=' as the second character. */
        return (input[1] == '=') ? 2 : 0;
    default:
        return 0;
    }
}
// Checks whether str can start an identifier: its first character must be
// neither a decimal digit nor a delimiter. Only the first character is examined.
bool isValidIdentifier(char* str)
{
    const char first = str[0];

    if (first >= '0' && first <= '9') {
        return false;
    }
    return !isDelimiter(first);
}

// Reports whether str exactly matches (case-sensitively) one of the 32
// reserved words listed below.
bool isKeyword(char* str)
{
    static const char* const reserved[] = {
        "auto",     "break",  "case",     "char",
        "const",    "continue", "default", "do",
        "double",   "else",   "enum",     "extern",
        "float",    "for",    "goto",     "if",
        "int",      "long",   "register", "return",
        "short",    "signed", "sizeof",   "static",
        "struct",   "switch", "typedef",  "union",
        "unsigned", "void",   "volatile", "while"
    };
    const size_t total = sizeof reserved / sizeof reserved[0];

    size_t k = 0;
    while (k < total && strcmp(str, reserved[k]) != 0) {
        k++;
    }
    return k < total;
}
// Returns a newly allocated copy of str[start..end] (both ends inclusive).
//
// The range is clamped to the string: a negative start is treated as 0 and an
// end past the last character is treated as the last character. An empty or
// inverted range yields an empty string.
//
// Returns NULL when str is NULL or memory cannot be allocated. The caller
// owns the result and must free() it.
char* getSubstring(char* str, int start, int end)
{
    if (str == NULL) {
        return NULL;
    }

    const int length = (int)strlen(str);
    if (start < 0) {
        start = 0;
    }
    if (end >= length) {
        end = length - 1;
    }

    const size_t subLength = (start <= end) ? (size_t)(end - start + 1) : 0;

    char* subStr = malloc(subLength + 1);
    if (subStr == NULL) {
        return NULL;
    }

    // Skip the copy for an empty range so str + start is never formed when
    // start lies beyond the string.
    if (subLength > 0) {
        memcpy(subStr, str + start, subLength);
    }
    subStr[subLength] = '\0';
    return subStr;
}

// Reports whether str is a non-empty string consisting only of decimal
// digits. NULL, the empty string, and anything with a sign or other
// non-digit character yield false.
bool isInteger(char* str)
{
    if (str == NULL || *str == '\0') {
        return false;
    }

    size_t i = 0;
    // isdigit requires a value representable as unsigned char (or EOF);
    // passing a negative plain char is undefined behaviour, hence the cast.
    while (isdigit((unsigned char)str[i])) {
        i++;
    }
    return str[i] == '\0';
}

Symbol Table:

#symbolTable.h

#include "token.h"
#include "string.h"

#ifndef SYMBOL_TABLE_H
#define SYMBOL_TABLE_H

/* Number of slots in the symbol table; the definition lives in main.c. */
extern int SYMBOL_TABLE_SIZE;


/* Number of slots in the token table; the definition lives in main.c. */
extern int TOKEN_TABLE_SIZE;

/* One entry of the symbol table. */
typedef struct Symbol{


Token* tok; /* token this symbol was built from; points into the token table */
int size; /* -1 after initializeSymbolTable; getSymbolTableFromTokens stores 8 */
char type[128]; /* "NULL" after initializeSymbolTable, else the text of the keyword preceding the identifier */
}Symbol;

/*
 * Fills an existing Symbol in place.
 *
 * symbol - entry to overwrite.
 * tok    - token pointer to store (stored as-is, not copied).
 * size   - value for the size field.
 * type   - type name; copied into the fixed-size type field and truncated
 *          if it is too long, always leaving the field NUL-terminated.
 */
void initializeSymbol(Symbol* symbol, Token* tok, int size, const char* type) {
    const size_t lastIndex = sizeof(symbol->type) - 1;

    strncpy(symbol->type, type, lastIndex);
    symbol->type[lastIndex] = '\0';

    symbol->size = size;
    symbol->tok = tok;
}

/*
 * Writes one symbol-table entry to stdout: an opening " { ", the entry's
 * token as printed by printToken, then its size and type and a closing brace.
 */
void printSymbol(const Symbol* symbol) {
    fputs(" { ", stdout);
    printToken(symbol->tok);
    printf(" Size: %d, Type: %s }", symbol->size, symbol->type);
}

/*
 * Allocates one Symbol for each of the SYMBOL_TABLE_SIZE slots of `table`
 * and marks it unused: tok = NULL, size = -1, type = "NULL".
 *
 * tok is set explicitly so that printing an unused entry hands printToken a
 * NULL pointer rather than an indeterminate one.
 * A slot whose allocation fails is set to NULL. The caller owns every
 * non-NULL entry and releases it with free().
 */
void initializeSymbolTable(Symbol* table[SYMBOL_TABLE_SIZE]){
    for (int i = 0; i < SYMBOL_TABLE_SIZE; i++){
        Symbol* entry = malloc(sizeof *entry);
        table[i] = entry;
        if (entry == NULL) {
            continue;
        }

        entry->tok = NULL;
        entry->size = -1;
        strcpy(entry->type, "NULL");
    }
}

/*
 * Builds the symbol table from the token table.
 *
 * Every KEYWORD token immediately followed by an IDENTIFIER token yields one
 * symbol: the identifier token, size 8, and the keyword's text as its type.
 * Symbols are written to table[0], table[1], ... in order of appearance.
 * The token after a keyword is consumed together with it whether or not it
 * is an identifier, so it is never itself examined as a keyword.
 *
 * table     - SYMBOL_TABLE_SIZE entries already allocated by
 *             initializeSymbolTable.
 * alltokens - TOKEN_TABLE_SIZE token pointers.
 *
 * Scanning stops at the end of the token table or once the symbol table is
 * full; no slot outside either table is read or written.
 */
void getSymbolTableFromTokens(Symbol* table[SYMBOL_TABLE_SIZE], Token*
alltokens[TOKEN_TABLE_SIZE]){
    int next = 0;  /* next free symbol slot */
    int i = 0;

    while (i < TOKEN_TABLE_SIZE && next < SYMBOL_TABLE_SIZE){
        const Token* current = alltokens[i];

        if (current == NULL || current->type != KEYWORD){
            i++;
            continue;
        }

        /* Look at the following token only if it is still inside the table. */
        if (i + 1 < TOKEN_TABLE_SIZE){
            Token* following = alltokens[i + 1];

            if (following != NULL && following->type == IDENTIFIER
                && table[next] != NULL){ // i.e. the keyword introduces a name
                initializeSymbol(table[next], following, 8, current->token_name);
                next++;
            }
        }

        i += 2;
    }
}

#endif

Input Code

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

float c;
char da;

int main(){
int a;
int b;
int sum;

a = 1;
b = 1;
// singline lcomment

sum = a + b;

sum += 35;
printf("The sum of %d and %d is: %d\n", a, b, sum);

if (sum !=12345 || a >= sum ){


return 33;
}

/*
here are some
multine comments here yaaay
*/

return 0;
}

main

#main.c

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "token.h"
#include "preprocessor.h"
#include "symbolTable.h"

/* Handle of the input source file; opened in main and declared extern in utils.h. */
FILE* fa;
/* Number of slots in the symbol table (declared extern in the symbol-table header). */
int SYMBOL_TABLE_SIZE = 1024;
/* Number of slots in the token table (declared extern in token.h). */
int TOKEN_TABLE_SIZE = 1024;

/*
 * Driver: reads ./sample.c, runs it through the preprocessor, tokenizes the
 * result, prints the tokens, then builds and prints the symbol table.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE when the input file cannot be opened
 * or the token table cannot be allocated.
 */
int main(void){

    fa = fopen("./sample.c", "r");
    if (fa == NULL){
        printf("Cannot open file \n");
        return EXIT_FAILURE;
    }

    printf("\n----------------\n Getting the raw text \n--------\n");

    /* fgetc returns int so that EOF can be told apart from every valid byte;
     * reading stops at the buffer's capacity so longer files are truncated
     * instead of overflowing it. */
    char text[4096];
    size_t length = 0;
    int c;

    while (length < sizeof text - 1 && (c = fgetc(fa)) != EOF){
        text[length++] = (char)c;
    }
    text[length] = '\0';

    printf("%s \n----------\n Passing it through the pre-processor\n----------\n",text);

    runPreprocessor(text);

    printf("%s \n--------\n is the code to be passed through the lexical analyzer \n---------\n",
    text);

    /* Pre-fill every slot with an EOF placeholder so the loops below can stop
     * at the first slot the lexer did not overwrite. */
    Token* alltokens[TOKEN_TABLE_SIZE];
    for (int i = 0; i < TOKEN_TABLE_SIZE; i++){
        alltokens[i] = createToken("EOF", -1, -1, TOK_EOF);
        if (alltokens[i] == NULL){
            printf("Out of memory \n");
            fclose(fa);
            return EXIT_FAILURE;
        }
    }

    getTokens(text, alltokens);

    for (int i = 0; i < TOKEN_TABLE_SIZE; i++){
        if (alltokens[i]->type == TOK_EOF){
            break;
        }
        printToken(alltokens[i]);
    }

    printf("\n------\n are the tokens needed for symbol table \n-------\n");

    Symbol* symTable[SYMBOL_TABLE_SIZE];
    initializeSymbolTable(symTable);
    getSymbolTableFromTokens(symTable, alltokens);

    /* Unused entries carry size -1 (set by initializeSymbolTable); stop at
     * the first entry that is missing or not filled in. */
    for (int i = 0; i < SYMBOL_TABLE_SIZE; i++){
        if (symTable[i] == NULL || symTable[i]->size <= 0){
            break;
        }
        printSymbol(symTable[i]);
        printf("\n");
    }

    printf("\n-----\n are the symbols received");

    /* Symbols only borrow their token pointers, so each table is released
     * independently. */
    for (int i = 0; i < SYMBOL_TABLE_SIZE; i++){
        free(symTable[i]);
    }
    for (int i = 0; i < TOKEN_TABLE_SIZE; i++){
        free(alltokens[i]);
    }

    fclose(fa);
    fa = NULL;

    return EXIT_SUCCESS;
}
Output:

You might also like