0% found this document useful (0 votes)
5 views4 pages

Tokencc C

Program on c

Uploaded by

h6643246
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
5 views4 pages

Tokencc C

Program on c

Uploaded by

h6643246
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 4

File: /home/shahana/tokencc.c Page 1 of 4

#include <stdio.h>
#include <ctype.h>
#include <string.h>

// Categories of lexical tokens reported by the scanner.
// The numeric values are written out explicitly; they are the same 0..7
// sequence the compiler assigns when no values are given.
typedef enum {
    TOKEN_KEYWORD        = 0, // lexeme found in the keywords table
    TOKEN_IDENTIFIER     = 1, // identifier-shaped lexeme that is not a keyword
    TOKEN_CONSTANT       = 2, // run of decimal digits
    TOKEN_STRING         = 3, // text introduced by a double quote
    TOKEN_OPERATOR       = 4, // single character accepted by isOperator
    TOKEN_SPECIAL_SYMBOL = 5, // single character accepted by isSpecialSymbol
    TOKEN_UNKNOWN        = 6, // any other single character
    TOKEN_EOF            = 7  // end of the input stream
} TokenType;

// A single token handed back by getNextToken: its category plus its text.
typedef struct {
TokenType type; // which category of token this is
char lexeme[100]; // token text; getNextToken stores it as a NUL-terminated string in this fixed 100-byte buffer
} Token;

// Reserved words that the scanner reports as keywords rather than identifiers.
// Both the strings and the table slots are read-only.
const char *const keywords[] = {
    "if", "else", "while", "for", "return",
    "int", "float", "void", "char", "double"
};

// Number of entries in keywords. It is computed from the array itself so that
// adding or removing a keyword can never leave the count out of date.
const int keywordCount = (int)(sizeof keywords / sizeof keywords[0]);

// Reports whether lexeme is one of the reserved words in the keywords table.
// The match is exact and case-sensitive (strcmp).
// The string is only read, so the parameter is const-qualified; existing
// callers that pass a plain char buffer are unaffected.
// Returns 1 when lexeme is a keyword, 0 otherwise.
int isKeyword(const char *lexeme) {
    for (int i = 0; i < keywordCount; i++) {
        if (strcmp(lexeme, keywords[i]) == 0) {
            return 1;
        }
    }
    return 0;
}

// Returns 1 when ch is one of the punctuation characters treated as a
// special symbol, and 0 for every other character.
// Note that '&', '|' and '!' are listed here and are also accepted by
// isOperator.
int isSpecialSymbol(char ch) {
    switch (ch) {
    case ',': case ';':
    case '(': case ')':
    case '[': case ']':
    case '{': case '}':
    case '&': case '|':
    case '!': case '#':
    case '^': case '~':
    case '?': case ':':
        return 1;
    default:
        return 0;
    }
}

// Returns 1 when ch is one of the single-character operators
// + - * / > < = & | ! and 0 for every other character.
int isOperator(char ch) {
    // Plain array rather than a string literal: no terminating NUL to match.
    static const char operatorChars[] = {
        '+', '-', '*', '/', '>', '<', '=', '&', '|', '!'
    };

    for (size_t idx = 0; idx < sizeof operatorChars; idx++) {
        if (operatorChars[idx] == ch) {
            return 1;
        }
    }
    return 0;
}

// Returns 1 when ch is a decimal digit '0'..'9', otherwise 0.
int isDigit(char ch) {
    if (ch < '0' || ch > '9') {
        return 0;
    }
    return 1;
}

// Returns 1 when ch may begin an identifier (a letter or '_'), otherwise 0.
// The character is converted to unsigned char before it reaches isalpha:
// the <ctype.h> functions are only defined for EOF and values representable
// as unsigned char, and a plain char may be negative.
int isIdentifierStart(char ch) {
    return ch == '_' || isalpha((unsigned char)ch) != 0;
}

// Returns 1 when ch may appear after the first character of an identifier
// (a letter, a digit or '_'), otherwise 0.
// The character is converted to unsigned char before it reaches isalnum:
// the <ctype.h> functions are only defined for EOF and values representable
// as unsigned char, and a plain char may be negative.
int isIdentifierPart(char ch) {
    return ch == '_' || isalnum((unsigned char)ch) != 0;
}

// Function to get the next token


File: /home/shahana/tokencc.c Page 2 of 4

Token getNextToken(FILE *source) {


Token token;
int ch = fgetc(source);

// Ignore whitespaces, tabs, and newlines


while (isspace(ch)) {
ch = fgetc(source);
}

// Ignore comments
if (ch == '/') {
int nextChar = fgetc(source);
if (nextChar == '/') {
// Single-line comment
while (ch != '\n' && ch != EOF) {
ch = fgetc(source);
}
return getNextToken(source); // Ignore comment and get next token
} else if (nextChar == '*') {
// Multi-line comment
while (1) {
ch = fgetc(source);
if (ch == '*' && (ch = fgetc(source)) == '/') {
break;
}
if (ch == EOF) {
break;
}
}
return getNextToken(source); // Ignore comment and get next token
} else {
ungetc(nextChar, source);
}
}

// Check for EOF


if (ch == EOF) {
token.type = TOKEN_EOF;
strcpy(token.lexeme, "EOF");
return token;
}

// Handle strings
if (ch == '\"') {
int i = 0;
token.lexeme[i++] = ch;
ch = fgetc(source);
while (ch != '\"' && ch != EOF) {
token.lexeme[i++] = ch;
ch = fgetc(source);
}
token.lexeme[i++] = ch;
token.lexeme[i] = '\0';
token.type = TOKEN_STRING;
return token;
}

// Handle identifiers and keywords


if (isIdentifierStart(ch)) {
int i = 0;
token.lexeme[i++] = ch;
ch = fgetc(source);
while (isIdentifierPart(ch)) {
token.lexeme[i++] = ch;
ch = fgetc(source);
}
File: /home/shahana/tokencc.c Page 3 of 4

ungetc(ch, source);
token.lexeme[i] = '\0';

if (isKeyword(token.lexeme)) {
token.type = TOKEN_KEYWORD;
} else {
token.type = TOKEN_IDENTIFIER;
}
return token;
}

// Handle numeric constants


if (isDigit(ch)) {
int i = 0;
token.lexeme[i++] = ch;
ch = fgetc(source);
while (isDigit(ch)) {
token.lexeme[i++] = ch;
ch = fgetc(source);
}
ungetc(ch, source);
token.lexeme[i] = '\0';
token.type = TOKEN_CONSTANT;
return token;
}

// Handle operators
if (isOperator(ch)) {
token.type = TOKEN_OPERATOR;
token.lexeme[0] = ch;
token.lexeme[1] = '\0';
return token;
}

// Handle special symbols


if (isSpecialSymbol(ch)) {
token.type = TOKEN_SPECIAL_SYMBOL;
token.lexeme[0] = ch;
token.lexeme[1] = '\0';
return token;
}

// Unknown token
token.type = TOKEN_UNKNOWN;
token.lexeme[0] = ch;
token.lexeme[1] = '\0';
return token;
}

// Writes one line to standard output describing token: a label for its type
// followed by its lexeme. TOKEN_EOF prints "End of File" without a lexeme.
// A type value outside the TokenType enumerators prints nothing.
void printToken(Token token) {
    const char *text = token.lexeme;

    if (token.type == TOKEN_KEYWORD) {
        printf("Keyword: %s\n", text);
    } else if (token.type == TOKEN_IDENTIFIER) {
        printf("Identifier: %s\n", text);
    } else if (token.type == TOKEN_CONSTANT) {
        printf("Constant: %s\n", text);
    } else if (token.type == TOKEN_STRING) {
        printf("String: %s\n", text);
    } else if (token.type == TOKEN_OPERATOR) {
        printf("Operator: %s\n", text);
    } else if (token.type == TOKEN_SPECIAL_SYMBOL) {
        printf("Special Symbol: %s\n", text);
    } else if (token.type == TOKEN_UNKNOWN) {
        printf("Unknown: %s\n", text);
    } else if (token.type == TOKEN_EOF) {
        printf("End of File\n");
    }
}

int main() {
FILE *source = fopen("sample1.c", "r");
File: /home/shahana/tokencc.c Page 4 of 4

if (source == NULL) {
printf("Error: Unable to open file.\n");
return 1;
}

Token token;
do {
token = getNextToken(source);
printToken(token);
} while (token.type != TOKEN_EOF);

fclose(source);
return 0;
}

You might also like