0% found this document useful (0 votes)
4 views

3cse080ex3

This document is a C program that implements a simple lexer that tokenizes text read from standard input. It classifies tokens as identifiers, numbers, keywords, operators, or unknown characters. A tokenizer function reads and classifies characters, and the main loop prints each token's type and text until the end of the input is reached.

Uploaded by

dhivyapratha186
Copyright
© All Rights Reserved
Available Formats
Download as TXT, PDF or read online on Scribd
0% found this document useful (0 votes)
4 views

3cse080ex3

This document is a C program that implements a simple lexer that tokenizes text read from standard input. It classifies tokens as identifiers, numbers, keywords, operators, or unknown characters. A tokenizer function reads and classifies characters, and the main loop prints each token's type and text until the end of the input is reached.

Uploaded by

dhivyapratha186
Copyright
© All Rights Reserved
Available Formats
Download as TXT, PDF or read online on Scribd
You are on page 1/ 2

#include <ctype.h>
#include <stdio.h>
#include <string.h>

// Size in bytes of the token text buffer that main() allocates
// (the terminating '\0' has to fit inside this size as well)
#define MAX_TOKEN_LEN 100

// Classification that get_next_token() assigns to each lexeme it returns
typedef enum {
    TOKEN_IDENTIFIER = 0, // a name that is not in the keyword list
    TOKEN_NUMBER     = 1, // a run of decimal digits
    TOKEN_KEYWORD    = 2, // a name that matches an entry of the keyword list
    TOKEN_OPERATOR   = 3, // a single operator or delimiter character
    TOKEN_UNKNOWN    = 4, // any other single non-whitespace character
    TOKEN_EOF        = 5  // no more input; no token text is produced
} TokenType;

// Reserved words: a name equal to one of these is reported as a keyword
// instead of an identifier
const char *keywords[] = {
    "int",
    "float",
    "if",
    "else",
    "while",
    "return"
};
// Entry count, derived from the array so it follows edits to the list above
const int num_keywords = sizeof keywords / sizeof *keywords;

// Test whether `str` is exactly equal (case-sensitive, whole-string match)
// to one of the entries of the `keywords` table.
// Returns 1 on a match and 0 otherwise.
int is_keyword(const char *str) {
    const char *const *const end = keywords + num_keywords;

    for (const char *const *kw = keywords; kw != end; ++kw) {
        if (strcmp(str, *kw) == 0) {
            return 1;
        }
    }

    return 0;
}

// Capacity of the caller's token buffer. Normally provided by the file-level
// definition; the guarded fallback only keeps this function self-contained.
#ifndef MAX_TOKEN_LEN
#define MAX_TOKEN_LEN 100
#endif

// Read the next token from `input`.
//
// Parameters:
//   input - stream to read characters from
//   token - output buffer of at least MAX_TOKEN_LEN bytes; receives the
//           NUL-terminated token text (left untouched when TOKEN_EOF is
//           returned)
//
// Returns the token's classification:
//   TOKEN_KEYWORD / TOKEN_IDENTIFIER - a letter or '_' followed by letters,
//                                      digits or '_'
//   TOKEN_NUMBER                     - a run of decimal digits
//   TOKEN_OPERATOR                   - one of + - * / ; = ( )
//   TOKEN_UNKNOWN                    - any other single character
//   TOKEN_EOF                        - end of input (or read error)
//
// Lexemes longer than MAX_TOKEN_LEN - 1 characters are consumed in full but
// only their first MAX_TOKEN_LEN - 1 characters are stored, so the buffer is
// never overrun and the tail is not re-read as a separate token.
TokenType get_next_token(FILE *input, char *token) {
    int c;
    int pos = 0;

    // Skip leading whitespace
    do {
        c = fgetc(input);
    } while (c != EOF && isspace(c));

    if (c == EOF) {
        return TOKEN_EOF;
    }

    // Identifiers and keywords. '_' is accepted as the first character too,
    // matching the characters accepted in the rest of the name.
    if (isalpha(c) || c == '_') {
        do {
            if (pos < MAX_TOKEN_LEN - 1) {
                token[pos++] = (char)c;
            }
            c = fgetc(input);
        } while (c != EOF && (isalnum(c) || c == '_'));
        token[pos] = '\0';

        // The character that ended the name belongs to the next token
        if (c != EOF) {
            ungetc(c, input);
        }
        return is_keyword(token) ? TOKEN_KEYWORD : TOKEN_IDENTIFIER;
    }

    // Numbers (decimal digits only)
    if (isdigit(c)) {
        do {
            if (pos < MAX_TOKEN_LEN - 1) {
                token[pos++] = (char)c;
            }
            c = fgetc(input);
        } while (c != EOF && isdigit(c));
        token[pos] = '\0';

        if (c != EOF) {
            ungetc(c, input);
        }
        return TOKEN_NUMBER;
    }

    // Operators and delimiters. strchr() treats the terminating '\0' of the
    // set as part of the string, so a NUL input byte must be excluded
    // explicitly or it would be reported as an operator.
    if (c != '\0' && strchr("+-*/;=()", c) != NULL) {
        token[0] = (char)c;
        token[1] = '\0';
        return TOKEN_OPERATOR;
    }

    // Anything else is returned as a one-character unknown token
    token[0] = (char)c;
    token[1] = '\0';
    return TOKEN_UNKNOWN;
}

// Program entry point: tokenizes standard input and prints one line per
// token ("<kind>: <text>") until get_next_token() reports TOKEN_EOF.
// Always returns 0.
int main() {
    char token[MAX_TOKEN_LEN];

    for (;;) {
        const TokenType kind = get_next_token(stdin, token);

        if (kind == TOKEN_EOF) {
            break;
        }

        if (kind == TOKEN_IDENTIFIER) {
            printf("Identifier: %s\n", token);
        } else if (kind == TOKEN_NUMBER) {
            printf("Number: %s\n", token);
        } else if (kind == TOKEN_KEYWORD) {
            printf("Keyword: %s\n", token);
        } else if (kind == TOKEN_OPERATOR) {
            printf("Operator: %s\n", token);
        } else if (kind == TOKEN_UNKNOWN) {
            printf("Unknown token: %s\n", token);
        }
        // Any other value is silently ignored
    }

    return 0;
}

You might also like