0% found this document useful (0 votes)
4 views

3cse080ex3

This document is a C program that implements a simple lexer to tokenize input from standard input. It identifies different token types including identifiers, numbers, keywords, operators, and unknown characters. The lexer uses a function to read characters, classify them, and print the corresponding token type until the end of the input is reached.

Uploaded by

dhivyapratha186
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
0% found this document useful (0 votes)
4 views

3cse080ex3

This document is a C program that implements a simple lexer to tokenize input from standard input. It identifies different token types including identifiers, numbers, keywords, operators, and unknown characters. The lexer uses a function to read characters, classify them, and print the corresponding token type until the end of the input is reached.

Uploaded by

dhivyapratha186
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
You are on page 1/ 2

#include <stdio.h>
#include <ctype.h>
#include <string.h>

// Size in bytes of the token buffer declared in main(), terminating NUL included.
#define MAX_TOKEN_LEN 100

// Classification returned by get_next_token() for each lexeme it reads.
typedef enum {
    TOKEN_IDENTIFIER = 0, // name that is not in the keyword list
    TOKEN_NUMBER     = 1, // run of decimal digits
    TOKEN_KEYWORD    = 2, // name that appears in the keyword list
    TOKEN_OPERATOR   = 3, // one of the single characters + - * / ; = ( )
    TOKEN_UNKNOWN    = 4, // any other non-whitespace character
    TOKEN_EOF        = 5  // end of input reached; no lexeme produced
} TokenType;

// Reserved words; is_keyword() compares candidate names against this table.
const char *keywords[] = {
    "int",
    "float",
    "if",
    "else",
    "while",
    "return"
};
// Number of entries in `keywords`, derived from the array size at compile time.
const int num_keywords = sizeof keywords / sizeof *keywords;

// Reports whether `str` exactly matches one of the entries in `keywords`.
// Returns 1 on a match and 0 otherwise; the comparison is case-sensitive
// because it uses strcmp().
int is_keyword(const char *str) {
    const char **end = keywords + num_keywords;

    for (const char **kw = keywords; kw != end; ++kw) {
        if (!strcmp(str, *kw)) {
            return 1;
        }
    }

    return 0;
}

// Function to get the next token from the input


TokenType get_next_token(FILE *input, char *token) {
int c;
int pos = 0;

// Skip whitespaces
while ((c = fgetc(input)) != EOF && isspace(c));

if (c == EOF) {
return TOKEN_EOF;
}

// Handle identifiers and keywords


if (isalpha(c)) {
token[pos++] = c;
while ((c = fgetc(input)) != EOF && (isalnum(c) || c == '_')) {
token[pos++] = c;
}
token[pos] = '\0';
if (c != EOF) ungetc(c, input);

return is_keyword(token) ? TOKEN_KEYWORD : TOKEN_IDENTIFIER;


}

// Handle numbers
if (isdigit(c)) {
token[pos++] = c;
while ((c = fgetc(input)) != EOF && isdigit(c)) {
token[pos++] = c;
}
token[pos] = '\0';
if (c != EOF) ungetc(c, input);
return TOKEN_NUMBER;
}

// Handle operators and delimiters


if (strchr("+-*/;=()", c)) {
token[pos++] = c;
token[pos] = '\0';
return TOKEN_OPERATOR;
}

// Unknown characters
token[0] = c;
token[1] = '\0';
return TOKEN_UNKNOWN;
}

// Entry point: tokenizes standard input with get_next_token() and prints one
// labelled line per token until TOKEN_EOF is returned. Always returns 0.
int main() {
    char lexeme[MAX_TOKEN_LEN];
    FILE *source = stdin; // Reading from standard input

    for (;;) {
        TokenType kind = get_next_token(source, lexeme);
        if (kind == TOKEN_EOF) {
            break;
        }

        if (kind == TOKEN_IDENTIFIER) {
            printf("Identifier: %s\n", lexeme);
        } else if (kind == TOKEN_NUMBER) {
            printf("Number: %s\n", lexeme);
        } else if (kind == TOKEN_KEYWORD) {
            printf("Keyword: %s\n", lexeme);
        } else if (kind == TOKEN_OPERATOR) {
            printf("Operator: %s\n", lexeme);
        } else if (kind == TOKEN_UNKNOWN) {
            printf("Unknown token: %s\n", lexeme);
        }
        // Any other value prints nothing.
    }

    return 0;
}

You might also like