0% found this document useful (0 votes)
18 views3 pages

3

The document is a C program that implements a lexical analyzer for parsing and tokenizing input strings. It defines functions to identify delimiters, operators, keywords, valid identifiers, and numbers, and processes the input to extract and categorize these tokens. The main function demonstrates the lexical analyzer using a sample C code snippet.

Uploaded by

gg4480
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
0% found this document useful (0 votes)
18 views3 pages

3

The document is a C program that implements a lexical analyzer for parsing and tokenizing input strings. It defines functions to identify delimiters, operators, keywords, valid identifiers, and numbers, and processes the input to extract and categorize these tokens. The main function demonstrates the lexical analyzer using a sample C code snippet.

Uploaded by

gg4480
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
You are on page 1/ 3

#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_LENGTH 100

// Function to check if a character is a delimiter.
//
// A delimiter is a character that ends the word (identifier, keyword or
// number) currently being scanned: the space, punctuation, brackets, and the
// characters that make up operators. '&', '|' and '!' are part of the set so
// that a word ends where they appear (e.g. the "a" in "a&&b") instead of the
// operator characters being absorbed into the word.
//
// Returns true if chr is a delimiter, false otherwise ('\0' is not one).
bool isDelimiter(char chr) {
    switch (chr) {
    case ' ':
    case ',':
    case ';':
    case '+':
    case '-':
    case '*':
    case '/':
    case '%':
    case '>':
    case '<':
    case '=':
    case '&':
    case '|':
    case '!':
    case '(':
    case ')':
    case '[':
    case ']':
    case '{':
    case '}':
        return true;
    default:
        return false;
    }
}

// Function to check if a character is an operator.
//
// Recognised operator characters: + - * / % > < = & | !
// '!' is included so that "!" and "!=" can be reported as operators.
//
// Returns true if chr is one of the characters above, false otherwise.
bool isOperator(char chr) {
    switch (chr) {
    case '+':
    case '-':
    case '*':
    case '/':
    case '%':
    case '>':
    case '<':
    case '=':
    case '&':
    case '|':
    case '!':
        return true;
    default:
        return false;
    }
}

// Function to check if a string is a valid identifier.
//
// An identifier must start with a letter or an underscore and may continue
// with letters, digits or underscores. The empty string and a NULL pointer
// are not identifiers.
//
// Returns true if str satisfies these rules, false otherwise.
bool isValidIdentifier(char *str) {
    if (str == NULL) {
        return false;
    }

    // <ctype.h> functions require a value representable as unsigned char,
    // so convert before classifying (plain char may be negative).
    unsigned char first = (unsigned char)str[0];
    if (!isalpha(first) && first != '_') {
        return false;
    }

    // Walk to the terminator directly instead of re-running strlen() on
    // every iteration.
    for (size_t i = 1; str[i] != '\0'; i++) {
        unsigned char ch = (unsigned char)str[i];
        if (!isalnum(ch) && ch != '_') {
            return false;
        }
    }

    return true;
}

// Function to check if a string is a keyword.
//
// Compares str against each entry of the keyword table with strcmp (exact,
// case-sensitive match) and returns true on the first match, false when no
// entry matches.
bool isKeyword(char *str) {
    // NULL-terminated so the scan needs no element count.
    static const char *const reserved[] = {
        "auto", "break", "case", "char",
        "const", "continue", "default", "do",
        "double", "else", "enum", "extern",
        "float", "for", "goto", "if",
        "int", "long", "register", "return",
        "short", "signed", "sizeof", "static",
        "struct", "switch", "typedef", "union",
        "unsigned", "void", "volatile", "while",
        NULL
    };

    const char *const *entry = reserved;
    while (*entry != NULL) {
        if (strcmp(str, *entry) == 0) {
            return true;
        }
        entry++;
    }

    return false;
}

// Function to check if a string is an integer or floating point number.
//
// Accepted form: an optional leading '-', then digits with at most one '.'
// anywhere among them ("10", "-5", "3.14", "5." and ".5" all qualify).
// At least one digit is required, so "", "-", "." and "-." are rejected.
//
// Returns true if str matches this form, false otherwise (including NULL).
bool isNumber(char *str) {
    if (str == NULL) {
        return false;
    }

    size_t i = (str[0] == '-') ? 1 : 0; // Allow negative numbers
    bool hasDecimal = false;
    bool hasDigit = false;

    for (; str[i] != '\0'; i++) {
        // <ctype.h> functions need an unsigned char value.
        if (isdigit((unsigned char)str[i])) {
            hasDigit = true;
        } else if (str[i] == '.' && !hasDecimal) {
            hasDecimal = true; // Allow only one decimal point
        } else {
            return false;
        }
    }

    // A sign and/or a decimal point alone is not a number.
    return hasDigit;
}

// Function to extract a substring from a string.
//
// Copies the characters str[start] .. str[end] (both inclusive) into a newly
// allocated, NUL-terminated buffer.
//
// An invalid request (str is NULL, start is negative, or end < start) yields
// an empty string rather than a negative length. The caller must ensure the
// range starts inside the string; copying stops early at the source's NUL.
//
// Returns the new buffer, which the caller owns and must free(), or NULL if
// the allocation fails.
char *getSubstring(char *str, int start, int end) {
    size_t subLength = 0;

    if (str != NULL && start >= 0 && end >= start) {
        subLength = (size_t)(end - start) + 1;
    }

    char *subStr = malloc(subLength + 1);
    if (subStr == NULL) {
        return NULL;
    }

    if (subLength > 0) {
        // strncpy stops reading at the source's NUL and zero-fills the rest,
        // so a range running past the end of str does not over-read it.
        strncpy(subStr, str + start, subLength);
    }
    subStr[subLength] = '\0';

    return subStr;
}

// Returns true when the two characters together form a two-character C
// operator. Used so that adjacent but unrelated operator characters (such as
// the "=-" in "a=-1") are reported as two separate operators.
static bool isCompoundOperator(char first, char second) {
    static const char *const pairs[] = {
        "==", "!=", ">=", "<=", "&&", "||", "++", "--", "<<", ">>",
        "+=", "-=", "*=", "/=", "%=", "&=", "|=", "->"
    };

    for (size_t i = 0; i < sizeof pairs / sizeof pairs[0]; i++) {
        if (pairs[i][0] == first && pairs[i][1] == second) {
            return true;
        }
    }
    return false;
}

// Returns true when chr cannot be part of a word (identifier, keyword or
// number): whitespace, a double quote, a delimiter or an operator character.
static bool endsWord(char chr) {
    return isspace((unsigned char)chr) || chr == '"' ||
           isDelimiter(chr) || isOperator(chr);
}

// Lexical Analyzer Function
//
// Scans input from left to right and prints one line per token to stdout in
// the form "Token: <kind>, Value: <text>". Kinds printed: String Literal,
// Operator, Delimiter, Keyword, Number, Identifier and Unidentified.
//
// - Whitespace (anything isspace() accepts) separates tokens and is not
//   printed.
// - "// ..." comments are skipped up to the end of the line and
//   "/* ... */" comments up to the closing marker; an unterminated block
//   comment runs to the end of the input.
// - A string literal runs from '"' to the next unescaped '"'; an
//   unterminated literal runs to the end of the input.
// - Two operator characters are merged into one token only when they form a
//   recognised two-character operator.
//
// input must be a NUL-terminated string; a NULL pointer is ignored.
void lexicalAnalyzer(char *input) {
    if (input == NULL) {
        return;
    }

    size_t len = strlen(input);
    size_t pos = 0;

    while (pos < len) {
        char cur = input[pos];
        // pos < len, so this is at worst the terminating '\0'.
        char next = input[pos + 1];

        // Skipping whitespace (spaces, tabs, newlines)
        if (isspace((unsigned char)cur)) {
            pos++;
            continue;
        }

        // Handling string literals
        if (cur == '"') {
            size_t start = pos;
            pos++;
            while (pos < len && input[pos] != '"') {
                // A backslash escapes the next character, so \" does not
                // close the literal.
                if (input[pos] == '\\' && pos + 1 < len) {
                    pos++;
                }
                pos++;
            }
            if (pos < len) {
                pos++; // Include closing quote
            }

            char *strLiteral = getSubstring(input, (int)start, (int)(pos - 1));
            if (strLiteral == NULL) {
                // Defensive: never hand a NULL pointer to printf("%s").
                fprintf(stderr, "lexicalAnalyzer: out of memory\n");
                return;
            }
            printf("Token: String Literal, Value: %s\n", strLiteral);
            free(strLiteral); // Free allocated memory
            continue;
        }

        // Handling single-line comments
        if (cur == '/' && next == '/') {
            while (pos < len && input[pos] != '\n') {
                pos++;
            }
            continue;
        }

        // Handling multi-line comments
        if (cur == '/' && next == '*') {
            pos += 2;
            while (pos < len && !(input[pos] == '*' && input[pos + 1] == '/')) {
                pos++;
            }
            // Step over the closing */ only if it was actually found.
            pos = (pos < len) ? pos + 2 : len;
            continue;
        }

        // Detecting operators, including two-character ones (>=, ==, !=, ...)
        if (isOperator(cur)) {
            if (isCompoundOperator(cur, next)) {
                printf("Token: Operator, Value: %c%c\n", cur, next);
                pos += 2;
            } else {
                printf("Token: Operator, Value: %c\n", cur);
                pos++;
            }
            continue;
        }

        // Reporting delimiters (spaces were already skipped above)
        if (isDelimiter(cur)) {
            printf("Token: Delimiter, Value: %c\n", cur);
            pos++;
            continue;
        }

        // Processing words (identifiers, keywords, numbers). cur is not a
        // word boundary here, so the loop always consumes at least one
        // character.
        size_t start = pos;
        while (pos < len && !endsWord(input[pos])) {
            pos++;
        }

        char *subStr = getSubstring(input, (int)start, (int)(pos - 1));
        if (subStr == NULL) {
            // Defensive: never hand a NULL pointer to strcmp/printf.
            fprintf(stderr, "lexicalAnalyzer: out of memory\n");
            return;
        }

        if (isKeyword(subStr)) {
            printf("Token: Keyword, Value: %s\n", subStr);
        } else if (isNumber(subStr)) {
            printf("Token: Number, Value: %s\n", subStr);
        } else if (isValidIdentifier(subStr)) {
            printf("Token: Identifier, Value: %s\n", subStr);
        } else {
            printf("Token: Unidentified, Value: %s\n", subStr);
        }

        free(subStr); // Free allocated memory
    }
}

// Main function
//
// Demonstrates the lexical analyzer: prints a fixed sample C snippet and
// then the tokens lexicalAnalyzer() reports for it. Always returns 0.
int main(void) {
    // The literal is split with adjacent-string concatenation; a string
    // literal may not contain a raw line break.
    char lex_input[MAX_LENGTH] =
        "int main() { int a = 10, b = 20; float c = 3.14; "
        "if (a >= b) printf(\"Hello World!\"); return 0; }";

    printf("For Expression:\n%s\n\n", lex_input);

    lexicalAnalyzer(lex_input);

    return 0;
}

You might also like