0% found this document useful (0 votes)
9 views

Programm 1

Uploaded by

enashusingh001
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
9 views

Programm 1

Uploaded by

enashusingh001
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 8

Write a program to design a Lexical Analyzer in C/C++ (to recognize any five keywords, identifiers, numbers, operators, and punctuation).
Outcome: It allows the students to experience the writing style
of a lexical analyzer for a common everyday language.

#include <stdbool.h>

#include <stdio.h>

#include <string.h>

#include <stdlib.h>

// Returns 'true' if the character is a DELIMITER.
//
// Delimiters are the space character, the operator characters
// + - * / > < = and the punctuation characters , ; ( ) [ ] { }.
// Every other character, including the terminating '\0', yields 'false'.
bool isDelimiter(char ch)
{
    switch (ch) {
    case ' ':
    case '+': case '-': case '*': case '/':
    case ',': case ';':
    case '>': case '<': case '=':
    case '(': case ')':
    case '[': case ']':
    case '{': case '}':
        return true;
    default:
        return false;
    }
}

// Returns 'true' if the character is an OPERATOR.
//
// The recognized single-character operators are + - * / > < and =.
bool isOperator(char ch)
{
    // strchr() also matches the terminating '\0' of the search string,
    // so the NUL character has to be rejected explicitly.
    if (ch == '\0')
        return false;

    return strchr("+-*/><=", ch) != NULL;
}
// Returns 'true' if the string is a VALID IDENTIFIER.
//
// A valid identifier is non-empty, starts with a letter or an underscore,
// and continues with letters, digits or underscores only. A NULL or empty
// string, a leading digit, or any delimiter or other punctuation character
// anywhere in the string makes the identifier invalid.
bool validIdentifier(char* str)
{
    if (str == NULL || str[0] == '\0')
        return false;

    for (const char* p = str; *p != '\0'; p++) {
        char c = *p;
        bool isLetter = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
                        || c == '_';
        bool isDigit = (c >= '0' && c <= '9');

        // Digits are allowed everywhere except in the first position.
        if (!isLetter && !(isDigit && p != str))
            return false;
    }

    return true;
}

// Returns 'true' if the string is a KEYWORD.
//
// The comparison is exact and case-sensitive against the fixed table
// below. A NULL pointer is treated as "not a keyword".
bool isKeyword(char* str)
{
    static const char* const keywords[] = {
        "if",     "else",   "while",   "do",       "break",  "continue",
        "int",    "double", "float",   "return",   "char",   "case",
        "sizeof", "long",   "short",   "typedef",  "switch", "unsigned",
        "void",   "static", "struct",  "goto"
    };
    const size_t count = sizeof keywords / sizeof keywords[0];

    if (str == NULL)
        return false;

    for (size_t i = 0; i < count; i++) {
        if (strcmp(str, keywords[i]) == 0)
            return true;
    }

    return false;
}
// Returns 'true' if the string is an INTEGER.
//
// An integer is a non-empty string made up of the decimal digits 0-9
// only. No sign, whitespace or decimal point is accepted. A NULL or
// empty string yields 'false'.
bool isInteger(char* str)
{
    if (str == NULL || str[0] == '\0')
        return false;

    for (const char* p = str; *p != '\0'; p++) {
        if (*p < '0' || *p > '9')
            return false;
    }

    return true;
}

// Returns 'true' if the string is a REAL NUMBER.
//
// A real number consists of decimal digits and exactly one decimal
// point, with at least one digit present (so "3.14", ".5" and "5." are
// accepted). Strings without a decimal point, with more than one decimal
// point, with no digits at all, or containing any other character
// (including a sign) yield 'false', as does a NULL or empty string.
bool isRealNumber(char* str)
{
    bool hasDecimal = false;
    bool hasDigit = false;

    if (str == NULL)
        return false;

    for (const char* p = str; *p != '\0'; p++) {
        if (*p == '.') {
            // A second decimal point cannot be part of one number.
            if (hasDecimal)
                return false;
            hasDecimal = true;
        } else if (*p >= '0' && *p <= '9') {
            hasDigit = true;
        } else {
            return false;
        }
    }

    return hasDecimal && hasDigit;
}

// Extracts the SUBSTRING.
//
// Copies the characters str[left] .. str[right] (both inclusive) into a
// newly allocated, NUL-terminated string. If 'right' is smaller than
// 'left' the result is an empty string.
//
// The caller must ensure 0 <= left and right < strlen(str).
// Returns NULL if the allocation fails; otherwise the caller owns the
// returned buffer and must release it with free().
char* subString(char* str, int left, int right)
{
    size_t count = 0;

    if (right >= left)
        count = (size_t)right - (size_t)left + 1;

    char* subStr = malloc(count + 1);
    if (subStr == NULL)
        return NULL;

    memcpy(subStr, str + left, count);
    subStr[count] = '\0';

    return subStr;
}

// Parsing the input STRING.
//
// Scans 'str' from left to right and prints one line per recognized
// token: single-character operators, keywords, integers, real numbers,
// valid identifiers and invalid identifiers. Delimiters that are not
// operators (space, comma, semicolon, brackets) separate tokens but are
// not printed.
//
// 'left' marks the start of the token being collected and 'right' is the
// scan position. The scan never reads past the terminating '\0', and
// every substring obtained from subString() is freed after use.
void parse(char* str)
{
    int left = 0, right = 0;
    int len = (int)strlen(str);

    while (right <= len && left <= right) {
        bool atEnd = (right == len);

        // Token expansion: keep advancing over non-delimiter characters.
        if (!atEnd && isDelimiter(str[right]) == false) {
            right++;
            continue;
        }

        // From here on 'right' sits on a delimiter or on the end of input.
        if (left != right) {
            // A token str[left .. right - 1] has just ended.
            char* subStr = subString(str, left, right - 1);
            if (subStr == NULL) {
                fprintf(stderr, "parse: out of memory\n");
                return;
            }

            if (isKeyword(subStr) == true)
                printf("'%s' IS A KEYWORD\n", subStr);
            else if (isInteger(subStr) == true)
                printf("'%s' IS AN INTEGER\n", subStr);
            else if (isRealNumber(subStr) == true)
                printf("'%s' IS A REAL NUMBER\n", subStr);
            else if (validIdentifier(subStr) == true)
                printf("'%s' IS A VALID IDENTIFIER\n", subStr);
            else
                printf("'%s' IS NOT A VALID IDENTIFIER\n", subStr);

            free(subStr);
            left = right;
            continue;
        }

        // No pending token and nothing left to scan.
        if (atEnd)
            break;

        // A lone delimiter: report it only if it is an operator.
        if (isOperator(str[right]) == true)
            printf("'%c' IS AN OPERATOR\n", str[right]);

        right++;
        left = right;
    }

    return;
}
// DRIVER FUNCTION
//
// Runs the lexical analyzer on a fixed sample statement and prints the
// category of every token found in it.
int main(void)
{
    // maximum length of string is 100 here
    char str[100] = "int a = b + 1c; ";

    parse(str); // calling the parse function

    return (0);
}

'int' IS A KEYWORD

'a' IS A VALID IDENTIFIER

'=' IS AN OPERATOR

'b' IS A VALID IDENTIFIER

'+' IS AN OPERATOR

'1c' IS NOT A VALID IDENTIFIER

1. Functions to Identify Tokens

• isDelimiter(char ch): This function checks whether a character is a delimiter (a space, an operator, or a punctuation character such as a comma, semicolon, parenthesis, bracket, or brace). If it is, the function returns true; otherwise, it returns false.

• isOperator(char ch): This function checks whether a character is an operator (+, -, *, /, >, <, or =). If it is, the function returns true; otherwise, it returns false.

• validIdentifier(char* str): This function checks whether a given string is a valid identifier. A valid identifier must not begin with a digit or with a delimiter character. If it meets these conditions, the function returns true; otherwise, it returns false.

• isKeyword(char* str): This function checks whether a given string matches one of a predefined list of keywords (such as if, else, and while). If it matches any keyword, the function returns true; otherwise, it returns false.

• isInteger(char* str): This function checks whether a string is a valid integer. A valid integer string contains only digits (0-9). If the string represents an integer, the function returns true; otherwise, it returns false.

• isRealNumber(char* str): This function checks whether a string represents a real number. A real number contains digits and exactly one decimal point. If it is a real number, the function returns true; otherwise, it returns false.

• subString(char* str, int left, int right): This function extracts a substring from the input string str, starting at index left and ending at index right (both inclusive). The function returns the extracted substring as a newly allocated string.

2. Parsing Function

• parse(char* str): This is the core function that parses the input string. It reads through the input, identifies tokens (keywords, operators, numbers, identifiers, and so on), and categorizes them accordingly.

o Initialization: The function starts by initializing the left and right indices, which are used to traverse the input and to extract substrings from it.

o Token Identification Loop: The loop runs while right is less than or equal to the length of the string.

   ▪ Token Expansion: The loop advances the right index to find the end of the token that lies between delimiters.

   ▪ Delimiter Handling: If a delimiter is encountered:

      ▪ If left equals right and the character at right is an operator, it prints that the character is an operator.

      ▪ If left does not equal right, the substring from left to right - 1 is checked to determine whether it is a keyword, an integer, a real number, a valid identifier, or an invalid identifier.

   ▪ The left and right indices are then updated for the next token.

3. Driver Function

• main(): The driver function initializes a string str with a sample code snippet and calls the parse() function to analyze it. parse() prints the type of each token found in the input string.

4. Example Execution

Given the input string "int a = b + 1c; ":

• The parser identifies:

o int as a keyword

o a as a valid identifier

o = as an operator

o b as a valid identifier

o + as an operator

o 1c as not a valid identifier (because an identifier cannot start with a digit)

The spaces and the semicolon are delimiters but not operators, so nothing is printed for them.
Conclusion

The code serves as a basic lexical analyzer for a simple language (similar to C). It breaks down a source string into its basic tokens and categorizes each one based on predefined rules for identifiers, keywords, operators, and numbers. This kind of program is a simplified version of what a compiler's lexical analysis phase might look like.

You might also like