0% found this document useful (0 votes)
6 views33 pages

CD File

The document provides a comprehensive implementation of a lexical analyzer in C, detailing token types, keyword recognition, and token extraction from an input file. It also includes implementations using the Lex tool and YACC for parsing arithmetic expressions and variable recognition, along with generating an abstract syntax tree. Additionally, it describes a method to find the ε-closure of states in a given NFA with ε transitions.
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
6 views33 pages

CD File

The document provides a comprehensive implementation of a lexical analyzer in C, detailing token types, keyword recognition, and token extraction from an input file. It also includes implementations using the Lex tool and YACC for parsing arithmetic expressions and variable recognition, along with generating an abstract syntax tree. Additionally, it describes a method to find the ε-closure of states in a given NFA with ε transitions.
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 33

1.

Lexical Analyzer Implementation in C

#include
<stdio.h>
#include
<ctype.h>
#include
<string.h>

#define MAX_TOKEN_LENGTH 100

// Token types
// Categories a token can be classified into. TOKEN_EOF is the value
// that ends the tokenizing loop in main().
typedef enum {
    TOKEN_IDENTIFIER,
    TOKEN_KEYWORD,
    TOKEN_NUMBER,
    TOKEN_OPERATOR,
    TOKEN_SYMBOL,
    TOKEN_EOF
} TokenType;

// List of keywords
const char *keywords[] = {
    "if", "else", "while", "return",
    "int", "float", "char", "void"
};
#define KEYWORDS_COUNT (sizeof(keywords) / sizeof(keywords[0]))

/*
 * Function to check if a word is a keyword.
 *
 * word: NUL-terminated string to look up (NULL is treated as "not a keyword").
 * Returns 1 when `word` exactly matches (case-sensitively) an entry of
 * `keywords`, 0 otherwise.
 */
int isKeyword(const char *word) {
    if (word == NULL) {
        return 0;
    }
    /* size_t index: KEYWORDS_COUNT is a sizeof expression, i.e. unsigned. */
    for (size_t i = 0; i < KEYWORDS_COUNT; i++) {
        if (strcmp(word, keywords[i]) == 0) {
            return 1;
        }
    }
    return 0;
}

AVINASH KUMAR THAKUR 2200910100040


// One lexical token: the category it belongs to and its text
typedef struct {
    TokenType type;               /* category of the token */
    char value[MAX_TOKEN_LENGTH]; /* token text, stored as a C string */
} Token;

// Function to get the next token

AVINASH KUMAR THAKUR 2200910100040


Token getNextToken(FILE *file) {
Token token;
int ch;

// Skip whitespaces, tabs, and newlines


while ((ch = fgetc(file)) != EOF CC (ch == ' ' || ch == '\t' || ch == '\n'));

if (ch == EOF) {
token.type = TOKEN_EOF;
strcpy(token.value, "EOF");
return token;
}

// Identifier or Keyword
if (isalpha(ch) || ch ==
'_') { int i = 0;
char buffer[MAX_TOKEN_LENGTH];
buffer[i++] = ch;

while ((ch = fgetc(file)) != EOF CC (isalnum(ch) || ch


== '_')) { buffer[i++] = ch;
}
buffer[i] = '\0';

ungetc(ch, file); // Put back the last read

character if (isKeyword(buffer)) {
token.type = TOKEN_KEYWORD;
} else {
token.type = TOKEN_IDENTIFIER;
}
strcpy(token.value,
buffer); return token;
}

// Number
if (isdigit(ch))
{ int i = 0;
char buffer[MAX_TOKEN_LENGTH];
buffer[i++] = ch;

AVINASH KUMAR THAKUR 2200910100040


while ((ch = fgetc(file)) != EOF CC
isdigit(ch)) { buffer[i++] = ch;
}
buffer[i] = '\0';

ungetc(ch, file);
token.type =
TOKEN_NUMBER;
strcpy(token.value, buffer);
return token;
}

// Operators
if (ch == '+' || ch == '-' || ch == '*' || ch == '/' || ch == '=' || ch == '<' ||
ch == '>') { token.type = TOKEN_OPERATOR;
token.value[0] =
ch; token.value[1]
= '\0'; return
token;
}

// Symbols
if (ch == '(' || ch == ')' || ch == '{' || ch == '}' || ch == ';' || ch ==
',') { token.type = TOKEN_SYMBOL;
token.value[0] =
ch; token.value[1]
= '\0'; return
token;
}

// Unknown character
token.type =
TOKEN_EOF;
strcpy(token.value,
"UNKNOWN"); return token;
}

// Main function to tokenize input


file int main() {
FILE *file = fopen("input.txt",
"r"); if (!file) {
printf("Error opening file!\n");
AVINASH KUMAR THAKUR 2200910100040
return 1;
}

AVINASH KUMAR THAKUR 2200910100040


Token
token; do {
token =
getNextToken(file);
switch (token.type) {
case TOKEN_IDENTIFIER: printf("IDENTIFIER: %s\n",
token.value); break; case TOKEN_KEYWORD: printf("KEYWORD:
%s\n", token.value); break; case TOKEN_NUMBER:
printf("NUMBER: %s\n", token.value); break; case
TOKEN_OPERATOR: printf("OPERATOR: %s\n", token.value);
break; case TOKEN_SYMBOL: printf("SYMBOL: %s\n",
token.value); break;
case TOKEN_EOF: break;
}
} while (token.type != TOKEN_EOF);

fclose(file)
; return
0;
}

=>input.txt

int main() {
    int a = 10;
    float b = 20.5;
    if (a < b) {
        return a;
    }
}

Output –
KEYWORD: int
IDENTIFIER: main
SYMBOL: (
SYMBOL: )
SYMBOL: {
KEYWORD: int
IDENTIFIER: a
OPERATOR: =

AVINASH KUMAR THAKUR 2200910100040


NUMBER: 10
SYMBOL: ;
KEYWORD: float
IDENTIFIER: b
OPERATOR: =

AVINASH KUMAR THAKUR 2200910100040


NUMBER: 20
SYMBOL: .
NUMBER: 5
SYMBOL: ;
KEYWORD: if
SYMBOL: (
IDENTIFIER: a
OPERATOR: <
IDENTIFIER: b
SYMBOL: )
SYMBOL: {
KEYWORD: return
IDENTIFIER: a
SYMBOL: ;
SYMBOL: }
SYMBOL: }

AVINASH KUMAR THAKUR 2200910100040


2. Implementation of Lexical Analyzer using Lex
Tool

//Write Lex code(lexer.l)


//lexer.l
%{
/*
 * lexer.l - prints one "<CATEGORY>: <lexeme>" line for every token read
 * from standard input. Whitespace is discarded; any character that no
 * other rule matches is reported as UNKNOWN.
 */
#include <stdio.h>
%}

/* Define Token Types */

KEYWORD     (int|float|char|double|if|else|while|for|return|void|switch|case|break|continue)
IDENTIFIER  [a-zA-Z_][a-zA-Z0-9_]*
NUMBER      [0-9]+(\.[0-9]+)?
OPERATOR    (\+|\-|\*|\/|\=|\<\=|\>\=|\=\=|\!\=|\<|\>)
SYMBOL      [\{\}\(\)\;\,]

%%

{KEYWORD}       { printf("KEYWORD: %s\n", yytext); }
{IDENTIFIER}    { printf("IDENTIFIER: %s\n", yytext); }
{NUMBER}        { printf("NUMBER: %s\n", yytext); }
{OPERATOR}      { printf("OPERATOR: %s\n", yytext); }
{SYMBOL}        { printf("SYMBOL: %s\n", yytext); }

[ \t\n]+        { /* Ignore spaces, tabs, and newlines */ }

.               { printf("UNKNOWN: %s\n", yytext); /* Catch any unknown characters */ }

%%

int main()
{
    yylex();
    return 0;
}

/* Returning 1 tells the scanner there is no further input after EOF. */
int yywrap()
{
    return 1;
}

//Input.txt
AVINASH KUMAR THAKUR 2200910100040
int main() {
    int a = 10;
    float b = 20.5;

AVINASH KUMAR THAKUR 2200910100040


    if (a <= b) {
        a = a + 5;
    }
}

Output-
KEYWORD: int
IDENTIFIER: main
SYMBOL: (
SYMBOL: )
SYMBOL: {
KEYWORD: int
IDENTIFIER: a
OPERATOR: =
NUMBER: 10
SYMBOL: ;
KEYWORD: float
IDENTIFIER: b
OPERATOR: =
NUMBER: 20.5
SYMBOL: ;
KEYWORD: if
SYMBOL: (
IDENTIFIER: a
OPERATOR: <=
IDENTIFIER: b
SYMBOL: )
SYMBOL: {
IDENTIFIER: a
OPERATOR: =
IDENTIFIER: a
OPERATOR: +
NUMBER: 5
SYMBOL: ;
SYMBOL: }
SYMBOL: }

AVINASH KUMAR THAKUR 2200910100040


3 . Generate YACC specification for a few syntactic
categories.
A- Program to recognize a valid arithmetic expression
that uses operator +, – , * and /.

//Lex Code(arith.l)
%{
/*
 * Scanner for the arithmetic-expression parser. Returns the token codes
 * declared in y.tab.h; for NUMBER the integer value is stored in yylval.
 */
#include <stdlib.h>   /* atoi */
#include "y.tab.h"
%}

DIGIT [0-9]+

%%

{DIGIT}     { yylval = atoi(yytext); return NUMBER; }
"+"         { return PLUS; }
"-"         { return MINUS; }
"*"         { return MULT; }
"/"         { return DIV; }
"("         { return LPAREN; }
")"         { return RPAREN; }
[ \t\n]+    { /* Ignore spaces */ }
.           { /* Hand any other character to the parser as an undeclared
                 token so it is rejected instead of being echoed. */
              return (unsigned char)yytext[0]; }

%%

/* Returning 1 tells the scanner there is no further input after EOF. */
int yywrap(void)
{
    return 1;
}

//YACC code(arith.y)
%{
/*
 * Recognizes arithmetic expressions built from numbers, + - * / and
 * parentheses. Prints "Valid Expression" when the whole input parses and
 * "Invalid expression" (from yyerror) when it does not.
 */
#include <stdio.h>

int yylex(void);
int yyerror(char *s);
%}

%token NUMBER PLUS MINUS MULT DIV LPAREN RPAREN

%%

expr: expr PLUS term
    | expr MINUS term
    | term
    ;

term: term MULT factor
    | term DIV factor
    | factor
    ;

factor: NUMBER
      | LPAREN expr RPAREN
      ;

%%

int main(void) {
    printf("Enter an expression: ");
    fflush(stdout);
    /* yyparse() returns 0 only when the entire input matched the grammar. */
    if (yyparse() == 0) {
        printf("Valid Expression\n");
    }
    return 0;
}

/* Called by the parser on a syntax error; the message text is not used. */
int yyerror(char *s) {
    (void)s;
    printf("Invalid expression\n");
    return 0;
}

Input:
(5 + 3) * 2

Output:
Valid Expression

AVINASH KUMAR THAKUR 2200910100040


3 B- Program to recognize a valid variable which
starts with a letter followed by any number
of letters or digits.

//Lex code(var.l)
%{
/*
 * Scanner for the variable-name recognizer: a letter followed by any
 * number of letters or digits is returned as IDENTIFIER.
 */
#include "y.tab.h"
%}

LETTER      [a-zA-Z]
DIGIT       [0-9]
IDENTIFIER  {LETTER}({LETTER}|{DIGIT})*

%%

{IDENTIFIER}    { return IDENTIFIER; }
[ \t\n]+        { /* Ignore spaces */ }
.               { /* Hand any other character to the parser as an undeclared
                     token so input such as "1abc" is rejected instead of
                     having its bad prefix echoed and ignored. */
                  return (unsigned char)yytext[0]; }

%%

/* Returning 1 tells the scanner there is no further input after EOF. */
int yywrap(void)
{
    return 1;
}

//YACC code(var.y)

%{
/*
 * Accepts input that consists of exactly one IDENTIFIER token. Prints
 * "Valid Variable" when the whole input parses and "Invalid variable"
 * (from yyerror) when it does not.
 */
#include <stdio.h>

int yylex(void);
int yyerror(char *s);
%}

%token IDENTIFIER

%%

stmt: IDENTIFIER
    ;

%%

int main(void) {
    /* Report success only after the parser has seen end of input, so that
       trailing garbage cannot produce both messages. */
    if (yyparse() == 0) {
        printf("Valid Variable\n");
    }
    return 0;
}

/* Called by the parser on a syntax error; the message text is not used. */
int yyerror(char *s) {
    (void)s;
    printf("Invalid variable\n");
    return 0;
}

Input:
myVar123

Output:
Valid Variable

AVINASH KUMAR THAKUR 2200910100040


3 C- Implementation of Calculator using LEX and
YACC

//Lex code(calc.l)

%{
/*
 * Scanner for the calculator. Returns the token codes declared in
 * y.tab.h; for NUMBER the integer value is stored in yylval.
 */
#include <stdlib.h>   /* atoi */
#include "y.tab.h"
%}

DIGIT [0-9]+

%%

{DIGIT}     { yylval = atoi(yytext); return NUMBER; }
"+"         { return PLUS; }
"-"         { return MINUS; }
"*"         { return MULT; }
"/"         { return DIV; }
"("         { return LPAREN; }
")"         { return RPAREN; }
[ \t\n]+    { /* Ignore spaces */ }
.           { /* Hand any other character to the parser as an undeclared
                 token so it is rejected instead of being echoed. */
              return (unsigned char)yytext[0]; }

%%

/* Returning 1 tells the scanner there is no further input after EOF. */
int yywrap(void)
{
    return 1;
}

//YACC code(calc.y)

%{
/*
 * Integer calculator: evaluates expressions built from numbers, + - * /
 * and parentheses. On success prints "Valid Expression" followed by the
 * computed value; on a syntax error yyerror prints "Syntax Error".
 */
#include <stdio.h>

int yylex(void);
int yyerror(char *s);
%}

%token NUMBER PLUS MINUS MULT DIV LPAREN RPAREN

/* Kept from the original specification; the expr/term/factor layering
   below already fixes precedence and associativity on its own. */
%left PLUS MINUS
%left MULT DIV

%%

input: expr              { printf("Valid Expression\nResult = %d\n", $1); }
     ;

expr: expr PLUS term     { $$ = $1 + $3; }
    | expr MINUS term    { $$ = $1 - $3; }
    | term
    ;

term: term MULT factor   { $$ = $1 * $3; }
    | term DIV factor    {
          /* Integer division by zero is undefined behaviour in C. */
          if ($3 == 0) {
              printf("Division by zero\n");
              YYABORT;
          }
          $$ = $1 / $3;
      }
    | factor
    ;

factor: NUMBER
      | LPAREN expr RPAREN   { $$ = $2; }
      ;

%%

int main(void) {
    printf("Enter an arithmetic expression: ");
    fflush(stdout);
    yyparse();
    return 0;
}

/* Called by the parser on a syntax error; the message text is not used. */
int yyerror(char *s) {
    (void)s;
    printf("Syntax Error\n");
    return 0;
}

Input:
3+5*2

Output:
Valid Expression

AVINASH KUMAR THAKUR 2200910100040


3 D- Convert the BNF rules into YACC form and
write code to generate abstract syntax tree

// YACC code(ast.y)

%{
/*
 * Builds and prints an abstract syntax tree for arithmetic expressions
 * made of numbers, + - * / and parentheses.
 *
 * NOTE(review): no scanner accompanies this grammar. It is assumed to
 * return the token codes declared below and to expose the matched text
 * through a flex-style `char *yytext` - confirm against the scanner used.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct Node {
    char *value;          /* private copy of the operator or number text */
    struct Node *left;
    struct Node *right;
} Node;

int yylex(void);
int yyerror(char *s);
extern char *yytext;

/*
 * Allocates a tree node whose label is a copy of `value` and whose
 * children are `left` and `right` (either may be NULL). The text is copied
 * because the scanner reuses its buffer for every token. Exits the program
 * if memory cannot be allocated.
 */
Node* createNode(char *value, Node *left, Node *right) {
    size_t len = strlen(value) + 1;
    Node *node = malloc(sizeof *node);
    char *copy = malloc(len);
    if (node == NULL || copy == NULL) {
        fprintf(stderr, "Out of memory\n");
        exit(EXIT_FAILURE);
    }
    memcpy(copy, value, len);
    node->value = copy;
    node->left = left;
    node->right = right;
    return node;
}

/* Prints the tree rooted at `node` in pre-order, one label per line,
   preceded by one space per nesting level. */
void printAST(Node *node, int level) {
    if (node == NULL)
        return;
    for (int i = 0; i < level; i++)
        printf(" ");
    printf("%s\n", node->value);
    printAST(node->left, level + 1);
    printAST(node->right, level + 1);
}

%}

/* Semantic values of the non-terminals are tree nodes, not ints. */
%union {
    struct Node *node;
}

%token NUMBER PLUS MINUS MULT DIV LPAREN RPAREN
%type <node> expr term factor

%%

program: expr                { printAST($1, 0); }
       ;

expr: expr PLUS term         { $$ = createNode("+", $1, $3); }
    | expr MINUS term        { $$ = createNode("-", $1, $3); }
    | term                   { $$ = $1; }
    ;

term: term MULT factor       { $$ = createNode("*", $1, $3); }
    | term DIV factor        { $$ = createNode("/", $1, $3); }
    | factor                 { $$ = $1; }
    ;

    /* NOTE(review): yytext holds the number only if this reduction runs
       before the next token is scanned - verify with the generator used. */
factor: NUMBER               { $$ = createNode(yytext, NULL, NULL); }
      | LPAREN expr RPAREN   { $$ = $2; }
      ;

%%

int main(void) {
    yyparse();
    return 0;
}

/* Called by the parser on a syntax error; the message text is not used. */
int yyerror(char *s) {
    (void)s;
    printf("Syntax Error\n");
    return 0;
}

AVINASH KUMAR THAKUR 2200910100040


4. Write program to find ε – closure of all states of any given NFA with
ε transition.

#include <stdio.h>
#include <stdlib.h>

#define MAX_STATES 100

// Stack structure to perform DFS.
// Fixed-capacity LIFO of ints; `top` is the index of the most recently
// pushed element, or -1 when the stack is empty.
typedef struct {
    int arr[MAX_STATES];
    int top;
} Stack;

/* Resets `stack` to the empty state. */
void initStack(Stack* stack)
{
    stack->top = -1;
}

/* Returns non-zero when `stack` holds no elements. */
int isEmpty(Stack* stack)
{
    return stack->top == -1;
}

/*
 * Pushes `value` onto `stack`. If the stack already holds MAX_STATES
 * elements the value is dropped and a message is written to stderr,
 * instead of writing past the end of `arr`.
 */
void push(Stack* stack, int value)
{
    if (stack->top >= MAX_STATES - 1) {
        fprintf(stderr, "Stack overflow: value %d dropped\n", value);
        return;
    }
    stack->arr[++stack->top] = value;
}

/* Removes and returns the top element, or returns -1 if `stack` is empty. */
int pop(Stack* stack)
{
    if (!isEmpty(stack))
    {
        return stack->arr[stack->top--];
    }
    return -1;
}

// Function to perform DFS and find ε-closure


void findEClosure(int state, int epsilonTransitions[MAX_STATES][MAX_STATES],
int numStates, int epsilonCount[MAX_STATES], int closure[MAX_STATES]) {
Stack stack;
initStack(&stack);
push(&stack, state);

while (!isEmpty(&stack)) {
AVINASH KUMAR THAKUR 2200910100040
int currentState = pop(&stack);

AVINASH KUMAR THAKUR 2200910100040


// If the state is already in closure, skip it
if (closure[currentState]) continue;

// Add the state to closure


closure[currentState] = 1;

// Push all states reachable by ε-transition


for (int i = 0; i < epsilonCount[currentState]; i++)
{ int nextState = epsilonTransitions[currentState]
[i]; if (!closure[nextState]) {
push(&stack, nextState);
}
}
}
}

// Function to compute ε-closure for all states


/*
 * For each state 0 .. numStates-1, computes its ε-closure with
 * findEClosure() and prints the member states in increasing order.
 */
void computeEClosures(int numStates,
                      int epsilonTransitions[MAX_STATES][MAX_STATES],
                      int epsilonCount[MAX_STATES]) {
    int state = 0;

    while (state < numStates) {
        /* Fresh membership flags for every start state. */
        int closure[MAX_STATES] = {0};
        findEClosure(state, epsilonTransitions, numStates, epsilonCount, closure);

        // Print the ε-closure
        printf("ε-Closure of state %d = { ", state);
        for (int member = 0; member < numStates; member++) {
            if (!closure[member]) {
                continue;
            }
            printf("%d ", member);
        }
        printf("}\n");

        state++;
    }
}

/*
 * Reads the number of states and the list of ε-transitions from standard
 * input, then prints the ε-closure of every state. Returns 1 when the
 * input is malformed or refers to a state outside the declared range,
 * 0 otherwise.
 */
int main(void) {
    int numStates, numEpsilonTransitions;

    // Input number of states
    printf("Enter number of states: ");
    if (scanf("%d", &numStates) != 1 || numStates < 0 || numStates > MAX_STATES) {
        fprintf(stderr, "Number of states must be between 0 and %d\n", MAX_STATES);
        return 1;
    }

    // Initialize epsilon transitions table and epsilon count array
    int epsilonTransitions[MAX_STATES][MAX_STATES] = {0};
    int epsilonCount[MAX_STATES] = {0};

    // Input number of ε-transitions
    printf("Enter number of ε-transitions: ");
    if (scanf("%d", &numEpsilonTransitions) != 1 || numEpsilonTransitions < 0) {
        fprintf(stderr, "Invalid number of ε-transitions\n");
        return 1;
    }

    // Input ε-transitions (from state, to state)
    printf("Enter ε-transitions (from state to state):\n");
    for (int i = 0; i < numEpsilonTransitions; i++) {
        int from, to;
        if (scanf("%d %d", &from, &to) != 2) {
            fprintf(stderr, "Invalid ε-transition\n");
            return 1;
        }
        /* Both ends index fixed-size arrays, so reject out-of-range states. */
        if (from < 0 || from >= numStates || to < 0 || to >= numStates) {
            fprintf(stderr, "State out of range in transition %d -> %d\n", from, to);
            return 1;
        }
        if (epsilonCount[from] >= MAX_STATES) {
            fprintf(stderr, "Too many ε-transitions from state %d\n", from);
            return 1;
        }
        epsilonTransitions[from][epsilonCount[from]++] = to;
    }

    // Compute and print ε-closures
    computeEClosures(numStates, epsilonTransitions, epsilonCount);

    return 0;
}

Input:
Enter number of states: 4
Enter number of ε-transitions: 4
Enter ε-transitions (from state to state):
0 1
1 2
2 3
3 3

Output:
ε-Closure of state 0 = { 0 1 2 3 }
ε-Closure of state 1 = { 1 2 3 }
ε-Closure of state 2 = { 2 3 }
ε-Closure of state 3 = { 3 }

AVINASH KUMAR THAKUR 2200910100040


EXPERIMENT - 5
AIM: Write program to convert NFA with ε transition to NFA without ε
transition.
CODE:
#include <stdio.h>

#include <stdlib.h>

/* Capacity limits for the NFA tables below. */
#define MAX_STATES 100
#define MAX_ALPHABET 10

/* Number of states and number of input symbols actually in use. */
int numStates;
int numAlphabet;

/* Input symbols, by symbol index. */
char alphabet[MAX_ALPHABET];

/* epsilonTransitions[s][k]: k-th ε-successor of state s. */
int epsilonTransitions[MAX_STATES][MAX_STATES];

/* transitions[s][a][k]: k-th successor of state s on symbol index a. */
int transitions[MAX_STATES][MAX_ALPHABET][MAX_STATES];

/* epsilonCount[s]: number of ε-successors stored for state s. */
int epsilonCount[MAX_STATES];

/* transitionCount[s][a]: number of successors stored for (s, a). */
int transitionCount[MAX_STATES][MAX_ALPHABET];

// Stack structure for DFS

/* Fixed-capacity LIFO of ints; `top` is the index of the most recently
   pushed element, or -1 when the stack is empty. */
typedef struct {
    int arr[MAX_STATES];
    int top;
} Stack;

/* Resets `stack` to the empty state. */
void initStack(Stack* stack)
{
    stack->top = -1;
}

/*
 * Pushes `value` onto `stack`. If the stack already holds MAX_STATES
 * elements the value is dropped and a message is written to stderr,
 * instead of writing past the end of `arr`.
 */
void push(Stack* stack, int value)
{
    if (stack->top >= MAX_STATES - 1) {
        fprintf(stderr, "Stack overflow: value %d dropped\n", value);
        return;
    }
    stack->arr[++stack->top] = value;
}

/* Removes and returns the top element, or returns -1 if `stack` is empty. */
int pop(Stack* stack) {
    return (stack->top == -1) ? -1 : stack->arr[stack->top--];
}

// Compute ε-closure of a state

void findEClosure(int state, int closure[MAX_STATES])

{ Stack stack;

initStack(&stack);

push(&stack, state);

while (stack.top != -1) {

int current = pop(&stack);

if (!closure[current])

{ closure[current] = 1;

for (int i = 0; i < epsilonCount[current]; i++)

{ push(&stack, epsilonTransitions[current]

[i]);

// Convert NFA with ε to NFA without ε

/*
 * Replaces the global `transitions` table by its ε-free equivalent and
 * prints it. For every state s and symbol a, the new target set is the
 * union of transitions[q][a] over all q in the ε-closure of s.
 *
 * Each target is stored once per (s, a), so a set can never hold more
 * than MAX_STATES entries; `transitionCount` is updated together with
 * `transitions` so the two tables stay consistent.
 */
void convertNFA(void) {
    /* static: the table is about 400 KB, too large to place on the stack
       safely. Counts are reset explicitly below before use. */
    static int newTransitions[MAX_STATES][MAX_ALPHABET][MAX_STATES];
    static int newTransitionCount[MAX_STATES][MAX_ALPHABET];

    for (int state = 0; state < numStates; state++) {
        int closure[MAX_STATES] = {0};
        findEClosure(state, closure);

        for (int i = 0; i < numAlphabet; i++) {
            int seen[MAX_STATES] = {0};   /* targets already recorded for (state, i) */
            newTransitionCount[state][i] = 0;

            for (int q = 0; q < numStates; q++) {
                if (!closure[q]) {
                    continue;
                }
                for (int j = 0; j < transitionCount[q][i]; j++) {
                    int nextState = transitions[q][i][j];
                    if (nextState < 0 || nextState >= MAX_STATES || seen[nextState]) {
                        continue;
                    }
                    seen[nextState] = 1;
                    newTransitions[state][i][newTransitionCount[state][i]++] = nextState;
                }
            }
        }
    }

    // Copy new transitions back
    for (int i = 0; i < numStates; i++) {
        for (int j = 0; j < numAlphabet; j++) {
            transitionCount[i][j] = newTransitionCount[i][j];
            for (int k = 0; k < newTransitionCount[i][j]; k++) {
                transitions[i][j][k] = newTransitions[i][j][k];
            }
        }
    }

    // Print new NFA
    printf("\nNFA Without ε-Transitions:\n");
    for (int i = 0; i < numStates; i++) {
        for (int j = 0; j < numAlphabet; j++) {
            printf("δ(%d, %c) -> { ", i, alphabet[j]);
            for (int k = 0; k < newTransitionCount[i][j]; k++) {
                printf("%d ", transitions[i][j][k]);
            }
            printf("}\n");
        }
    }
}

int main() {

int numEpsilonTransitions, numRegularTransitions;

// Input number of states and alphabet

printf("Enter number of states: ");

scanf("%d", &numStates);

printf("Enter number of input symbols: ");

scanf("%d", &numAlphabet);

printf("Enter input symbols: ");

for (int i = 0; i < numAlphabet; i++)

scanf(" %c", &alphabet[i]);

// Input ε-transitions

printf("Enter number of ε-transitions: ");

scanf("%d", &numEpsilonTransitions);

printf("Enter ε-transitions (from to):\n");

for (int i = 0; i < numEpsilonTransitions; i++)

{ int from, to;

scanf("%d %d", &from, &to); epsilonTransitions[from]

[epsilonCount[from]++] = to;

// Input regular transitions

AVINASH KUMAR THAKUR 2200910100040


printf("Enter number of regular transitions: ");

scanf("%d", &numRegularTransitions);

printf("Enter transitions (from symbol to):\n");

for (int i = 0; i < numRegularTransitions; i++) {

int from, to;

char symbol;

scanf("%d %c %d", &from, &symbol, &to);

int symbolIndex = -1;

for (int j = 0; j < numAlphabet; j++)

if (alphabet[j] == symbol)

symbolIndex = j;

if (symbolIndex != -1) transitions[from][symbolIndex][transitionCount[from]

[symbolIndex]++] = to;

// Convert NFA with ε to NFA without ε

convertNFA();

return 0;

Input:

Enter number of states: 3

Enter number of input symbols: 2

Enter input symbols: a b

Enter number of ε-transitions: 2

AVINASH KUMAR THAKUR 2200910100040


Enter ε-transitions (from to):

0 1

1 2

Enter number of regular transitions: 2

Enter transitions (from symbol to):

1 a 2

2 b 0

Output:

NFA Without ε-Transitions:

δ(0, a) -> { 2 }

δ(0, b) -> { 0 }

δ(1, a) -> { 2 }

δ(1, b) -> { 0 }

δ(2, a) -> { }

δ(2, b) -> { 0 }

AVINASH KUMAR THAKUR 2200910100040

You might also like