0% found this document useful (0 votes)
13 views34 pages

CSE304

This document is a practical file for a Compiler Construction course at Amity University, detailing various programming tasks related to regular expressions, lexical analysis, parsing techniques, and grammar generation. It includes specific programming assignments such as designing a lexical analyzer for C language tokens, eliminating left recursion, and implementing a recursive descent calculator. The document also contains code snippets for each task, demonstrating the implementation of the required functionalities.

Uploaded by

Do you Know me
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
13 views34 pages

CSE304

This document is a practical file for a Compiler Construction course at Amity University, detailing various programming tasks related to regular expressions, lexical analysis, parsing techniques, and grammar generation. It includes specific programming assignments such as designing a lexical analyzer for C language tokens, eliminating left recursion, and implementing a recursive descent calculator. The document also contains code snippets for each task, demonstrating the implementation of the required functionalities.

Uploaded by

Do you Know me
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 34

[CSE304] - Compiler

Construction
Amity University, Greater Noida

Practical File

Supervisor:
Ms Meenakshi
Department of Computer Science and Engineering

Avinash Prasad Sinha


A41105222211
B-Tech (Computer Science Engineering) Semester – VI
Table Of Contents

Serial Main Content


Number

1. Consider the following regular expressions: a) (0 + 1) * + 01 b) (ab*c + (def)


+ + a*d+e)+ c) ((a + b)(c + d))+ + abcd. write separate programs for each of
the regular expressions mentioned above.

2. Design a Lexical analyser for identifying different types of token used in C


language.

3. Write a program which accepts a regular expression from the user and
generates a regular grammar which is equivalent to the R.E. entered by user.
The grammar will be printed to a text file, with only one production rule in
each line. Also, make sure that all production rules are displayed in
compact form, e.g. the production rules S--> aB, S--> cd, S--> PQ should be
written as S--> aB | cd | PQ and not as three different production rules.
Also, there should not be any repetition of production rules.

4. Write a program to eliminate left recursion

5. Write a program for Recursive Descent Calculator.

6. Write a program that recognizes different types of English words

7. Consider the following grammar: S --> ABC A--> abA | ab B--> b | BC C-->
c | cC Following any suitable parsing technique (prefer top-down), design
a parser which accepts a string and tells whether the string is accepted by
above grammar or not.

8. Write a program which accepts a regular grammar with no left-recursion, and


no null-production rules, and then it accepts a string and reports
whether the string is accepted by the grammar or not.

9. Design a parser which accepts a mathematical expression (containing integers


only). If the expression is valid, then evaluate the expression else
report that the expression is invalid. [Note: Design first the Grammar and then
implement using Shift-Reduce parsing technique. Your program should
generate an output file clearly showing each step of parsing/evaluation of the
intermediate sub-expressions.]

10. Open Ended program: Designing of various type parser

1. Consider the following regular expressions: a) (0 + 1)* + 01 b) (ab*c


+ (def)+ + a*d+e)+ c) ((a + b)(c + d))+ + abcd. write separate
programs for each of the regular expressions mentioned above.
a) (0 + 1)* + 01

#include <stdio.h>
#include <string.h>
#include <stdbool.h>

/*
 * Tests str against the regular expression (0 + 1)* + 01.
 *
 * Returns true when str consists solely of '0' and '1' characters (the empty
 * string included) or when it is exactly "01"; false otherwise.
 */
bool matches_a(const char *str) {
    /* First alternative, (0 + 1)*: walk past every binary digit and see
       whether that alone brought us to the terminator. */
    const char *cursor = str;
    while (*cursor == '0' || *cursor == '1') {
        cursor++;
    }
    bool only_bits = (*cursor == '\0');

    /* Second alternative: the literal two-character string "01". */
    bool literal_01 = (strcmp(str, "01") == 0);

    return only_bits || literal_01;
}

/*
 * Reads one whitespace-delimited word from stdin and reports whether it
 * matches (0 + 1)* + 01.
 */
int main() {
    char input[100];
    printf("Enter a string to test against (0 + 1)* + 01: ");
    /* The field width keeps scanf from writing past input[99]; a bare %s
       overflows the buffer on long input. */
    if (scanf("%99s", input) != 1) {
        input[0] = '\0'; /* nothing could be read: test the empty string */
    }

    if (matches_a(input)) {
        printf("The string matches the pattern.\n");
    } else {
        printf("The string does not match the pattern.\n");
    }

    return 0;
}

b) (ab*c + (def)+ + a*d+e)+

#include <stdio.h>
#include <string.h>
#include <stdbool.h>

/*
 * Each helper below tries to match ONE alternative of the pattern starting at
 * s[i]. It returns the index just past the match, or 0 when the alternative
 * does not match there (a successful match is never empty, so 0 is free to
 * signal failure). Scanning stops at the terminator because '\0' never equals
 * any pattern character.
 */

/* ab*c : one 'a', any number of 'b', one 'c'. */
static size_t end_of_ab_star_c(const char *s, size_t i) {
    if (s[i] != 'a') return 0;
    size_t j = i + 1;
    while (s[j] == 'b') j++;
    return s[j] == 'c' ? j + 1 : 0;
}

/* A single "def"; the outer + of the whole pattern supplies the repetition. */
static size_t end_of_def(const char *s, size_t i) {
    return strncmp(s + i, "def", 3) == 0 ? i + 3 : 0;
}

/* a*d+e : any number of 'a', at least one 'd', one 'e'. */
static size_t end_of_a_star_d_plus_e(const char *s, size_t i) {
    size_t j = i;
    while (s[j] == 'a') j++;
    if (s[j] != 'd') return 0;
    while (s[j] == 'd') j++;
    return s[j] == 'e' ? j + 1 : 0;
}

/*
 * Tests str against (ab*c + (def)+ + a*d+e)+ .
 *
 * Returns true when str can be split into one or more consecutive pieces,
 * each matching ab*c, def, or a*d+e; false otherwise (the empty string is
 * rejected because the outer + needs at least one piece).
 *
 * Every alternative is tried from the same start index, so a failed attempt
 * never leaves the cursor half-advanced. "def" is tried first: when it
 * matches, the only competing match is a*d+e taking just "de", which would
 * leave an 'f' that no alternative can start with.
 */
bool matches_b(const char *str) {
    size_t i = 0;
    do {
        size_t end = end_of_def(str, i);
        if (end == 0) end = end_of_ab_star_c(str, i);
        if (end == 0) end = end_of_a_star_d_plus_e(str, i);
        if (end == 0) return false; /* no alternative fits at position i */
        i = end;
    } while (str[i] != '\0');

    return true;
}

/*
 * Reads one whitespace-delimited word from stdin and reports whether it
 * matches (ab*c + (def)+ + a*d+e)+ .
 */
int main() {
    char input[100];
    printf("Enter a string to test against (ab*c + (def)+ + a*d+e)+: ");
    /* Bounded read: a bare %s would overflow input on long words. */
    if (scanf("%99s", input) != 1) {
        input[0] = '\0'; /* nothing could be read: test the empty string */
    }

    if (matches_b(input)) {
        printf("The string matches the pattern.\n");
    } else {
        printf("The string does not match the pattern.\n");
    }

    return 0;
}

c) ((a + b)*(c + d)*)+ + ab*c*d

#include <stdio.h>
#include <string.h>
#include <stdbool.h>

/*
 * Tests str against ((a + b)(c + d))* + ab*c*d .
 *
 * Returns true when str is a (possibly empty) sequence of two-character
 * pairs, each an 'a' or 'b' followed by a 'c' or 'd', or when str is an 'a',
 * then any number of 'b', then any number of 'c', then a final 'd'.
 */
bool matches_c(const char *str) {
    const int n = (int)strlen(str);

    /* Alternative 1: pairs. An odd length can never be split into pairs. */
    bool pairs_ok = (n % 2 == 0);
    for (int k = 0; pairs_ok && k < n; k += 2) {
        bool first_ok = (str[k] == 'a' || str[k] == 'b');
        bool second_ok = (str[k + 1] == 'c' || str[k + 1] == 'd');
        pairs_ok = first_ok && second_ok;
    }
    if (pairs_ok) {
        return true;
    }

    /* Alternative 2: a b* c* d. */
    if (n == 0 || str[0] != 'a') {
        return false;
    }
    int pos = 1;
    while (pos < n && str[pos] == 'b') {
        pos++;
    }
    while (pos < n && str[pos] == 'c') {
        pos++;
    }
    /* Exactly one character must remain, and it must be the closing 'd'. */
    return pos == n - 1 && str[pos] == 'd';
}
/*
 * Reads one whitespace-delimited word from stdin and reports whether it
 * matches ((a + b)(c + d))* + ab*c*d .
 */
int main() {
    char input[100];
    printf("Enter a string to test against ((a + b)(c + d))* + ab*c*d: ");
    /* Bounded read: a bare %s would overflow input on long words. */
    if (scanf("%99s", input) != 1) {
        input[0] = '\0'; /* nothing could be read: test the empty string */
    }

    if (matches_c(input)) {
        printf("The string matches the pattern.\n");
    } else {
        printf("The string does not match the pattern.\n");
    }

    return 0;
}

2. Design a Lexical analyzer for identifying different types of token used


in C language.

#include <stdio.h>
#include <ctype.h>
#include <string.h>
// Token types
enum {KEYWORD, IDENTIFIER, NUMBER, OPERATOR, PUNCTUATOR};

// C keywords recognised by the analyzer (a small subset of the language)
const char *kws[] = {"int","float","if","else","while","return"};

/*
 * Returns 1 when s is one of the entries of kws, 0 otherwise.
 * The comparison is case-sensitive.
 */
int isKeyword(char *s) {
    /* Take the count from the table itself so that adding a keyword cannot
       fall out of step with the loop bound. */
    size_t count = sizeof kws / sizeof kws[0];
    for (size_t i = 0; i < count; i++) {
        if (strcmp(s, kws[i]) == 0) return 1;
    }
    return 0;
}

/* Appends c to token unless that would leave no room for the terminator. */
static void token_put(char *token, int *len, int cap, char c) {
    if (*len < cap - 1) {
        token[(*len)++] = c;
    }
}

/*
 * Splits src into tokens and prints one line per token to stdout in the form
 * "<lexeme> (<CLASS>)", where CLASS is NUMBER, KEYWORD, IDENTIFIER, OPERATOR
 * or PUNCTUATOR. Whitespace separates tokens; any other unrecognised
 * character is skipped silently.
 *
 * A number is a run of digits and '.'; an identifier starts with a letter or
 * '_' and continues with letters, digits or '_'. Lexemes longer than the
 * internal 31-character buffer are consumed in full but printed truncated.
 */
void analyze(char *src) {
    char token[32];
    int i = 0, j, len = strlen(src);

    while (i < len) {
        /* <ctype.h> functions need an unsigned char value; a negative plain
           char is undefined behaviour. */
        while (i < len && isspace((unsigned char)src[i])) i++;
        if (i >= len) break;

        unsigned char c = (unsigned char)src[i];

        // Numbers
        if (isdigit(c)) {
            j = 0;
            while (i < len && (isdigit((unsigned char)src[i]) || src[i] == '.'))
                token_put(token, &j, (int)sizeof token, src[i++]);
            token[j] = '\0';
            printf("%s (NUMBER)\n", token);
            continue;
        }

        // Identifiers / keywords
        if (isalpha(c) || c == '_') {
            j = 0;
            while (i < len && (isalnum((unsigned char)src[i]) || src[i] == '_'))
                token_put(token, &j, (int)sizeof token, src[i++]);
            token[j] = '\0';
            printf("%s (%s)\n", token, isKeyword(token) ? "KEYWORD" : "IDENTIFIER");
            continue;
        }

        // Single-character operators
        if (strchr("+-*/%=&|<>!", src[i])) {
            printf("%c (OPERATOR)\n", src[i++]);
            continue;
        }

        // Punctuators
        if (strchr(";,(){}[]", src[i])) {
            printf("%c (PUNCTUATOR)\n", src[i++]);
            continue;
        }

        i++; // Skip unknown chars
    }
}

/* Runs the lexical analyzer over a fixed sample program. */
int main() {
    /* A writable array, because analyze() takes a non-const pointer. */
    char sample_source[] = "int main() { int x = 5+3; return 0; }";

    analyze(sample_source);

    return 0;
}

3. Write a program which accepts a regular expression from the user


and generates a regular grammar which is equivalent to the R.E.
entered by user. The grammar will be printed to a text file, with only
one production rule in each line. Also, make sure that all production
rules are displayed in compact form, e.g. the production rules S-->
aB, S--> cd, S--> PQ should be written as S--> aB | cd | PQ and not as
three different production rules. Also, there should not be any
repetition of production rules.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#define MAX_PROD 100     /* maximum number of distinct left-hand sides */
#define MAX_RHS 50       /* maximum alternatives per left-hand side */
#define MAX_RHS_LEN 20   /* bytes per alternative, terminator included */

/* All alternatives that share one left-hand side non-terminal. */
typedef struct {
    char lhs;
    char rhs[MAX_RHS][MAX_RHS_LEN];
    int rhs_count;
} Production;

Production productions[MAX_PROD];
int prod_count = 0;
char non_terminals[26] = {0};   /* non_terminals[c - 'A'] != 0 once c is taken */
char start_symbol = 'S';

/*
 * Records the production lhs --> rhs.
 *
 * Alternatives for the same lhs are collected in one Production entry, and an
 * alternative that is already present is not stored twice. A production that
 * would not fit (rhs too long, too many alternatives, too many left-hand
 * sides) is reported on stderr and dropped instead of overrunning the tables.
 */
void add_prod(char lhs, const char *rhs) {
    if (strlen(rhs) >= MAX_RHS_LEN) {
        fprintf(stderr, "Production %c --> %s is too long; ignored\n", lhs, rhs);
        return;
    }

    for (int i = 0; i < prod_count; i++) {
        Production *p = &productions[i];
        if (p->lhs != lhs) continue;

        for (int j = 0; j < p->rhs_count; j++) {
            if (strcmp(p->rhs[j], rhs) == 0) return; /* already recorded */
        }
        if (p->rhs_count >= MAX_RHS) {
            fprintf(stderr, "Too many alternatives for %c; %s ignored\n", lhs, rhs);
            return;
        }
        strcpy(p->rhs[p->rhs_count++], rhs);
        return;
    }

    if (prod_count >= MAX_PROD) {
        fprintf(stderr, "Too many productions; %c --> %s ignored\n", lhs, rhs);
        return;
    }
    productions[prod_count].lhs = lhs;
    strcpy(productions[prod_count].rhs[0], rhs);
    productions[prod_count++].rhs_count = 1;
}

/*
 * Hands out the alphabetically first upper-case letter that is neither the
 * start symbol nor already marked in non_terminals, marking it as taken.
 * Returns '\0' when no letter is left.
 */
char new_nt() {
    char candidate = 'A';
    while (candidate <= 'Z') {
        int slot = candidate - 'A';
        if (candidate != start_symbol && !non_terminals[slot]) {
            non_terminals[slot] = 1;
            return candidate;
        }
        candidate++;
    }
    return '\0';
}

/*
 * Adds the production lhs --> <first><second>. Pass '\0' as second for a
 * one-symbol right-hand side. add_prod() takes a string, so single symbols
 * are wrapped here; passing the bare char would be used as a pointer.
 */
static void add_pair_prod(char lhs, char first, char second) {
    char rhs[3] = {first, second, '\0'};
    add_prod(lhs, rhs);
}

/*
 * Translates the slice re[s..e] (inclusive) of a regular expression into
 * productions for the non-terminal lhs, using add_prod() and new_nt().
 *
 * Recognised syntax: literal symbols, ( ) grouping, '|' for alternation and
 * postfix '*' / '+' for closure.
 *   x*   ->  lhs --> ε | N,   N --> x | xN
 *   x+   ->  lhs --> N,       N --> x | xN
 *   xy.. ->  lhs --> xN,      and the rest of the slice is generated from N
 *
 * NOTE(review): for a parenthesised group followed by '*' or '+', repetition
 * is still recorded only as the unit production N --> N, which adds no
 * repetition; and whatever follows a closure is added as a further
 * alternative of lhs instead of a continuation. Both need a proper
 * construction (e.g. via an NFA) — TODO.
 */
void re_to_grammar(const char *re, int s, int e, char lhs) {
    if (s > e) return;

    int i = s;
    while (i <= e) {
        if (re[i] == '(') {
            /* Locate the ')' matching this '(' ; j ends on it. */
            int j = i + 1, depth = 1;
            while (j <= e && depth > 0) {
                if (re[j] == '(') depth++;
                if (re[j] == ')') depth--;
                j++;
            }
            j--;

            char closure = (j < e) ? re[j + 1] : '\0';
            if (closure == '*' || closure == '+') {
                char nt = new_nt();
                if (nt == '\0') {
                    fprintf(stderr, "Out of non-terminal names\n");
                    return;
                }
                if (closure == '*') {
                    add_prod(lhs, "ε");
                    add_pair_prod(lhs, nt, '\0');
                    re_to_grammar(re, i + 1, j - 1, nt);
                    add_pair_prod(nt, nt, '\0');
                } else {
                    re_to_grammar(re, i + 1, j - 1, nt);
                    add_pair_prod(nt, nt, '\0');
                    add_pair_prod(lhs, nt, '\0');
                }
                i = j + 2;
            } else {
                re_to_grammar(re, i + 1, j - 1, lhs);
                i = j + 1;
            }
        } else if (re[i] == '|') {
            /* Everything after '|' is another alternative of the same lhs. */
            re_to_grammar(re, i + 1, e, lhs);
            break;
        } else if (re[i] == '*' || re[i] == '+') {
            i++; /* stray closure operator with nothing to apply to */
        } else {
            char next = (i + 1 <= e) ? re[i + 1] : '\0';

            if (next == '*' || next == '+') {
                char nt = new_nt();
                if (nt == '\0') {
                    fprintf(stderr, "Out of non-terminal names\n");
                    return;
                }
                if (next == '*') {
                    add_prod(lhs, "ε");
                    add_pair_prod(lhs, nt, '\0');
                    add_pair_prod(nt, re[i], '\0');
                    add_pair_prod(nt, re[i], nt); /* N --> xN gives the repetition */
                } else {
                    add_pair_prod(nt, re[i], '\0');
                    add_pair_prod(nt, re[i], nt);
                    add_pair_prod(lhs, nt, '\0');
                }
                i += 2;
            } else if (next != '\0' && !strchr("|)", next)) {
                /* Concatenation: emit x followed by a fresh non-terminal that
                   derives the remainder of the slice. */
                char nt = new_nt();
                if (nt == '\0') {
                    fprintf(stderr, "Out of non-terminal names\n");
                    return;
                }
                add_pair_prod(lhs, re[i], nt);
                lhs = nt;
                i++;
            } else {
                add_pair_prod(lhs, re[i], '\0');
                i++;
            }
        }
    }
}

void write_grammar(const char *fn) {


FILE *f = fopen(fn, "w");
if (!f) { printf("Error opening file!\n"); return; }
for (int i = 0; i < prod_count; i++) {
fprintf(f, "%c --> ", productions[i].lhs);
for (int j = 0; j < productions[i].rhs_count; j++)
fprintf(f, "%s%s", j ? " | " : "", productions[i].rhs[j]);
fprintf(f, "\n");
}
fclose(f);
printf("Grammar written to %s\n", fn);
}

/*
 * Reads a regular expression (one whitespace-free word) from stdin, converts
 * it to a grammar rooted at start_symbol and writes it to grammar.txt.
 */
int main() {
    char re[100];
    printf("Enter a regular expression: ");
    /* Bounded read: a bare %s would overflow re on long input. */
    if (scanf("%99s", re) != 1) {
        printf("No regular expression entered.\n");
        return 1;
    }
    non_terminals[start_symbol - 'A'] = 1; /* reserve the start symbol's letter */
    re_to_grammar(re, 0, (int)strlen(re) - 1, start_symbol);
    write_grammar("grammar.txt");
    return 0;
}

4. Write a program to eliminate left recursion

#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#define MAX_PROD 20   /* maximum number of distinct left-hand sides */
#define MAX_RHS 10    /* maximum alternatives per left-hand side */
#define MAX_SYM 20    /* bytes per alternative, terminator included */

/* All alternatives that share one left-hand side non-terminal. */
typedef struct {
    char lhs;
    char rhs[MAX_RHS][MAX_SYM];
    int rhs_count;
} Production;

Production productions[MAX_PROD];
int prod_count = 0;

/*
 * Appends rhs as a new alternative of lhs, creating the entry for lhs on
 * first use. A production that would not fit in the fixed-size tables (rhs
 * too long, too many alternatives, too many left-hand sides) is reported on
 * stderr and dropped instead of writing out of bounds.
 */
void add_production(char lhs, const char *rhs) {
    if (strlen(rhs) >= MAX_SYM) {
        fprintf(stderr, "Production %c -> %s is too long; ignored\n", lhs, rhs);
        return;
    }

    for (int i = 0; i < prod_count; i++) {
        if (productions[i].lhs != lhs) continue;

        if (productions[i].rhs_count >= MAX_RHS) {
            fprintf(stderr, "Too many alternatives for %c; %s ignored\n", lhs, rhs);
            return;
        }
        strcpy(productions[i].rhs[productions[i].rhs_count], rhs);
        productions[i].rhs_count++;
        return;
    }

    if (prod_count >= MAX_PROD) {
        fprintf(stderr, "Too many productions; %c -> %s ignored\n", lhs, rhs);
        return;
    }
    productions[prod_count].lhs = lhs;
    strcpy(productions[prod_count].rhs[0], rhs);
    productions[prod_count].rhs_count = 1;
    prod_count++;
}

void print_grammar() {
printf("\nGrammar:\n");
for (int i = 0; i < prod_count; i++) {
printf("%c -> ", productions[i].lhs);
for (int j = 0; j < productions[i].rhs_count; j++) {
if (j > 0) printf(" | ");
printf("%s", productions[i].rhs[j]);
}
printf("\n");
}
}

/*
 * Returns non-zero when c is an upper-case letter (the convention used here
 * for non-terminals), 0 otherwise.
 */
int is_non_terminal(char c) {
    /* isupper() requires a value representable as unsigned char; a negative
       plain char would be undefined behaviour. */
    return isupper((unsigned char)c);
}

void eliminate_left_recursion() {
for (int i = 0; i < prod_count; i++) {
char A = productions[i].lhs;

// Check for immediate left recursion


int has_left_recursion = 0;
for (int j = 0; j < productions[i].rhs_count; j++) {
if (productions[i].rhs[j][0] == A) {
has_left_recursion = 1;
break;
}
}

if (!has_left_recursion) continue;

// Create new non-terminal


char A_prime = A + 1; // Simple way to get a new non-terminal

// Separate productions into left recursive and others


char alpha[MAX_RHS][MAX_SYM], beta[MAX_RHS][MAX_SYM];
int alpha_count = 0, beta_count = 0;

for (int j = 0; j < productions[i].rhs_count; j++) {


if (productions[i].rhs[j][0] == A) {
strcpy(alpha[alpha_count], productions[i].rhs[j] + 1);
alpha_count++;
} else {
strcpy(beta[beta_count], productions[i].rhs[j]);
beta_count++;
}
}

// Remove the original production


productions[i].rhs_count = 0;

// Add new productions for A


for (int j = 0; j < beta_count; j++) {
char new_rhs[MAX_SYM];
strcpy(new_rhs, beta[j]);
strcat(new_rhs, (char[]){A_prime, '\0'});
add_production(A, new_rhs);
}

// Add new productions for A'


for (int j = 0; j < alpha_count; j++) {
char new_rhs[MAX_SYM];
strcpy(new_rhs, alpha[j]);
strcat(new_rhs, (char[]){A_prime, '\0'});
add_production(A_prime, new_rhs);
}
add_production(A_prime, "ε"); // Epsilon production
}
}

/*
 * Reads productions from stdin, one per line in the form A->aB|b, until an
 * empty line or end of input, and stores every alternative through
 * add_production(). Lines without a '>' are reported and skipped.
 */
void input_grammar() {
    printf("Enter productions (one per line, empty line to stop):\n");
    printf("Format: A->aB|b\n");

    char line[100];
    /* Stop at EOF as well: ignoring fgets' result would loop forever on a
       closed stdin and parse a stale buffer. */
    while (fgets(line, sizeof(line), stdin) != NULL) {
        if (line[0] == '\n') break;

        // Parse the production
        char lhs = line[0];
        char *arrow = strchr(line, '>');
        if (arrow == NULL) {
            printf("Skipping malformed production (expected A->...): %s", line);
            continue;
        }

        // Split multiple RHS alternatives
        char *token = strtok(arrow + 1, "|\n");
        while (token != NULL) {
            add_production(lhs, token);
            token = strtok(NULL, "|\n");
        }
    }
}

/*
 * Driver: read a grammar, show it, remove immediate left recursion and show
 * the rewritten grammar.
 */
int main() {
    /* Before */
    input_grammar();
    print_grammar();

    /* Transform in place */
    eliminate_left_recursion();

    /* After */
    printf("\nGrammar after eliminating left recursion:\n");
    print_grammar();

    return 0;
}

5. Write a program for Recursive Descent Calculator.

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>

char *input;        /* NUL-terminated expression text being parsed */
int position = 0;   /* index of the next unread character in input */

/* Reports a syntax error at the current position on stderr and exits with status 1. */
void error() {
    fprintf(stderr, "Syntax error at position %d\n", position);
    exit(1);
}

/* Returns the next unread character without consuming it. */
char peek() {
    return input[position];
}

/* Returns the next unread character and advances past it. */
char consume() {
    return input[position++];
}

/* Returns non-zero when the whole input has been consumed. */
int is_at_end() {
    return input[position] == '\0';
}

/* Advances past any whitespace at the current position. */
void skip_whitespace() {
    /* <ctype.h> classifiers need an unsigned char value; a negative plain
       char is undefined behaviour. */
    while (isspace((unsigned char)peek())) consume();
}

/*
 * Consumes a run of decimal digits and returns its value; returns 0 without
 * consuming anything when the next character is not a digit.
 * NOTE(review): values beyond the range of int overflow — not detected here.
 */
int number() {
    int result = 0;
    while (isdigit((unsigned char)peek())) {
        result = result * 10 + (consume() - '0');
    }
    return result;
}

int expression();

/*
 * factor := '(' expression ')' | number
 *
 * Parses one factor at the current position (leading whitespace allowed) and
 * returns its value. Calls error(), which exits, on anything else or on a
 * missing ')'.
 */
int factor() {
    skip_whitespace();
    if (peek() == '(') {
        consume(); // '('
        int result = expression();
        skip_whitespace();
        if (peek() != ')') error();
        consume(); // ')'
        return result;
    }
    /* The cast keeps isdigit() defined for bytes above 127. */
    if (isdigit((unsigned char)peek())) {
        return number();
    }
    error();
    return 0; /* not reached: error() exits */
}
/*
 * term := factor { ('*' | '/') factor }
 *
 * Evaluates a left-associative chain of multiplications and integer
 * divisions. Prints "Division by zero" to stderr and exits with status 1
 * when a divisor is 0.
 */
int term() {
    int acc = factor();

    for (;;) {
        skip_whitespace();
        char sym = peek();
        if (sym != '*' && sym != '/') {
            break;
        }
        consume();

        int rhs = factor();
        if (sym == '*') {
            acc *= rhs;
            continue;
        }
        if (rhs == 0) {
            fprintf(stderr, "Division by zero\n");
            exit(1);
        }
        acc /= rhs;
    }

    return acc;
}

/*
 * expression := term { ('+' | '-') term }
 *
 * Evaluates a left-associative chain of additions and subtractions and
 * returns the result.
 */
int expression() {
    int total = term();

    for (;;) {
        skip_whitespace();
        char sym = peek();
        if (sym != '+' && sym != '-') {
            break;
        }
        consume();

        int operand = term();
        total = (sym == '+') ? total + operand : total - operand;
    }

    return total;
}

/*
 * Reads one line from stdin, evaluates it as an integer arithmetic
 * expression and prints the result. Trailing input that is not part of the
 * expression is a syntax error.
 */
int main() {
    char buffer[256];
    printf("Enter an arithmetic expression: ");
    /* Without this check an immediate EOF would leave buffer uninitialised
       and the parser would read garbage. */
    if (fgets(buffer, sizeof(buffer), stdin) == NULL) {
        fprintf(stderr, "No input\n");
        return 1;
    }
    input = buffer;

    int result = expression();

    /* expression() already skipped trailing whitespace, including the
       newline kept by fgets. */
    if (!is_at_end()) {
        error();
    }

    printf("Result: %d\n", result);

    return 0;
}

6. Write a program that recognizes different types of English words

#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdbool.h>

#define MAX_WORD_LENGTH 50

// One dictionary entry: a word and the name of its part of speech
typedef struct {
    char word[MAX_WORD_LENGTH];
    char category[20];
} WordEntry;

// Sample dictionary of words with their categories
WordEntry dictionary[] = {
    {"run", "verb"}, {"jump", "verb"}, {"write", "verb"}, {"read", "verb"},
    {"dog", "noun"}, {"cat", "noun"}, {"book", "noun"}, {"computer", "noun"},
    {"happy", "adjective"}, {"sad", "adjective"}, {"quick", "adjective"},
    {"slow", "adjective"},
    {"quickly", "adverb"}, {"slowly", "adverb"}, {"happily", "adverb"},
    {"sadly", "adverb"},
    {"the", "article"}, {"a", "article"}, {"an", "article"},
    {"in", "preposition"}, {"on", "preposition"}, {"at", "preposition"},
    {"with", "preposition"}
};

int dictionary_size = sizeof(dictionary) / sizeof(dictionary[0]);

/* Returns true when word ends with suffix; never reads before word's start. */
static bool has_suffix(const char *word, const char *suffix) {
    size_t word_len = strlen(word);
    size_t suffix_len = strlen(suffix);
    return word_len >= suffix_len &&
           strcmp(word + word_len - suffix_len, suffix) == 0;
}

/*
 * Returns the category of word as a static string.
 *
 * An exact (case-sensitive) dictionary match wins. Otherwise the category is
 * guessed from the ending, checked in this order: noun (-ion, -ity, -ment,
 * -ness), verb (-ed, -ing), adjective (-ful, -able, -ible, -ic; words of at
 * least 3 letters), adverb (-ly; words of at least 4 letters). Returns
 * "unknown" when nothing applies.
 */
const char* get_word_category(const char *word) {
    for (int i = 0; i < dictionary_size; i++) {
        if (strcmp(dictionary[i].word, word) == 0) {
            return dictionary[i].category;
        }
    }

    // Not in the dictionary: guess from common suffixes.
    /* Each suffix is compared against a tail of its own length. A fixed
       3-character tail can never equal "ment" or "ness", and a fixed offset
       reads before the start of short words. */
    size_t len = strlen(word);

    // Common noun suffixes
    if (has_suffix(word, "ion") || has_suffix(word, "ity") ||
        has_suffix(word, "ment") || has_suffix(word, "ness")) {
        return "noun";
    }

    // Common verb suffixes
    if (has_suffix(word, "ed") || has_suffix(word, "ing")) {
        return "verb";
    }

    // Common adjective suffixes
    if (len >= 3 &&
        (has_suffix(word, "ful") || has_suffix(word, "able") ||
         has_suffix(word, "ible") || has_suffix(word, "ic"))) {
        return "adjective";
    }

    // Common adverb suffix
    if (len >= 4 && has_suffix(word, "ly")) {
        return "adverb";
    }

    return "unknown";
}

// Function to check if a character is a word character


/* Returns true when c can be part of a word: a letter or an apostrophe. */
bool is_word_char(char c) {
    /* isalpha() requires a value representable as unsigned char; a negative
       plain char would be undefined behaviour. */
    return isalpha((unsigned char)c) || c == '\'';
}

// Function to extract words from a sentence


/*
 * Splits sentence into words (maximal runs of is_word_char characters),
 * lower-cases each word and prints it with the category reported by
 * get_word_category(), one word per line under a short heading.
 *
 * Words longer than MAX_WORD_LENGTH - 1 characters are consumed in full but
 * classified and printed by their first MAX_WORD_LENGTH - 1 characters.
 */
void classify_words_in_sentence(const char *sentence) {
    char word[MAX_WORD_LENGTH];
    int word_pos = 0;
    int sentence_pos = 0;

    printf("Word Classification:\n");
    printf("-------------------\n");

    while (sentence[sentence_pos] != '\0') {
        // Skip non-word characters
        while (sentence[sentence_pos] != '\0' &&
               !is_word_char(sentence[sentence_pos])) {
            sentence_pos++;
        }

        // Extract word
        word_pos = 0;
        while (sentence[sentence_pos] != '\0' &&
               is_word_char(sentence[sentence_pos])) {
            /* Store only while there is room for the terminator; an
               over-long word must not overrun the buffer. */
            if (word_pos < MAX_WORD_LENGTH - 1) {
                word[word_pos++] =
                    (char)tolower((unsigned char)sentence[sentence_pos]);
            }
            sentence_pos++;
        }
        word[word_pos] = '\0';

        // Classify and print if we found a word
        if (word_pos > 0) {
            printf("%-15s : %s\n", word, get_word_category(word));
        }
    }
}

/* Reads one sentence from stdin and prints the category of each word in it. */
int main() {
    char sentence[256];

    printf("Enter an English sentence: ");
    /* On EOF fgets leaves the buffer untouched, so bail out instead of
       measuring an uninitialised array. */
    if (fgets(sentence, sizeof(sentence), stdin) == NULL) {
        printf("No input.\n");
        return 1;
    }

    // Remove the newline kept by fgets, if present
    sentence[strcspn(sentence, "\n")] = '\0';

    classify_words_in_sentence(sentence);

    return 0;
}

7. Consider the following grammar: S --> ABC A--> abA | ab B--> b |


BC C--> c | cC Following any suitable parsing technique (prefer top-
down), design a parser which accepts a string and tells whether the
string is accepted by above grammar or not.

#include <stdio.h>
#include <string.h>
#include <stdbool.h>

const char *input;   /* NUL-terminated string being parsed */
int position = 0;    /* index of the next unread character in input */

// One parsing function per non-terminal of the grammar
bool parse_S();
bool parse_A();
bool parse_B();
bool parse_C();

// Scanner helpers

/* Returns the next unread character without consuming it. */
char peek() {
    return *(input + position);
}

/* Advances past the next character. */
void consume() {
    position += 1;
}

/*
 * Consumes the next character and returns true when it equals expected;
 * otherwise leaves the position unchanged and returns false.
 */
bool match(char expected) {
    if (peek() != expected) {
        return false;
    }
    consume();
    return true;
}

// Grammar rule implementations


/* S → A B C : succeeds when the three parts match one after another. */
bool parse_S() {
    if (!parse_A()) {
        return false;
    }
    if (!parse_B()) {
        return false;
    }
    return parse_C();
}

/*
 * A → abA | ab
 * Matches "ab" and, while another 'a' follows, keeps matching further "ab"
 * pairs recursively.
 */
bool parse_A() {
    if (!match('a')) {
        return false;
    }
    if (!match('b')) {
        return false;
    }
    /* One character of lookahead decides between "abA" and plain "ab". */
    return (peek() == 'a') ? parse_A() : true;
}

bool parse_B() {
// B → b | BC
if (match('b')) {
if (peek() == 'c' || peek() == 'C') {
return parse_B() && parse_C(); // BC case
}
return true; // b case
}
return false;
}

/*
 * C → c | cC
 * Matches one 'c' and then, recursively, every 'c' that follows it.
 */
bool parse_C() {
    if (!match('c')) {
        return false;
    }
    if (peek() != 'c') {
        return true; /* single c */
    }
    return parse_C(); /* cC */
}

/*
 * Reads one whitespace-delimited word from stdin and reports whether the
 * grammar S → ABC derives it. The word is accepted only when parse_S()
 * succeeds and has consumed every character.
 */
int main() {
    char buffer[100];
    printf("Enter a string to parse: ");
    /* Bounded read: a bare %s would overflow buffer on long words. */
    if (scanf("%99s", buffer) != 1) {
        buffer[0] = '\0'; /* nothing could be read: parse the empty string */
    }
    input = buffer;

    if (parse_S() && (size_t)position == strlen(input)) {
        printf("String is accepted by the grammar.\n");
    } else {
        printf("String is NOT accepted by the grammar.\n");
    }

    return 0;
}

8. Write a program which accepts a regular grammar with no left-


recursion, and no null-production rules, and then it accepts a string
and reports whether the string is accepted by the grammar or not.

#include <stdio.h>
#include <string.h>
#include <stdbool.h>
#include <ctype.h>
#define MAX_PROD 20   /* maximum number of distinct left-hand sides */
#define MAX_RHS 10    /* maximum alternatives per left-hand side */
#define MAX_SYM 20    /* bytes per alternative, terminator included */

/* All alternatives that share one left-hand side non-terminal. */
typedef struct {
    char lhs;
    char rhs[MAX_RHS][MAX_SYM];
    int rhs_count;
} Production;

Production productions[MAX_PROD];
int prod_count = 0;
char start_symbol;   /* left-hand side of the first production entered */

/*
 * Appends rhs as a new alternative of lhs, creating the entry for lhs on
 * first use. A production that would not fit in the fixed-size tables (rhs
 * too long, too many alternatives, too many left-hand sides) is reported on
 * stderr and dropped instead of writing out of bounds.
 */
void add_production(char lhs, const char *rhs) {
    if (strlen(rhs) >= MAX_SYM) {
        fprintf(stderr, "Production %c -> %s is too long; ignored\n", lhs, rhs);
        return;
    }

    for (int i = 0; i < prod_count; i++) {
        if (productions[i].lhs != lhs) continue;

        if (productions[i].rhs_count >= MAX_RHS) {
            fprintf(stderr, "Too many alternatives for %c; %s ignored\n", lhs, rhs);
            return;
        }
        strcpy(productions[i].rhs[productions[i].rhs_count], rhs);
        productions[i].rhs_count++;
        return;
    }

    if (prod_count >= MAX_PROD) {
        fprintf(stderr, "Too many productions; %c -> %s ignored\n", lhs, rhs);
        return;
    }
    productions[prod_count].lhs = lhs;
    strcpy(productions[prod_count].rhs[0], rhs);
    productions[prod_count].rhs_count = 1;
    prod_count++;
}

/*
 * Reads productions from stdin, one per line in the form A->aB|a, until an
 * empty line or end of input, and stores every alternative through
 * add_production(). The left-hand side of the first accepted line becomes
 * start_symbol. Lines without a '>' are reported and skipped.
 */
void input_grammar() {
    printf("Enter productions (one per line, empty line to stop):\n");
    printf("Format: A->aB|a\n");

    char line[100];
    /* Stop at EOF as well: ignoring fgets' result would loop forever on a
       closed stdin and parse a stale buffer. */
    while (fgets(line, sizeof(line), stdin) != NULL) {
        if (line[0] == '\n') break;

        // Parse the production
        char lhs = line[0];
        char *arrow = strchr(line, '>');
        if (arrow == NULL) {
            printf("Skipping malformed production (expected A->...): %s", line);
            continue;
        }
        if (prod_count == 0) start_symbol = lhs;

        // Split multiple RHS alternatives
        char *token = strtok(arrow + 1, "|\n");
        while (token != NULL) {
            add_production(lhs, token);
            token = strtok(NULL, "|\n");
        }
    }
}

/*
 * Decides whether current_symbol derives the suffix of str that starts at
 * str_pos, trying the alternatives of current_symbol with backtracking.
 *
 * Only right-linear alternatives are used: a terminal followed by an
 * upper-case non-terminal ("aB"), or a single terminal ("a") that must be
 * the last character of str. Characters after the second one in an
 * alternative are ignored.
 *
 * Returns true when a derivation exists, false otherwise.
 */
bool recognize_string(const char *str, char current_symbol, int str_pos) {
    // Base case: end of string.
    // The grammar has no null productions, so no symbol can derive the
    // empty remainder.
    if (str[str_pos] == '\0') {
        return false;
    }

    // Find productions for current symbol
    for (int i = 0; i < prod_count; i++) {
        if (productions[i].lhs != current_symbol) continue;

        // Check each production alternative
        for (int j = 0; j < productions[i].rhs_count; j++) {
            const char *rhs = productions[i].rhs[j];

            // The alternative must start with the current input character
            if (rhs[0] != str[str_pos]) continue;

            size_t rhs_len = strlen(rhs);
            /* The cast keeps isupper() defined for bytes above 127. */
            if (rhs_len > 1 && isupper((unsigned char)rhs[1])) {
                // Case 1: terminal followed by non-terminal (aB)
                if (recognize_string(str, rhs[1], str_pos + 1)) {
                    return true;
                }
            } else if (rhs_len == 1 && str[str_pos + 1] == '\0') {
                // Case 2: single terminal (a) at end of string
                return true;
            }
        }
    }

    return false;
}

/*
 * Reads a regular grammar, then one whitespace-delimited word, and reports
 * whether the grammar's start symbol derives that word.
 */
int main() {
    input_grammar();

    char str[100];
    printf("Enter a string to check: ");
    /* Bounded read: a bare %s would overflow str on long words. */
    if (scanf("%99s", str) != 1) {
        str[0] = '\0'; /* nothing could be read: check the empty string */
    }

    if (recognize_string(str, start_symbol, 0)) {
        printf("String is accepted by the grammar.\n");
    } else {
        printf("String is NOT accepted by the grammar.\n");
    }
    return 0;
}

9. Design a parser which accepts a mathematical expression (containing


integers only). If the expression is valid, then evaluate the expression
else report that the expression is invalid. [Note: Design first the
Grammar and then implement using Shift-Reduce parsing technique.
Your program should generate an output file clearly showing each
step of parsing/evaluation of the intermediate sub-expressions.]

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>

#define MAX_STACK 100

/* One stack element: an integer operand, an operator, or a parenthesis. */
typedef struct {
    int type; // 0: number, 1: operator, 2: parenthesis
    int value;
    char op;
} Token;

/* Fixed-capacity stack of tokens; top is the index of the last element, -1 when empty. */
typedef struct {
    Token tokens[MAX_STACK];
    int top;
} Stack;

/* Makes s an empty stack. */
void init_stack(Stack *s) {
    s->top = -1;
}

/* Pushes t onto s; prints "Stack overflow" to stderr and exits with status 1 when s is full. */
void push(Stack *s, Token t) {
    if (s->top >= MAX_STACK - 1) {
        fprintf(stderr, "Stack overflow\n");
        exit(1);
    }
    s->top += 1;
    s->tokens[s->top] = t;
}

/* Removes and returns the top token; prints "Stack underflow" to stderr and exits with status 1 when s is empty. */
Token pop(Stack *s) {
    if (s->top < 0) {
        fprintf(stderr, "Stack underflow\n");
        exit(1);
    }
    Token removed = s->tokens[s->top];
    s->top -= 1;
    return removed;
}

/* Returns the top token without removing it. The caller must make sure s is not empty. */
Token peek(Stack *s) {
    int last = s->top;
    return s->tokens[last];
}

/* Returns 1 when s holds no tokens, 0 otherwise. */
int is_empty(Stack *s) {
    return (s->top == -1) ? 1 : 0;
}

/*
 * Writes the contents of s to out, bottom first, on a single line: numbers
 * (type 0) as decimal integers, all other tokens by their op character, each
 * followed by a space.
 */
void print_stack(Stack *s, FILE *out) {
    int idx = 0;
    while (idx <= s->top) {
        const Token *entry = &s->tokens[idx++];
        if (entry->type != 0) {
            fprintf(out, "%c ", entry->op);
        } else {
            fprintf(out, "%d ", entry->value);
        }
    }
    fprintf(out, "\n");
}

/*
 * Returns the binding strength of op: 2 for '*' and '/', 1 for '+' and '-',
 * and 0 for every other character.
 */
int precedence(char op) {
    if (op == '*' || op == '/') {
        return 2;
    }
    if (op == '+' || op == '-') {
        return 1;
    }
    return 0;
}

/*
 * Computes a <op> b for op in + - * / and returns the result; any other
 * operator yields 0. Division by zero prints "Division by zero" to stderr
 * and exits with status 1.
 */
int apply_op(int a, int b, char op) {
    if (op == '+') {
        return a + b;
    }
    if (op == '-') {
        return a - b;
    }
    if (op == '*') {
        return a * b;
    }
    if (op != '/') {
        return 0;
    }
    if (b == 0) {
        fprintf(stderr, "Division by zero\n");
        exit(1);
    }
    return a / b;
}

void evaluate_expression(const char *expr, FILE *out) {


Stack values, ops;
init_stack(&values);
init_stack(&ops);

fprintf(out, "Parsing steps:\n");


fprintf(out, "--------------\n");

for (int i = 0; expr[i]; i++) {


if (expr[i] == ' ') continue;

if (expr[i] == '(') {
Token t = {2, 0, '('};
push(&ops, t);
fprintf(out, "Shift '(': ");
print_stack(&values, out);
}
else if (isdigit(expr[i])) {
int val = 0;
while (expr[i] && isdigit(expr[i])) {
val = val * 10 + (expr[i] - '0');
i++;
}
i--;

Token t = {0, val, 0};


push(&values, t);
fprintf(out, "Shift %d: ", val);
print_stack(&values, out);
}
else if (expr[i] == ')') {
fprintf(out, "Shift ')': ");
print_stack(&values, out);

while (!is_empty(&ops) && peek(&ops).op != '(') {


Token op = pop(&ops);
Token b = pop(&values);
Token a = pop(&values);

int res = apply_op(a.value, b.value, op.op);


Token result = {0, res, 0};
push(&values, result);

fprintf(out, "Reduce %d %c %d = %d: ", a.value, op.op, b.value, res);


print_stack(&values, out);
}

if (!is_empty(&ops) pop(&ops); // Pop '('


}
else {
while (!is_empty(&ops) && precedence(peek(&ops).op) >=
precedence(expr[i])) {
Token op = pop(&ops);
Token b = pop(&values);
Token a = pop(&values);

int res = apply_op(a.value, b.value, op.op);


Token result = {0, res, 0};
push(&values, result);

fprintf(out, "Reduce %d %c %d = %d: ", a.value, op.op, b.value, res);


print_stack(&values, out);
}

Token t = {1, 0, expr[i]};


push(&ops, t);
fprintf(out, "Shift '%c': ", expr[i]);
print_stack(&values, out);
}
}

while (!is_empty(&ops)) {
Token op = pop(&ops);
Token b = pop(&values);
Token a = pop(&values);

int res = apply_op(a.value, b.value, op.op);


Token result = {0, res, 0};
push(&values, result);

fprintf(out, "Reduce %d %c %d = %d: ", a.value, op.op, b.value, res);


print_stack(&values, out);
}

if (values.top == 0) {
fprintf(out, "\nFinal result: %d\n", values.tokens[0].value);
} else {
fprintf(out, "\nInvalid expression\n");
}
}

/*
 * Reads one expression line from stdin, evaluates it and writes the parsing
 * steps and the outcome to parsing_steps.txt.
 */
int main() {
    char expr[100];
    printf("Enter a mathematical expression (integers only): ");
    /* On EOF fgets leaves expr untouched, so stop instead of scanning an
       uninitialised buffer. */
    if (fgets(expr, sizeof(expr), stdin) == NULL) {
        fprintf(stderr, "No input\n");
        return 1;
    }
    expr[strcspn(expr, "\n")] = 0; // Remove newline

    FILE *out = fopen("parsing_steps.txt", "w");
    if (!out) {
        perror("Failed to open output file");
        return 1;
    }

    evaluate_expression(expr, out);
    fclose(out);

    printf("Parsing complete. Results written to parsing_steps.txt\n");

    return 0;
}

10. Open Ended program: Designing of various type parser

#include <stdio.h>
#include <ctype.h>
#include <stdbool.h>

// Recursive Descent Parser


bool rd_expr(const char *s, int *p);
bool rd_term(const char *s, int *p);
bool rd_factor(const char *s, int *p);

// Operator Precedence Parser
int op_expr(const char *s);

// Helper functions
int apply_op(int a, int b, char op);
int precedence(char op);

/*
 * Reads one expression line, validates it with the recursive descent parser
 * and, when it is valid, evaluates it with the operator precedence parser.
 */
int main() {
    char expr[100];
    printf("Enter expression: ");
    /* On EOF fgets leaves expr untouched, so stop instead of parsing an
       uninitialised buffer. */
    if (fgets(expr, sizeof expr, stdin) == NULL) {
        printf("No input.\n");
        return 1;
    }

    // Recursive Descent check
    int pos = 0;
    /* The expression must be followed by the end of the line. The last line
       of a file or pipe may lack the newline, so the terminator counts too. */
    bool valid = rd_expr(expr, &pos) && (expr[pos] == '\n' || expr[pos] == '\0');
    printf("Recursive Descent: %s\n", valid ? "Valid" : "Invalid");

    // Operator Precedence evaluation
    if (valid) {
        int result = op_expr(expr);
        printf("Operator Precedence Result: %d\n", result);
    }

    return 0;
}

// Recursive Descent Implementation


/*
 * Recursive descent recogniser for
 *     expr   := term   { ('+' | '-') term }
 *     term   := factor { ('*' | '/') factor }
 *     factor := '(' expr ')' | digit+
 *
 * Each function reads s starting at index *p, advances *p past what it
 * matched and returns true on success. On failure it returns false and *p
 * is left wherever the mismatch was found. Whitespace is not accepted.
 */
bool rd_expr(const char *s, int *p);
bool rd_term(const char *s, int *p);
bool rd_factor(const char *s, int *p);

/* expr := term { ('+' | '-') term } */
bool rd_expr(const char *s, int *p) {
    if (!rd_term(s, p)) return false;
    while (s[*p] == '+' || s[*p] == '-') {
        (*p)++;
        if (!rd_term(s, p)) return false;
    }
    return true;
}

/* term := factor { ('*' | '/') factor } */
bool rd_term(const char *s, int *p) {
    if (!rd_factor(s, p)) return false;
    while (s[*p] == '*' || s[*p] == '/') {
        (*p)++;
        if (!rd_factor(s, p)) return false;
    }
    return true;
}

/* factor := '(' expr ')' | digit+ */
bool rd_factor(const char *s, int *p) {
    if (s[*p] == '(') {
        (*p)++;
        if (!rd_expr(s, p)) return false;
        if (s[*p] != ')') return false;
        (*p)++;
        return true;
    }
    /* isdigit() requires a value representable as unsigned char; a negative
       plain char would be undefined behaviour. */
    if (isdigit((unsigned char)s[*p])) {
        while (isdigit((unsigned char)s[*p])) (*p)++;
        return true;
    }
    return false;
}

// Operator Precedence Implementation


int apply_op(int a, int b, char op);
int precedence(char op);

/*
 * Pops the top operator and applies it to the two topmost values, leaving
 * the result on the value stack. A '(' is simply discarded, and an operator
 * with fewer than two operands available is dropped without effect.
 */
static void reduce_once(int *vals, int *vtop, const char *ops, int *otop) {
    char op = ops[(*otop)--];
    if (op == '(' || *vtop < 1) return;
    int b = vals[(*vtop)--];
    int a = vals[(*vtop)--];
    vals[++(*vtop)] = apply_op(a, b, op);
}

/*
 * Evaluates the integer expression s by operator precedence and returns its
 * value. Understands non-negative integer literals, + - * / and parentheses;
 * every other character (spaces, the trailing newline) is skipped.
 *
 * s is expected to be syntactically valid already; for malformed input the
 * result is unspecified but no memory outside the stacks is touched. Returns
 * 0 when s contains no number.
 *
 * Values and operators live on two separate stacks. Driving both from one
 * shared index makes operators overwrite operands and reads slots that were
 * never written.
 */
int op_expr(const char *s) {
    enum { CAPACITY = 100 };
    int vals[CAPACITY];
    char ops[CAPACITY];
    int vtop = -1, otop = -1;

    const char *p = s;
    while (*p) {
        if (isdigit((unsigned char)*p)) {
            int val = 0;
            while (isdigit((unsigned char)*p))
                val = val * 10 + (*p++ - '0');
            if (vtop + 1 < CAPACITY) vals[++vtop] = val;
        }
        else if (*p == '(') {
            if (otop + 1 < CAPACITY) ops[++otop] = '(';
            p++;
        }
        else if (*p == ')') {
            /* Evaluate everything back to the matching '('. */
            while (otop >= 0 && ops[otop] != '(')
                reduce_once(vals, &vtop, ops, &otop);
            if (otop >= 0) otop--; // Remove '('
            p++;
        }
        else if (*p == '+' || *p == '-' || *p == '*' || *p == '/') {
            /* Operators of equal or higher precedence bind first, which also
               makes each operator left-associative. '(' has precedence 0 and
               stops the loop. */
            while (otop >= 0 && precedence(ops[otop]) >= precedence(*p))
                reduce_once(vals, &vtop, ops, &otop);
            if (otop + 1 < CAPACITY) ops[++otop] = *p;
            p++;
        }
        else p++;
    }

    while (otop >= 0)
        reduce_once(vals, &vtop, ops, &otop);

    return vtop >= 0 ? vals[0] : 0;
}

/*
 * Computes a <op> b for op in + - * / ; any other operator yields 0.
 * Division by zero is reported on stderr and yields 0 instead of invoking
 * undefined behaviour.
 */
int apply_op(int a, int b, char op) {
    switch (op) {
        case '+': return a + b;
        case '-': return a - b;
        case '*': return a * b;
        case '/':
            if (b == 0) {
                fprintf(stderr, "Division by zero\n");
                return 0;
            }
            return a / b;
        default: return 0;
    }
}

/* Returns 2 for '*' and '/', 1 for '+' and '-', and 0 for anything else. */
int precedence(char op) {
    if (op == '+' || op == '-') return 1;
    if (op == '*' || op == '/') return 2;
    return 0;
}

You might also like