compiler_design_lab_3
compiler_design_lab_3
22BRS1263
Assignment 3
Experiment 1
a) Construct Simple LR (SLR) parse table using C language.
b) Implement the LR parsing algorithm, taking both the parse
table and the input string as inputs. Use the C language for
the implementation.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <stdbool.h>
#define MAX_SYMBOLS 100
#define MAX_PRODUCTIONS 100
#define MAX_STATES 100
#define MAX_SYMBOL_LENGTH 10
#define MAX_PRODUCTION_LENGTH 50
#define MAX_STACK_SIZE 1000
// One entry of the LR parse stack: a parser state paired with the grammar
// symbol that was pushed together with it.
typedef struct
{
int state; // parser state number
char symbol[MAX_SYMBOL_LENGTH]; // NUL-terminated grammar symbol text
} StackEntry;
// Fixed-capacity parse stack. `top` is the index of the most recently pushed
// entry; initialize_stack() sets it to -1 to mark the stack as empty.
typedef struct
{
StackEntry entries[MAX_STACK_SIZE];
int top;
} Stack;
// Stack operations
// Structure to represent a production rule: lhs -> rhs[0] rhs[1] ... rhs[rhs_length - 1]
typedef struct
{
char lhs[MAX_SYMBOL_LENGTH]; // left-hand-side symbol
char rhs[MAX_PRODUCTION_LENGTH][MAX_SYMBOL_LENGTH]; // right-hand-side symbols, in order
int rhs_length; // number of entries of rhs in use
} Production;
// Structure to represent an LR(0) item: a production with a dot marking how
// much of its right-hand side has been recognised.
typedef struct
{
int production_index; // index into the global productions[] array
int dot_position; // number of RHS symbols to the left of the dot
} Item;
// Structure to represent a set of LR(0) items (one parser state).
typedef struct
{
Item items[MAX_PRODUCTIONS * MAX_PRODUCTION_LENGTH];
int item_count; // number of entries of items[] in use
} ItemSet;
// Structure to represent the parsing table.
// action[state][terminal index] holds "sN" (shift to state N), "rN" (reduce by
// production N), "acc" (accept) or "" (no action), as written by
// build_parsing_table(). goto_table[state][non-terminal index] holds the
// target state, or -1 when there is no transition.
typedef struct
{
char action[MAX_STATES][MAX_SYMBOLS][MAX_SYMBOL_LENGTH];
int goto_table[MAX_STATES][MAX_SYMBOLS];
} ParsingTable;
// Global variables
// Grammar productions, filled in by read_grammar_fixed().
Production productions[MAX_PRODUCTIONS];
int production_count = 0;
// Distinct terminal symbols seen in the grammar; read_grammar_fixed() also adds "$".
char terminals[MAX_SYMBOLS][MAX_SYMBOL_LENGTH];
int terminal_count = 0;
// Distinct non-terminal symbols seen in the grammar.
char non_terminals[MAX_SYMBOLS][MAX_SYMBOL_LENGTH];
int non_terminal_count = 0;
// Left-hand side of the first production read.
char start_symbol[MAX_SYMBOL_LENGTH];
// LR(0) item sets; the index of a set is its parser state number.
ItemSet canonical_collection[MAX_STATES];
int state_count = 0;
// FIRST and FOLLOW sets; the first index is the position of the non-terminal
// in non_terminals[], and the matching *_counts entry is the set's size.
char first_sets[MAX_SYMBOLS][MAX_SYMBOLS][MAX_SYMBOL_LENGTH];
int first_set_counts[MAX_SYMBOLS];
char follow_sets[MAX_SYMBOLS][MAX_SYMBOLS][MAX_SYMBOL_LENGTH];
int follow_set_counts[MAX_SYMBOLS];
// Function prototypes
// Forward declarations for functions defined after main(), so that they can
// be called from code that appears earlier in the file.
void read_grammar_fixed();
void compute_first_sets();
void compute_follow_sets();
void build_canonical_collection();
void build_parsing_table(ParsingTable *table);
void print_parsing_table(ParsingTable *table);
bool is_terminal(const char *symbol);
bool is_non_terminal(const char *symbol);
// Returns the index of `symbol` in `symbols`, or -1 when it is absent.
int get_symbol_index(const char *symbol, const char
symbols[][MAX_SYMBOL_LENGTH], int
count);
void compute_closure(ItemSet *item_set);
void compute_goto(ItemSet *item_set, const char *symbol, ItemSet *result);
bool item_sets_equal(ItemSet *set1, ItemSet *set2);
int find_item_set(ItemSet *item_set);
void add_to_first_set(const char *non_terminal, const char *terminal);
void add_to_follow_set(const char *non_terminal, const char *terminal);
bool is_in_first_set(const char *non_terminal, const char *terminal);
bool is_in_follow_set(const char *non_terminal, const char *terminal);
void trim(char *str);
// Marks the stack as empty. `top` is the index of the last pushed entry, so
// -1 means that no entry is present.
void initialize_stack(Stack *stack)
{
stack->top = -1;
}
// Pushes a (state, symbol) pair onto the parse stack.
//
// The stack has a fixed capacity of MAX_STACK_SIZE entries. Because this
// function cannot report failure to its caller, an overflow is treated as a
// fatal error instead of writing past the end of the array. The symbol text is
// truncated to fit the entry's buffer.
void push(Stack *stack, int state, const char *symbol)
{
    if (stack->top + 1 >= MAX_STACK_SIZE)
    {
        fprintf(stderr, "Error: parse stack overflow (limit %d entries)\n",
                MAX_STACK_SIZE);
        exit(EXIT_FAILURE);
    }

    StackEntry *entry = &stack->entries[++stack->top];
    entry->state = state;
    snprintf(entry->symbol, sizeof entry->symbol, "%s", symbol);
}
// Discards the top entry of the parse stack; does nothing when the stack is
// already empty.
void pop(Stack *stack)
{
    if (stack->top < 0)
    {
        return;
    }
    stack->top -= 1;
}
// Function to tokenize input string
void tokenize_input(const char *input, char tokens[][MAX_SYMBOL_LENGTH],
int
*token_count)
{
char *str = strdup(input);
char *token = strtok(str, " \t\n");
*token_count = 0;
while (token != NULL)
{
strcpy(tokens[*token_count], token);
(*token_count)++;
token = strtok(NULL, " \t\n");
}
// Add end marker
strcpy(tokens[*token_count], "$");
(*token_count)++;
free(str);
}
// Function to parse input string
// Appends `text` to the NUL-terminated string in `buf` (capacity `cap`),
// truncating instead of overflowing when there is not enough room.
static void append_bounded(char *buf, size_t cap, const char *text)
{
    size_t used = strlen(buf);
    if (used + 1 < cap)
    {
        snprintf(buf + used, cap - used, "%s", text);
    }
}

// Runs the table-driven LR parsing algorithm over `input_string`
// (whitespace-separated tokens) using the ACTION/GOTO entries in `table`,
// printing one trace row per step.
//
// Returns true when the input is accepted. Returns false, after printing an
// error, when a token is not a known terminal, when the table has no usable
// entry for the current configuration, or when the parse stack would
// underflow or overflow.
bool parse_input(const char *input_string, ParsingTable *table)
{
    Stack stack;
    initialize_stack(&stack);
    // Initialize stack with state 0
    push(&stack, 0, "$");

    // Tokenize input
    char tokens[MAX_SYMBOLS][MAX_SYMBOL_LENGTH];
    int token_count;
    tokenize_input(input_string, tokens, &token_count);
    int current_token = 0;

    printf("\nParsing Steps:\n");
    printf("%-30s %-30s %-20s\n", "Stack", "Input", "Action");
    printf("---------------------------------------------------------------\n");

    for (;;)
    {
        // Render the current stack configuration (truncated if very deep).
        char stack_str[200] = "";
        for (int i = 0; i <= stack.top; i++)
        {
            char temp[50];
            snprintf(temp, sizeof temp, "%s %d ", stack.entries[i].symbol,
                     stack.entries[i].state);
            append_bounded(stack_str, sizeof stack_str, temp);
        }

        // Render the remaining input.
        char input_str[200] = "";
        for (int i = current_token; i < token_count; i++)
        {
            append_bounded(input_str, sizeof input_str, tokens[i]);
            append_bounded(input_str, sizeof input_str, " ");
        }

        if (current_token >= token_count)
        {
            printf("Error: Unexpected end of input\n");
            return false;
        }

        // Get action from parsing table
        int current_state = stack.entries[stack.top].state;
        const char *current_symbol = tokens[current_token];
        int symbol_index = get_symbol_index(current_symbol, terminals,
                                            terminal_count);
        if (symbol_index == -1)
        {
            printf("Error: Invalid input symbol %s\n", current_symbol);
            return false;
        }
        const char *action = table->action[current_state][symbol_index];
        if (strlen(action) == 0)
        {
            printf("Error: No action defined for state %d and symbol %s\n",
                   current_state, current_symbol);
            return false;
        }

        // Print formatted output (one row per step)
        printf("%-30s %-30s %-20s\n", stack_str, input_str, action);

        if (action[0] == 's')
        {
            int next_state = (int)strtol(action + 1, NULL, 10);
            if (next_state < 0 || next_state >= MAX_STATES)
            {
                printf("Error: Invalid action %s\n", action);
                return false;
            }
            if (stack.top + 1 >= MAX_STACK_SIZE)
            {
                printf("Error: Parse stack overflow\n");
                return false;
            }
            push(&stack, next_state, current_symbol);
            current_token++;
        }
        else if (action[0] == 'r')
        {
            int production_index = (int)strtol(action + 1, NULL, 10);
            if (production_index < 0 || production_index >= production_count)
            {
                printf("Error: Invalid action %s\n", action);
                return false;
            }
            const Production *prod = &productions[production_index];

            // The bottom entry (state 0) must survive the pops.
            if (prod->rhs_length > stack.top)
            {
                printf("Error: Parse stack underflow while reducing by production %d\n",
                       production_index);
                return false;
            }
            // Pop symbols in RHS
            for (int i = 0; i < prod->rhs_length; i++)
            {
                pop(&stack);
            }

            int exposed_state = stack.entries[stack.top].state;
            int non_terminal_index = get_symbol_index(prod->lhs, non_terminals,
                                                      non_terminal_count);
            int next_state = (non_terminal_index == -1)
                                 ? -1
                                 : table->goto_table[exposed_state][non_terminal_index];
            if (next_state < 0 || next_state >= MAX_STATES)
            {
                printf("Error: No goto defined for state %d and symbol %s\n",
                       exposed_state, prod->lhs);
                return false;
            }
            if (stack.top + 1 >= MAX_STACK_SIZE)
            {
                printf("Error: Parse stack overflow\n");
                return false;
            }
            push(&stack, next_state, prod->lhs);

            printf("Reduce by %-5s -> ", prod->lhs);
            for (int i = 0; i < prod->rhs_length; i++)
            {
                printf(" %s", prod->rhs[i]);
            }
            printf("\n");
        }
        else if (strcmp(action, "acc") == 0)
        {
            printf("Input accepted!\n");
            return true;
        }
        else
        {
            printf("Error: Invalid action %s\n", action);
            return false;
        }
    }
}
// Entry point of the SLR experiment: reads a grammar, builds the FIRST/FOLLOW
// sets, the canonical LR(0) collection and the SLR table, then parses one line
// of input with that table.
//
// Returns 0 when the input is accepted and EXIT_FAILURE when no input line
// could be read or parsing failed.
int main()
{
    // Static storage keeps the large table off the stack.
    static ParsingTable table;
    char input_string[1000];

    read_grammar_fixed();
    compute_first_sets();
    compute_follow_sets();
    build_canonical_collection();
    build_parsing_table(&table);
    print_parsing_table(&table);

    printf("\nEnter the input string to parse (tokens separated by spaces): ");
    if (fgets(input_string, sizeof(input_string), stdin) == NULL)
    {
        printf("Error: No input string given\n");
        return EXIT_FAILURE;
    }
    input_string[strcspn(input_string, "\n")] = 0; // Remove trailing newline

    // Parse the input
    if (!parse_input(input_string, &table))
    {
        printf("Parsing failed!\n");
        return EXIT_FAILURE;
    }
    return 0;
}
// Helper function to trim whitespace: removes leading and trailing whitespace
// (as classified by isspace) from `str` in place.
//
// Works on empty and all-whitespace strings without ever forming a pointer
// before the start of the buffer. Characters are converted to unsigned char
// before being passed to isspace, as <ctype.h> requires.
void trim(char *str)
{
    size_t end = strlen(str);
    size_t begin = 0;

    while (begin < end && isspace((unsigned char)str[begin]))
    {
        begin++;
    }
    while (end > begin && isspace((unsigned char)str[end - 1]))
    {
        end--;
    }

    if (begin > 0)
    {
        memmove(str, str + begin, end - begin); // regions may overlap
    }
    str[end - begin] = '\0';
}
void read_grammar_fixed()
{
char line[100];
printf("Enter the grammar productions (end with an empty line):\n");
printf("Format: A -> B C | D\n");
while (fgets(line, sizeof(line), stdin) && line[0] != '\n')
{
// Split on "->"
char *arrow_pos = strstr(line, "->");
if (!arrow_pos)
continue;
// Get the left-hand side
int lhs_len = arrow_pos - line;
char lhs[MAX_SYMBOL_LENGTH];
strncpy(lhs, line, lhs_len);
lhs[lhs_len] = '\0';
trim(lhs);
// Add to non-terminals if not already there
if (!is_non_terminal(lhs))
{
strcpy(non_terminals[non_terminal_count++], lhs);
}
// If it's the first production, set as start symbol
if (production_count == 0)
{
strcpy(start_symbol, lhs);
}
// Get the right-hand side and split on "|"
char *rhs = arrow_pos + 2;
char *alt_start = rhs;
bool has_more = true;
while (has_more)
{
char *pipe_pos = strchr(alt_start, '|');
char alternative[MAX_PRODUCTION_LENGTH];
if (pipe_pos)
{
int alt_len = pipe_pos - alt_start;
strncpy(alternative, alt_start, alt_len);
alternative[alt_len] = '\0';
alt_start = pipe_pos + 1;
}
else
{
strcpy(alternative, alt_start);
has_more = false;
}
trim(alternative);
// Create a new production
strcpy(productions[production_count].lhs, lhs);
productions[production_count].rhs_length = 0;
// Split the alternative on spaces
char *token_start = alternative;
char *space_pos;
bool tokens_remain = true;
while (tokens_remain && *token_start)
{
space_pos = strchr(token_start, ' ');
char symbol[MAX_SYMBOL_LENGTH];
if (space_pos)
{
int symbol_len = space_pos - token_start;
strncpy(symbol, token_start, symbol_len);
symbol[symbol_len] = '\0';
token_start = space_pos + 1;
// Skip consecutive spaces
while (*token_start && *token_start == ' ')
token_start++;
}
else
{
strcpy(symbol, token_start);
tokens_remain = false;
}
trim(symbol);
if (strlen(symbol) == 0)
continue;
// Add the symbol to the production
strcpy(productions[production_count].rhs[productions[production_count].rhs_
length++],
symbol);
// Determine if it's a terminal or non-terminal
if (isupper(symbol[0]))
{
if (!is_non_terminal(symbol))
{
strcpy(non_terminals[non_terminal_count++], symbol);
}
}
else if (strcmp(symbol, "epsilon") != 0)
{
if (!is_terminal(symbol))
{
strcpy(terminals[terminal_count++], symbol);
}
}
}
production_count++;
}
}
// Add $ as a terminal
if (!is_terminal("$"))
{
strcpy(terminals[terminal_count++], "$");
}
// Print the grammar
printf("\nGrammar:\n");
for (int i = 0; i < production_count; i++)
{
printf("%s ->", productions[i].lhs);
for (int j = 0; j < productions[i].rhs_length; j++)
{
printf(" %s", productions[i].rhs[j]);
}
printf("\n");
}
printf("\nTerminals: ");
for (int i = 0; i < terminal_count; i++)
{
printf("%s ", terminals[i]);
}
printf("\nNon-terminals: ");
for (int i = 0; i < non_terminal_count; i++)
{
printf("%s ", non_terminals[i]);
}
printf("\nStart symbol: %s\n", start_symbol);
}
// Reports whether `symbol` is one of the registered terminals.
bool is_terminal(const char *symbol)
{
    bool known = false;
    int idx = 0;
    while (!known && idx < terminal_count)
    {
        known = (strcmp(terminals[idx], symbol) == 0);
        idx++;
    }
    return known;
}
// Reports whether `symbol` is one of the registered non-terminals.
bool is_non_terminal(const char *symbol)
{
    bool known = false;
    int idx = 0;
    while (!known && idx < non_terminal_count)
    {
        known = (strcmp(non_terminals[idx], symbol) == 0);
        idx++;
    }
    return known;
}
// Linear search for `symbol` among the first `count` entries of `symbols`.
// Returns the index of the first match, or -1 when there is none.
int get_symbol_index(const char *symbol, const char symbols[][MAX_SYMBOL_LENGTH],
                     int count)
{
    int position = 0;
    while (position < count)
    {
        if (strcmp(symbols[position], symbol) == 0)
        {
            return position;
        }
        position++;
    }
    return -1;
}
// Compute FIRST sets for all non-terminals
void compute_first_sets()
{
bool changed;
// Initialize FIRST sets
for (int i = 0; i < non_terminal_count; i++)
{
first_set_counts[i] = 0;
}
do
{
changed = false;
for (int i = 0; i < production_count; i++)
{
int nt_index = get_symbol_index(productions[i].lhs, non_terminals,
non_terminal_count);
if (productions[i].rhs_length == 0 || strcmp(productions[i].rhs[0],
"epsilon") == 0)
{
// If the production is A -> ε, add ε to FIRST(A)
if (!is_in_first_set(productions[i].lhs, "epsilon"))
{
add_to_first_set(productions[i].lhs, "epsilon");
changed = true;
}
}
else
{
bool all_derive_epsilon = true;
for (int j = 0; j < productions[i].rhs_length; j++)
{
if (is_terminal(productions[i].rhs[j]))
{
// If the symbol is a terminal, add it to FIRST(A)
if (!is_in_first_set(productions[i].lhs, productions[i].rhs[j]))
{
add_to_first_set(productions[i].lhs, productions[i].rhs[j]);
changed = true;
}
all_derive_epsilon = false;
break;
}
else if (is_non_terminal(productions[i].rhs[j]))
{
// If the symbol is a non-terminal, add FIRST(B) - {ε} to FIRST(A)
int symbol_index = get_symbol_index(productions[i].rhs[j], non_terminals,
non_terminal_count);
bool derives_epsilon = false;
for (int k = 0; k < first_set_counts[symbol_index]; k++)
{
if (strcmp(first_sets[symbol_index][k], "epsilon") == 0)
{
derives_epsilon = true;
}
else if (!is_in_first_set(productions[i].lhs, first_sets[symbol_index][k]))
{
add_to_first_set(productions[i].lhs, first_sets[symbol_index][k]);
changed = true;
}
}
if (!derives_epsilon)
{
all_derive_epsilon = false;
break;
}
}
}
// If all symbols in the right-hand side can derive epsilon, add epsilon to
FIRST(A)
if (all_derive_epsilon && !is_in_first_set(productions[i].lhs, "epsilon"))
{
add_to_first_set(productions[i].lhs, "epsilon");
changed = true;
}
}
}
} while (changed);
// Print FIRST sets
printf("\nFIRST sets:\n");
for (int i = 0; i < non_terminal_count; i++)
{
printf("FIRST(%s) = { ", non_terminals[i]);
for (int j = 0; j < first_set_counts[i]; j++)
{
printf("%s ", first_sets[i][j]);
}
printf("}\n");
}
}
// Compute FOLLOW sets for all non-terminals
void compute_follow_sets()
{
bool changed;
// Initialize FOLLOW sets
for (int i = 0; i < non_terminal_count; i++)
{
follow_set_counts[i] = 0;
}
// Add $ to FOLLOW(S) where S is the start symbol
add_to_follow_set(start_symbol, "$");
do
{
changed = false;
for (int i = 0; i < production_count; i++)
{
for (int j = 0; j < productions[i].rhs_length; j++)
{
if (is_non_terminal(productions[i].rhs[j]))
{
// Rule 2: If A -> αBβ, then FIRST(β) - {ε} is added to FOLLOW(B)
if (j < productions[i].rhs_length - 1)
{
if (is_terminal(productions[i].rhs[j + 1]))
{
if (!is_in_follow_set(productions[i].rhs[j], productions[i].rhs[j + 1]))
{
add_to_follow_set(productions[i].rhs[j], productions[i].rhs[j + 1]);
changed = true;
}
}
else if (is_non_terminal(productions[i].rhs[j + 1]))
{
int next_nt_index = get_symbol_index(productions[i].rhs[j + 1],
non_terminals, non_terminal_count);
for (int k = 0; k < first_set_counts[next_nt_index]; k++)
{
if (strcmp(first_sets[next_nt_index][k], "epsilon") != 0 &&
!is_in_follow_set(productions[i].rhs[j], first_sets[next_nt_index][k]))
{
add_to_follow_set(productions[i].rhs[j], first_sets[next_nt_index][k]);
changed = true;
}
}
}
}
// Rule 3: If A -> αB or A -> αBβ where FIRST(β) contains ε, then FOLLOW(A)
is
added to FOLLOW(B)
if (j == productions[i].rhs_length - 1 ||
(j < productions[i].rhs_length - 1 && is_non_terminal(productions[i].rhs[j
+ 1])
&&
is_in_first_set(productions[i].rhs[j + 1], "epsilon")))
{
int lhs_index = get_symbol_index(productions[i].lhs, non_terminals,
non_terminal_count);
int nt_index = get_symbol_index(productions[i].rhs[j], non_terminals,
non_terminal_count);
for (int k = 0; k < follow_set_counts[lhs_index]; k++)
{
if (!is_in_follow_set(productions[i].rhs[j], follow_sets[lhs_index][k]))
{
add_to_follow_set(productions[i].rhs[j], follow_sets[lhs_index][k]);
changed = true;
}
}
}
}
}
}
} while (changed);
// Print FOLLOW sets
printf("\nFOLLOW sets:\n");
for (int i = 0; i < non_terminal_count; i++)
{
printf("FOLLOW(%s) = { ", non_terminals[i]);
for (int j = 0; j < follow_set_counts[i]; j++)
{
printf("%s ", follow_sets[i][j]);
}
printf("}\n");
}
}
void add_to_first_set(const char *non_terminal, const char *terminal)
{
int nt_index = get_symbol_index(non_terminal, non_terminals,
non_terminal_count);
strcpy(first_sets[nt_index][first_set_counts[nt_index]++], terminal);
}
void add_to_follow_set(const char *non_terminal, const char *terminal)
{
int nt_index = get_symbol_index(non_terminal, non_terminals,
non_terminal_count);
strcpy(follow_sets[nt_index][follow_set_counts[nt_index]++], terminal);
}
// Reports whether `terminal` is a member of FIRST(non_terminal).
bool is_in_first_set(const char *non_terminal, const char *terminal)
{
    int row = get_symbol_index(non_terminal, non_terminals, non_terminal_count);
    int member = 0;
    while (member < first_set_counts[row])
    {
        if (strcmp(first_sets[row][member], terminal) == 0)
        {
            return true;
        }
        member++;
    }
    return false;
}
// Reports whether `terminal` is a member of FOLLOW(non_terminal).
bool is_in_follow_set(const char *non_terminal, const char *terminal)
{
    int row = get_symbol_index(non_terminal, non_terminals, non_terminal_count);
    int member = 0;
    while (member < follow_set_counts[row])
    {
        if (strcmp(follow_sets[row][member], terminal) == 0)
        {
            return true;
        }
        member++;
    }
    return false;
}
// Build the canonical collection of LR(0) items
void build_canonical_collection()
{
// Create the initial item set with S' -> .S
Item initial_item = {0, 0}; // Assuming the first production is the
augmented start
production
canonical_collection[0].items[0] = initial_item;
canonical_collection[0].item_count = 1;
compute_closure(&canonical_collection[0]);
state_count = 1;
// Build the canonical collection
for (int i = 0; i < state_count; i++)
{
// Find all symbols that appear after the dot in the items
char symbols[MAX_SYMBOLS][MAX_SYMBOL_LENGTH];
int symbol_count = 0;
for (int j = 0; j < canonical_collection[i].item_count; j++)
{
Item item = canonical_collection[i].items[j];
if (item.dot_position < productions[item.production_index].rhs_length)
{
char *symbol = productions[item.production_index].rhs[item.dot_position];
bool found = false;
for (int k = 0; k < symbol_count; k++)
{
if (strcmp(symbols[k], symbol) == 0)
{
found = true;
break;
}
}
if (!found)
{
strcpy(symbols[symbol_count++], symbol);
}
}
}
// For each symbol, compute the GOTO function
for (int j = 0; j < symbol_count; j++)
{
ItemSet goto_set;
goto_set.item_count = 0;
compute_goto(&canonical_collection[i], symbols[j], &goto_set);
if (goto_set.item_count > 0)
{
int existing_index = find_item_set(&goto_set);
if (existing_index == -1)
{
canonical_collection[state_count] = goto_set;
state_count++;
}
}
}
}
// Print the canonical collection
printf("\nCanonical Collection of LR(0) Items:\n");
for (int i = 0; i < state_count; i++)
{
printf("I%d:\n", i);
for (int j = 0; j < canonical_collection[i].item_count; j++)
{
Item item = canonical_collection[i].items[j];
printf(" %s ->", productions[item.production_index].lhs);
for (int k = 0; k < productions[item.production_index].rhs_length; k++)
{
if (k == item.dot_position)
{
printf(" .");
}
printf(" %s", productions[item.production_index].rhs[k]);
}
if (item.dot_position == productions[item.production_index].rhs_length)
{
printf(" .");
}
printf("\n");
}
}
}
// Extends `item_set` in place to its LR(0) closure: whenever an item has a
// non-terminal B right after its dot, every production B -> gamma is added as
// the item "B -> . gamma" unless it is already present.
//
// The set is used as a work list. Newly appended items sit behind the scan
// position and are examined by the same loop, so one pass reaches the fixed
// point.
void compute_closure(ItemSet *item_set)
{
    const int capacity = (int)(sizeof item_set->items / sizeof item_set->items[0]);

    for (int i = 0; i < item_set->item_count; i++)
    {
        Item item = item_set->items[i];
        const Production *prod = &productions[item.production_index];
        if (item.dot_position >= prod->rhs_length)
        {
            continue; // dot at the end: nothing to expand
        }

        const char *symbol_after_dot = prod->rhs[item.dot_position];
        if (!is_non_terminal(symbol_after_dot))
        {
            continue;
        }

        for (int j = 0; j < production_count; j++)
        {
            if (strcmp(productions[j].lhs, symbol_after_dot) != 0)
            {
                continue;
            }

            bool found = false;
            for (int k = 0; k < item_set->item_count; k++)
            {
                if (item_set->items[k].production_index == j &&
                    item_set->items[k].dot_position == 0)
                {
                    found = true;
                    break;
                }
            }
            if (!found && item_set->item_count < capacity)
            {
                Item new_item = {j, 0};
                item_set->items[item_set->item_count++] = new_item;
            }
        }
    }
}
// Computes GOTO(item_set, symbol) into `result`: every item whose dot stands
// directly before `symbol` is appended to `result` with the dot advanced by
// one, and the closure of `result` is then taken. Items are appended after
// whatever `result` already holds; callers set result->item_count to 0 first.
void compute_goto(ItemSet *item_set, const char *symbol, ItemSet *result)
{
    int idx = 0;
    while (idx < item_set->item_count)
    {
        const Item *candidate = &item_set->items[idx++];
        const Production *rule = &productions[candidate->production_index];

        if (candidate->dot_position >= rule->rhs_length)
        {
            continue;
        }
        if (strcmp(rule->rhs[candidate->dot_position], symbol) != 0)
        {
            continue;
        }

        // Move the dot one position to the right
        Item advanced;
        advanced.production_index = candidate->production_index;
        advanced.dot_position = candidate->dot_position + 1;
        result->items[result->item_count] = advanced;
        result->item_count += 1;
    }

    // Compute the closure of the resulting set
    compute_closure(result);
}
// Two item sets are equal when they have the same size and every item of
// `set1` also occurs (same production, same dot position) in `set2`.
bool item_sets_equal(ItemSet *set1, ItemSet *set2)
{
    if (set1->item_count != set2->item_count)
    {
        return false;
    }

    for (int a = 0; a < set1->item_count; a++)
    {
        const Item *wanted = &set1->items[a];
        int b = 0;
        while (b < set2->item_count &&
               !(wanted->production_index == set2->items[b].production_index &&
                 wanted->dot_position == set2->items[b].dot_position))
        {
            b++;
        }
        if (b == set2->item_count)
        {
            return false; // ran off the end without a match
        }
    }
    return true;
}
// Returns the state number whose item set equals `item_set`, or -1 when the
// canonical collection does not contain it yet.
int find_item_set(ItemSet *item_set)
{
    int state = 0;
    while (state < state_count)
    {
        if (item_sets_equal(item_set, &canonical_collection[state]))
        {
            return state;
        }
        state++;
    }
    return -1;
}
// Stores `action` in ACTION[state][terminal_index]. If the cell already holds
// a different action, the grammar is not SLR(1): a warning is printed and the
// new action replaces the old one (the last writer wins).
static void set_action(ParsingTable *table, int state, int terminal_index,
                       const char *action)
{
    char *cell = table->action[state][terminal_index];
    if (cell[0] != '\0' && strcmp(cell, action) != 0)
    {
        printf("Warning: conflict in state %d on '%s': %s replaced by %s\n",
               state, terminals[terminal_index], cell, action);
    }
    snprintf(cell, MAX_SYMBOL_LENGTH, "%s", action);
}

// Build the SLR parsing table from the canonical collection and FOLLOW sets.
//
// For every item in every state:
//  - dot before a terminal a:      ACTION[state][a] = shift to GOTO(state, a)
//  - dot before a non-terminal B:  GOTO[state][B]   = GOTO(state, B)
//  - production 0 complete:        ACTION[state][$] = accept
//  - any other complete item A -> alpha: ACTION[state][t] = reduce by that
//    production for every terminal t in FOLLOW(A)
void build_parsing_table(ParsingTable *table)
{
    // Initialize the parsing table
    for (int i = 0; i < state_count; i++)
    {
        for (int j = 0; j < terminal_count; j++)
        {
            table->action[i][j][0] = '\0';
        }
        for (int j = 0; j < non_terminal_count; j++)
        {
            table->goto_table[i][j] = -1;
        }
    }

    // Fill the action and goto tables
    for (int i = 0; i < state_count; i++)
    {
        for (int j = 0; j < canonical_collection[i].item_count; j++)
        {
            Item item = canonical_collection[i].items[j];
            const Production *prod = &productions[item.production_index];
            char action[MAX_SYMBOL_LENGTH];

            if (item.dot_position < prod->rhs_length)
            {
                const char *symbol = prod->rhs[item.dot_position];
                ItemSet goto_set;
                goto_set.item_count = 0;
                compute_goto(&canonical_collection[i], symbol, &goto_set);
                int goto_state = find_item_set(&goto_set);
                if (goto_state == -1)
                {
                    continue;
                }
                if (is_terminal(symbol))
                {
                    int terminal_index = get_symbol_index(symbol, terminals,
                                                          terminal_count);
                    snprintf(action, sizeof action, "s%d", goto_state);
                    set_action(table, i, terminal_index, action);
                }
                else if (is_non_terminal(symbol))
                {
                    int non_terminal_index = get_symbol_index(symbol, non_terminals,
                                                              non_terminal_count);
                    table->goto_table[i][non_terminal_index] = goto_state;
                }
            }
            else if (item.production_index == 0)
            {
                // Accept action for the augmented production
                int dollar_index = get_symbol_index("$", terminals, terminal_count);
                if (dollar_index != -1)
                {
                    set_action(table, i, dollar_index, "acc");
                }
            }
            else
            {
                // Reduce actions
                snprintf(action, sizeof action, "r%d", item.production_index);
                for (int k = 0; k < terminal_count; k++)
                {
                    if (is_in_follow_set(prod->lhs, terminals[k]))
                    {
                        set_action(table, i, k, action);
                    }
                }
            }
        }
    }
}
// Print the SLR parsing table: a header row listing every terminal followed
// by every non-terminal, then one row per state with its ACTION entries and
// GOTO targets. Cells without an entry are left blank.
void print_parsing_table(ParsingTable *table)
{
    int col;

    printf("\nSLR Parsing Table:\n");

    // Header row
    printf(" ");
    for (col = 0; col < terminal_count; ++col)
    {
        printf("%5s ", terminals[col]);
    }
    for (col = 0; col < non_terminal_count; ++col)
    {
        printf("%5s ", non_terminals[col]);
    }
    printf("\n");

    // One row per parser state
    for (int state = 0; state < state_count; ++state)
    {
        printf("%2d ", state);

        for (col = 0; col < terminal_count; ++col)
        {
            const char *cell = table->action[state][col];
            printf("%5s ", cell[0] != '\0' ? cell : "");
        }

        for (col = 0; col < non_terminal_count; ++col)
        {
            int target = table->goto_table[state][col];
            if (target == -1)
            {
                printf("%5s ", "");
            }
            else
            {
                printf("%5d ", target);
            }
        }
        printf("\n");
    }
}
Input:
Output:
Conclusion: We implemented an SLR parse-table generator and an LR parser driven by that table.
Experiment2)
a)
Aim:- Construct Canonical LR (CLR) parse table using C
language.
Code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* One grammar rule: var -> der. Each symbol is a single character; main()
   treats 'A'..'Z' as variables and '@' as the empty string. */
struct Rules
{
char var;
char der[10];
};
/* An item with lookaheads: rule `r` with the dot before r.der[dotposition]. */
struct Item
{
int dotposition;
struct Rules r;
int lookahead[255]; /* lookahead[c] is non-zero when character c is a lookahead */
int f; /* set to 1 by find_out_states() once the item has been handled */
};
/* A parser state: a set of items plus its outgoing transitions. */
struct State
{
int len; /* number of entries of itm[] in use */
struct Item itm[20];
int transition[255]; /* transition[c] = target state number on symbol c, or -1 */
};
/* Singly linked list node holding one state. The position of a node in the
   list starting at `head` is used as its state number. */
struct list
{
struct State data;
struct list* next;
};
/* Flags for which variables ('A'..'Z', indexed by letter - 'A') and which
   terminal characters (indexed by character code) occur in the grammar. */
int variables[26] = {0};
int terminals[255] = {0};
/* nullable[X - 'A'] is 1 when variable X can derive the empty string. */
int nullable[26] = {0};
/* first[X - 'A'][c] / follow[X - 'A'][c] are 1 when character c is in
   FIRST(X) / FOLLOW(X). */
char first[26][255] = {{0}};
char follow[26][255] = {{0}};
/* Variables and terminals as compact arrays, in table-column order; main()
   appends '$' to term. */
char *var,*term;
char start; /* start variable */
int n,n_var = 0,n_term = 0; /* number of rules, variables and terminals */
struct Rules* a; /* the n grammar rules */
struct list* head,*tail; /* first and last node of the state list */
/* Returns 1 when every character of `s` is a variable ('A'..'Z') marked as
   nullable, and 0 otherwise. The empty string is nullable. */
int is_nullable(char* s)
{
    for (const char *cur = s; *cur != '\0'; ++cur)
    {
        int is_variable = (*cur >= 'A' && *cur <= 'Z');
        if (!is_variable || !nullable[*cur - 'A'])
        {
            return 0;
        }
    }
    return 1;
}
int is_item_in(struct State* l,struct Rules r,int dot)
{
for(int i=0;i<l->len;i++)
{
if((l->itm[i].dotposition==dot)&&(l->itm[i].r.var==r.var)&&(strcmp(l->itm[i
].r.der,r.der)==0))
return i;
}
return -1;
}
int is_item_in_advanced(struct State* l,struct Rules r,int dot,int* bit)
{
int f = 0;
for(int i=0;i<l->len;i++)
{
f = 1;
for(int j=0;j<255;j++)
{
if(bit[j]!=l->itm[i].lookahead[j])
{
f = 0;break;
}
}
if(f&&(l->itm[i].dotposition==dot)&&(l->itm[i].r.var==r.var)&&(strcmp(l-
>itm[i].r.der,r.der)==0))
return 1;
}
return 0;
}
void fill_lookaheads(int* bit,struct Item* l)
{
//printf("fill\n");
int length = strlen(l->r.der+l->dotposition+1);
char sto;int f = 0;
for(int i=l->dotposition+1;i<l->dotposition+length+1;i++)
{
//printf("+\n");
if(l->r.der[i]=='\0')
continue;
if(l->r.der[i]<'A'||l->r.der[i]>'Z')
{
//printf("c = %c\n",l->r.der[i]);
bit[l->r.der[i]] = 1;
return;
}
for(int j=0;j<255;j++)
{
if(first[l->r.der[i]-'A'][j])
{
bit[j] = 1;
}
}
sto = l->r.der[i];
l->r.der[i] = '\0';
if(!is_nullable(l->r.der+l->dotposition+1))
{
l->r.der[i] = sto;
}
else
{
l->r.der[i] = sto;f = 1;break;
}
}
if(!f)
{
for(int i=0;i<255;i++)
{
if(l->lookahead[i])
bit[i] = 1;
}
}
//printf("fill_end\n");
}
/* Counts the lookahead flags that are set in `bits` (255 entries). */
static int lookahead_count(const int *bits)
{
    int count = 0;
    for (int j = 0; j < 255; j++)
    {
        if (bits[j])
        {
            count++;
        }
    }
    return count;
}

/* Extends state `l` in place to its LR(1) closure.
 *
 * For every item with a variable B after the dot and every rule B -> gamma,
 * the item [B -> . gamma] is added if missing and its lookahead set is
 * extended via fill_lookaheads(). Enlarging the lookaheads of an existing
 * item can enlarge those of items derived from it, so the scan repeats until
 * nothing changes. Items beyond the state's capacity are dropped with a
 * warning instead of overflowing the array. */
void build_state(struct State* l)
{
    const int capacity = (int)(sizeof l->itm / sizeof l->itm[0]);
    int changed;

    do
    {
        changed = 0;
        for (int i = 0; i < l->len; i++)
        {
            char next = l->itm[i].r.der[l->itm[i].dotposition];
            if (next < 'A' || next > 'Z')
            {
                continue;
            }

            for (int j = 0; j < n; j++)
            {
                if (a[j].var != next)
                {
                    continue;
                }

                int s = is_item_in(l, a[j], 0);
                if (s == -1)
                {
                    if (l->len >= capacity)
                    {
                        fprintf(stderr, "Warning: state holds more than %d items; closure truncated\n",
                                capacity);
                        return;
                    }
                    s = l->len++;
                    l->itm[s].dotposition = 0;
                    l->itm[s].r = a[j];
                    l->itm[s].f = 0;
                    /* Clear all 255 ints, not just the first 255 bytes. */
                    memset(l->itm[s].lookahead, 0, sizeof l->itm[s].lookahead);
                    changed = 1;
                }

                int before = lookahead_count(l->itm[s].lookahead);
                fill_lookaheads(l->itm[s].lookahead, &l->itm[i]);
                if (lookahead_count(l->itm[s].lookahead) != before)
                {
                    changed = 1;
                }
            }
        }
    } while (changed);
}
/* Searches the list starting at `l` for a state that has as many items as
   `s` and contains every item of `s` (same rule, dot and lookaheads).
   Returns that state's position in the list, or -1 when there is none. */
int state_already_included(struct list* l,struct State* s)
{
    int position = 0;

    for (struct list *node = l; node != NULL; node = node->next, position++)
    {
        if (node->data.len != s->len)
        {
            continue;
        }

        int all_present = 1;
        for (int i = 0; i < s->len && all_present; i++)
        {
            all_present = is_item_in_advanced(&node->data, s->itm[i].r,
                                              s->itm[i].dotposition,
                                              s->itm[i].lookahead);
        }
        if (all_present)
        {
            return position;
        }
    }
    return -1;
}
void print_state(struct list* q)
{
for(int i=0;i<q->data.len;i++)
{
printf("%c :: ",q->data.itm[i].r.var);
if(q->data.itm[i].r.der[0]=='@')
q->data.itm[i].r.der[0] = '\0';
char sto = q->data.itm[i].r.der[q->data.itm[i].dotposition];
q->data.itm[i].r.der[q->data.itm[i].dotposition] = '\0';
printf("%s.",q->data.itm[i].r.der);
q->data.itm[i].r.der[q->data.itm[i].dotposition] = sto;
printf("%s",q->data.itm[i].r.der+q->data.itm[i].dotposition);
printf(" { ");
for(int j=0;j<255;j++)
{
if(q->data.itm[i].lookahead[j])
printf("%c,",(char)j);
}
printf(" }\n");
}
}
int num=0;
void find_out_states(struct list* l)
{
if(l==NULL)
return;
for(int i=0;i<l->data.len;i++)
{
if(l->data.itm[i].f)
continue;
else if(l->data.itm[i].dotposition==strlen(l->data.itm[i].r.der))
{
l->data.itm[i].f = 1;
continue;
}
//printf("here\n");
struct list* t;
t = (struct list*)malloc(sizeof(struct list));
for(int ind=0;ind<255;ind++)
{
t->data.transition[ind] = -1;
}
t->data.len = 1;
t->data.itm[0].dotposition = l->data.itm[i].dotposition+1;
t->data.itm[0].r = l->data.itm[i].r;
for(int ind=0;ind<255;ind++)
t->data.itm[0].lookahead[ind] = l->data.itm[i].lookahead[ind];
l->data.itm[i].f = 1;
for(int j=i+1;j<l->data.len;j++)
{
if(l->data.itm[j].r.der[l->data.itm[j].dotposition]==l->data.itm[i].r.der[l
-
>data.itm[i].dotposition])
{
//t->data.len = 1;
t->data.itm[t->data.len].dotposition = l->data.itm[j].dotposition+1;
t->data.itm[t->data.len].r = l->data.itm[j].r;
memset(t->data.itm[t->data.len].lookahead,0,255);
for(int ind=0;ind<255;ind++)
t->data.itm[t->data.len].lookahead[ind] = l->data.itm[j].lookahead[ind];
l->data.itm[j].f = 1;
t->data.len++;
}
}
build_state(&t->data);
int s;
if((s = state_already_included(head,&t->data))==-1)
{
tail->next = t;
tail = t;
tail->next = NULL;
l->data.transition[l->data.itm[i].r.der[l->data.itm[i].dotposition]] = num;
num++;
for(int ii=0;ii<t->data.len;ii++)
{
if(t->data.itm[i].r.der[0]=='@')
t->data.itm[i].r.der[0] = '\0';
}
}
else
{
l->data.transition[l->data.itm[i].r.der[l->data.itm[i].dotposition]] = s;
}
}
find_out_states(l->next);
}
/* One cell of the parse table. construct_table() writes 'S' (shift), 'R'
   (reduce) or 'A' (accept) into op; state_no is the target state or rule
   number, and -1 marks an empty cell (set by main() before the table is built). */
struct Table
{
char op;
int state_no;
};
/* Maps grammar symbol `c` to its table column: terminals occupy columns
   0..n_term-1 and variables follow at n_term... Returns 0 when `c` is neither,
   which callers cannot tell apart from a genuine column 0. */
int find(char c)
{
    int col = 0;
    while (col < n_term)
    {
        if (term[col] == c)
        {
            return col;
        }
        col++;
    }

    int v = 0;
    while (v < n_var)
    {
        if (var[v] == c)
        {
            return n_term + v;
        }
        v++;
    }
    return 0;
}
/* Returns the 1-based number of the grammar rule equal to `r` (same variable
   and same derivation), or -1 when no rule matches. */
int find_rule(struct Rules r)
{
    int idx = 0;
    while (idx < n)
    {
        if (a[idx].var == r.var && strcmp(a[idx].der, r.der) == 0)
        {
            return idx + 1;
        }
        idx++;
    }
    return -1;
}
void construct_table(struct Table** tab,int num)
{
struct list* q;int k;
q = head;
for(int i=0;i<num;i++)
{
for(int j=0;j<255;j++)
{
if(q->data.transition[j]!=-1)
{
k = find(j);
if(j>='A'&&j<='Z')
{
tab[i][k].state_no = q->data.transition[j];
}
else
{
tab[i][k].op = 'S';
tab[i][k].state_no = q->data.transition[j];
}
}
}
for(int j=0;j<q->data.len;j++)
{
if(q->data.itm[j].dotposition==strlen(q->data.itm[j].r.der))
{
if(q->data.itm[j].r.var=='#')
{
//printf("hey!!!!\n");
k = find('$');
//printf("state: %d Column: %d\n",i,k);
tab[i][k].op = 'A';
tab[i][k].state_no = 0;continue;
}
int nn = find_rule(q->data.itm[j].r);
for(int l=0;l<255;l++)
{
if(q->data.itm[j].lookahead[l])//if(follow[q->data.itm[j].r.var-'A'][l])
{
k = find(l);
if(tab[i][k].state_no==-1)
{
tab[i][k].op = 'R';
tab[i][k].state_no = nn;
}
else
{
printf("A Shift-Reduce conflict has taken place in state: %d\n",i);
printf("The operators involved are: %c (for shift), %c (for
reduce)\n",term[k],a[nn-
1].der[1]);
printf("Press 1. for shift 2. for reduce\n");
int d;
scanf("%d",&d);while(getchar()!='\n');
if(d==2)
{
tab[i][k].op = 'R';
tab[i][k].state_no = nn;
}
}
}
}
}
}
q = q->next;
}
}
/* Discards the rest of the current input line; stops at end of input too. */
static void skip_rest_of_line(void)
{
    int ch;
    while ((ch = getchar()) != '\n' && ch != EOF)
    {
        /* nothing */
    }
}

/* Entry point of the CLR experiment. Reads the grammar rule by rule
 * (single-character symbols, 'A'..'Z' are variables, '@' is the empty
 * string), computes the nullable/FIRST/FOLLOW information, builds the LR(1)
 * states starting from "# -> .S" with lookahead '$', prints the states and
 * finally prints the parse table. The start variable is fixed to 'S'.
 *
 * Returns 0 on success and 1 on invalid input or allocation failure. */
int main()
{
    printf("Enter the no of rules\n");
    if (scanf("%d", &n) != 1 || n <= 0)
    {
        fprintf(stderr, "Invalid number of rules\n");
        return 1;
    }
    skip_rest_of_line();

    a = malloc(sizeof *a * (size_t)n);
    if (a == NULL)
    {
        fprintf(stderr, "Out of memory\n");
        return 1;
    }

    for (int i = 0; i < n; i++)
    {
        printf("Enter the variable\n");
        if (scanf("%c", &a[i].var) != 1 || a[i].var < 'A' || a[i].var > 'Z')
        {
            fprintf(stderr, "A variable must be a letter from A to Z\n");
            return 1;
        }
        if (variables[a[i].var - 'A'] != 1)
        {
            variables[a[i].var - 'A'] = 1;
            n_var++;
        }
        skip_rest_of_line();

        printf("Enter the derivation\n");
        /* der holds 10 bytes: at most 9 symbols plus the terminator. */
        if (scanf("%9s", a[i].der) != 1)
        {
            fprintf(stderr, "Missing derivation\n");
            return 1;
        }
        for (size_t j = 0; a[i].der[j] != '\0'; j++)
        {
            unsigned char c = (unsigned char)a[i].der[j];
            if (c != '@' && (c < 'A' || c > 'Z') && c < 255 && terminals[c] != 1)
            {
                terminals[c] = 1;
                n_term++;
            }
        }
        skip_rest_of_line();
    }

    /* Compact arrays of variables and terminals (table column order). */
    var = malloc(sizeof *var * (size_t)n_var);
    n_term++; /* room for the end marker '$' */
    term = malloc(sizeof *term * (size_t)n_term);
    if (var == NULL || term == NULL)
    {
        fprintf(stderr, "Out of memory\n");
        return 1;
    }
    int ind = 0;
    for (int i = 0; i < 26; i++)
    {
        if (variables[i])
            var[ind++] = 'A' + i;
    }
    ind = 0;
    for (int i = 0; i < 255; i++)
    {
        if (terminals[i])
            term[ind++] = (char)i;
    }
    term[ind++] = '$';

    /* Nullable variables: iterate until nothing changes. */
    int no_change = 0; /* despite the name, 1 means "something changed" */
    do
    {
        no_change = 0;
        for (int i = 0; i < n; i++)
        {
            int derives_empty = (strlen(a[i].der) == 1 && a[i].der[0] == '@') ||
                                is_nullable(a[i].der);
            if (derives_empty && !nullable[a[i].var - 'A'])
            {
                no_change = 1;
                nullable[a[i].var - 'A'] = 1;
            }
        }
    } while (no_change);

    // calculating the first
    do
    {
        no_change = 0;
        for (int i = 0; i < n; i++)
        {
            if (a[i].der[0] == '@')
            {
                continue;
            }
            char *lhs_first = first[a[i].var - 'A'];

            if (a[i].der[0] >= 'A' && a[i].der[0] <= 'Z')
            {
                for (int j = 0; j < (int)strlen(a[i].der); j++)
                {
                    /* Temporarily cut the string at j to test whether the
                       prefix before symbol j is nullable. */
                    char sto = a[i].der[j];
                    a[i].der[j] = '\0';
                    int prefix_nullable = is_nullable(a[i].der);
                    a[i].der[j] = sto;
                    if (!prefix_nullable)
                    {
                        break;
                    }

                    if (sto >= 'A' && sto <= 'Z')
                    {
                        for (int k = 0; k < 255; k++)
                        {
                            if (first[sto - 'A'][k] && !lhs_first[k])
                            {
                                no_change = 1;
                                lhs_first[k] = 1;
                            }
                        }
                    }
                    else if (!lhs_first[(unsigned char)sto])
                    {
                        no_change = 1;
                        lhs_first[(unsigned char)sto] = 1;
                        break;
                    }
                }
            }
            else if (!lhs_first[(unsigned char)a[i].der[0]])
            {
                no_change = 1;
                lhs_first[(unsigned char)a[i].der[0]] = 1;
            }
        }
    } while (no_change);

    // finding the follow
    start = 'S';
    follow[start - 'A']['$'] = 1; // sentinel
    do
    {
        no_change = 0;
        for (int i = 0; i < n; i++)
        {
            if (a[i].der[0] == '@')
            {
                continue;
            }
            for (int j = (int)strlen(a[i].der) - 1; j >= 0; j--)
            {
                if (a[i].der[j] < 'A' || a[i].der[j] > 'Z')
                {
                    continue;
                }
                char *target_follow = follow[a[i].der[j] - 'A'];

                /* Everything after der[j] is nullable: FOLLOW(var) flows in. */
                if (is_nullable(a[i].der + j + 1))
                {
                    for (int k = 0; k < 255; k++)
                    {
                        if (follow[a[i].var - 'A'][k] && !target_follow[k])
                        {
                            no_change = 1;
                            target_follow[k] = 1;
                        }
                    }
                }

                /* FIRST of each symbol reachable across a nullable gap. */
                for (int k = j + 1; k < (int)strlen(a[i].der); k++)
                {
                    char sto = a[i].der[k];
                    a[i].der[k] = '\0';
                    int gap_nullable = is_nullable(a[i].der + j + 1);
                    a[i].der[k] = sto;
                    if (!gap_nullable)
                    {
                        break;
                    }

                    if (sto >= 'A' && sto <= 'Z')
                    {
                        for (int l = 0; l < 255; l++)
                        {
                            if (first[sto - 'A'][l] && !target_follow[l])
                            {
                                no_change = 1;
                                target_follow[l] = 1;
                            }
                        }
                    }
                    else if (!target_follow[(unsigned char)sto])
                    {
                        no_change = 1;
                        target_follow[(unsigned char)sto] = 1;
                        break;
                    }
                }
            }
        }
    } while (no_change);

    /* Initial state: closure of [# -> .S, $]. */
    head = malloc(sizeof *head);
    if (head == NULL)
    {
        fprintf(stderr, "Out of memory\n");
        return 1;
    }
    tail = head;
    head->next = NULL;
    head->data.len = 1;
    head->data.itm[0].r.var = '#';
    head->data.itm[0].r.der[0] = start;
    head->data.itm[0].r.der[1] = '\0';
    head->data.itm[0].dotposition = 0;
    head->data.itm[0].f = 0;
    /* Clear all 255 ints, not just the first 255 bytes. */
    memset(head->data.itm[0].lookahead, 0, sizeof head->data.itm[0].lookahead);
    head->data.itm[0].lookahead['$'] = 1;
    for (int i = 0; i < 255; i++)
    {
        head->data.transition[i] = -1;
    }
    build_state(&head->data);

    /* Epsilon items are stored with an empty derivation. */
    struct list *q = head;
    for (int i = 0; i < q->data.len; i++)
    {
        if (q->data.itm[i].r.der[0] == '@')
            q->data.itm[i].r.der[0] = '\0';
    }
    num++; /* the initial state is state 0 */
    find_out_states(head);

    q = head;
    int num1 = 0;
    while (q != NULL)
    {
        printf("******** I%d *********\n", num1);
        print_state(q);
        q = q->next;
        num1++;
    }

    /* One row per state, one column per terminal and variable; -1 = empty. */
    struct Table **tab = malloc(sizeof *tab * (size_t)num);
    if (tab == NULL)
    {
        fprintf(stderr, "Out of memory\n");
        return 1;
    }
    for (int i = 0; i < num; i++)
    {
        tab[i] = malloc(sizeof *tab[i] * (size_t)(n_var + n_term));
        if (tab[i] == NULL)
        {
            fprintf(stderr, "Out of memory\n");
            return 1;
        }
        for (int j = 0; j < n_var + n_term; j++)
        {
            tab[i][j].op = ' ';
            tab[i][j].state_no = -1;
        }
    }

    /* Match the normalised items when rules are looked up for reductions. */
    for (int i = 0; i < n; i++)
        if (a[i].der[0] == '@')
            a[i].der[0] = '\0';
    construct_table(tab, num);

    printf("%8s", " ");
    for (int i = 0; i < n_term; i++)
    {
        printf("%8c", term[i]);
    }
    for (int i = 0; i < n_var; i++)
        printf("%8c", var[i]);
    printf("\n");
    for (int i = 0; i < num; i++)
    {
        printf("%7d:", i);
        for (int j = 0; j < n_term + n_var; j++)
        {
            if (tab[i][j].state_no != -1)
            {
                printf("%7c%d", tab[i][j].op, tab[i][j].state_no);
            }
            else
                printf("%8s", "-");
        }
        printf("\n");
    }
    return 0;
}
Input:
Output:
Conclusion: We implemented an SLR parsing table.
b)
Aim: Implement the LR parsing algorithm, taking both the parse
table and the input string as inputs. Use C language for
implementation.
Code:
#include <iostream>
#include <stack>
#include <vector>
#include <string>
#include <map>
#include <sstream>
#include <set>
#include <iomanip>
#include <algorithm>
using namespace std;
// One cell of the LR parsing table.
// `action` holds the kind of move ('s' shift, 'r' reduce, 'a' accept) and
// `number` holds the state number or production number that goes with it.
// A default-constructed entry has action ' ' and number 0.
struct TableEntry {
    char action;
    int number;

    TableEntry(char a = ' ', int n = 0) {
        action = a;
        number = n;
    }
};
// A grammar rule `lhs -> rhs`.
// `rhs` is stored as the text that was passed in; `rhsLength` is the number
// of whitespace-separated symbols found in that text (0 for an empty or
// all-blank right-hand side).
struct Production {
    string lhs;
    string rhs;
    int rhsLength;

    Production(string l, string r) : lhs(l), rhs(r), rhsLength(0) {
        // Count symbols by pulling whitespace-delimited tokens off the RHS.
        stringstream symbols(rhs);
        for (string symbol; symbols >> symbol; ) {
            ++rhsLength;
        }
    }
};
class CLRParser {
private:
map<pair<int, string>, TableEntry> parsingTable;
vector<Production> productions;
stack<int> stateStack;
stack<string> symbolStack;
set<string> terminals;
set<string> nonTerminals;
string getStackContents() {
// Make copies of the stacks
stack<int> stateTemp = stateStack;
stack<string> symbolTemp = symbolStack;
// Vectors to store reversed stack contents
vector<int> states;
vector<string> symbols;
// Extract elements from stacks
while (!stateTemp.empty()) {
states.push_back(stateTemp.top());
stateTemp.pop();
}
while (!symbolTemp.empty()) {
symbols.push_back(symbolTemp.top());
symbolTemp.pop();
}
// Reverse to get correct order (bottom to top)
reverse(states.begin(), states.end());
reverse(symbols.begin(), symbols.end());
// Build the result string with alternating state and symbol
string result = to_string(states[0]); // Starting state is always 0
// Start from index 1 for symbols since $ is at the bottom
for (size_t i = 1; i < symbols.size(); i++) {
result += " " + symbols[i]; // Add symbol
if (i < states.size()) {
result += " " + to_string(states[i]); // Add state
}
}
return result;
}
string getRemainingInput(const vector<string>& tokens, int currentPos) {
string result;
for (int i = currentPos; i < tokens.size(); i++) {
result += tokens[i] + " ";
}
result += "$";
return result;
}
void printTableHeader() {
cout << setfill('-') << setw(80) << "-" << endl;
cout << setfill(' ');
cout << left << setw(30) << "Stack"
<< left << setw(30) << "Input"
<< left << setw(20) << "Action" << endl;
cout << setfill('-') << setw(80) << "-" << endl;
cout << setfill(' ');
}
void printTableRow(const string& stack, const string& input, const string&
action) {
cout << left << setw(30) << stack
<< left << setw(30) << input
<< left << setw(20) << action << endl;
}
vector<string> tokenize(const string& input) {
vector<string> tokens;
stringstream ss(input);
string token;
while (ss >> token) {
tokens.push_back(token);
}
return tokens;
}
public:
void inputGrammar() {
cout << "Enter number of productions: ";
int n;
cin >> n;
cin.ignore();
cout << "Enter productions in format 'LHS -> RHS' (use spaces between
symbols)\n";
cout << "Example: S -> C C\n";
for(int i = 0; i < n; i++) {
string line;
getline(cin, line);
size_t arrow = line.find("->");
if(arrow != string::npos) {
string lhs = line.substr(0, arrow);
string rhs = line.substr(arrow + 2);
// Trim whitespace
lhs = lhs.substr(0, lhs.find_last_not_of(" \t") + 1);
rhs = rhs.substr(rhs.find_first_not_of(" \t"));
productions.push_back(Production(lhs, rhs));
nonTerminals.insert(lhs);
// Extract terminals from RHS
stringstream ss(rhs);
string token;
while(ss >> token) {
if(token[0] >= 'a' && token[0] <= 'z') {
terminals.insert(token);
}
}
}
}
}
void inputParsingTable() {
cout << "Enter number of states: ";
int states;
cin >> states;
cin.ignore();
cout << "Enter parsing table entries in following formats:\n";
cout << "For terminals: state symbol s/r number\n";
cout << "For non-terminals: state symbol number (this is GOTO)\n";
cout << "Example for terminal: 0 c s 3\n";
cout << "Example for non-terminal: 0 S 1\n";
cout << "Enter 'done' when finished\n";
while(true) {
string line;
getline(cin, line);
if(line == "done") break;
stringstream ss(line);
int state;
string symbol;
string actionOrNumber;
if(ss >> state >> symbol >> actionOrNumber) {
if(nonTerminals.find(symbol) != nonTerminals.end()) {
int gotoState = stoi(actionOrNumber);
parsingTable[{state, symbol}] = TableEntry('g', gotoState);
}
else {
int number;
if(ss >> number) {
char action = actionOrNumber[0];
parsingTable[{state, symbol}] = TableEntry(action, number);
}
}
}
}
}
bool parse(const string& input) {
while (!stateStack.empty()) stateStack.pop();
while (!symbolStack.empty()) symbolStack.pop();
stateStack.push(0);
symbolStack.push("$");
vector<string> tokens = tokenize(input);
int currentToken = 0;
printTableHeader();
while (true) {
int currentState = stateStack.top();
string currentSymbol = (currentToken < tokens.size()) ?
tokens[currentToken] : "$";
string stackContent = getStackContents();
string remainingInput = getRemainingInput(tokens, currentToken);
auto entry = parsingTable.find({currentState, currentSymbol});
if (entry == parsingTable.end()) {
printTableRow(stackContent, remainingInput, "Error: No action defined");
return false;
}
TableEntry action = entry->second;
if (action.action == 's') {
string actionStr = "Shift " + to_string(action.number);
printTableRow(stackContent, remainingInput, actionStr);
stateStack.push(action.number);
symbolStack.push(currentSymbol);
currentToken++;
}
else if (action.action == 'r') {
Production prod = productions[action.number - 1];
string actionStr = "Reduce by " + prod.lhs + " -> " + prod.rhs;
printTableRow(stackContent, remainingInput, actionStr);
for (int i = 0; i < prod.rhsLength; i++) {
stateStack.pop();
symbolStack.pop();
}
symbolStack.push(prod.lhs);
int gotoState = stateStack.top();
auto gotoEntry = parsingTable.find({gotoState, prod.lhs});
if (gotoEntry == parsingTable.end()) {
printTableRow(getStackContents(), remainingInput, "Error: No goto
defined");
return false;
}
stateStack.push(gotoEntry->second.number);
}
else if (action.action == 'a') {
printTableRow(stackContent, remainingInput, "Accept");
return true;
}
}
return false;
}
};
// Entry point: reads the grammar and the parsing table from standard input,
// then reads one line of space-separated tokens, runs the parser on it and
// reports whether parsing succeeded. Always exits with status 0.
int main() {
    CLRParser parser;
    parser.inputGrammar();
    parser.inputParsingTable();

    cout << "Enter input string to parse (tokens space-separated): ";
    string input;
    getline(cin, input);

    const bool accepted = parser.parse(input);
    const char* verdict = accepted ? "\nParsing completed successfully!"
                                   : "\nParsing failed!";
    cout << verdict << endl;
    return 0;
}
Input:
Output: