Sec B
Sec B
SUBMITTED TO : SUBMITTED BY :
Ms. EKTA BOHARA KUNAL RANJAN
BT22CSE207
5TH SEMESTER ‘B’
INDEX
OBJECTIVE : Design a lexical analyzer for the given language & the lexical analyzer should
ignore redundant spaces, tabs and new lines
THEORY : Here's a basic implementation of a lexical analyzer (lexer) in C++ that processes an
input string (code) and breaks it down into tokens, while ignoring redundant spaces, tabs,
and newlines.
Steps:
1. Define a list of valid tokens (keywords, operators, literals, etc.).
2. Implement functions to recognize these tokens from the input.
3. Ignore redundant whitespace characters (spaces, tabs, newlines) during tokenization.
Assumptions:
The language has keywords like if, else, while, etc.
Operators include +, -, =, *, /, etc.
Identifiers start with a letter or an underscore and may continue with letters, digits, or underscores.
Numbers are sequences of digits.
PROGRAM:
#include <iostream>
#include <string>
#include <cctype>
#include <vector>
#include <unordered_set>
using namespace std;
// Reserved words of the toy language. isKeyword() looks words up in this set,
// and the lexer uses the result to emit KEYWORD instead of IDENTIFIER tokens.
unordered_set<string> keywords = {"if", "else", "while", "return", "int", "float"};
// Token structure: one lexical unit produced by lexicalAnalyzer().
struct Token {
// Category label; the lexer emits "KEYWORD", "IDENTIFIER", "NUMBER",
// "OPERATOR" or "DELIMITER".
string type;
// The exact characters from the input that make up this token.
string value;
};
// Returns true when `str` is one of the reserved words stored in `keywords`.
bool isKeyword(const std::string &str) {
    const bool reserved = keywords.count(str) > 0;
    return reserved;
}
// Returns true when `ch` may START an identifier: an alphabetic character or
// an underscore. Digits are only legal after the first character, which the
// caller checks separately.
//
// The argument is converted to unsigned char first because passing a negative
// value (any byte >= 0x80 where plain char is signed) to isalpha() is
// undefined behaviour.
bool isIdentifier(char ch) {
    return ch == '_' || std::isalpha(static_cast<unsigned char>(ch)) != 0;
}
// Tells whether `ch` is one of the single-character operators the lexer
// understands: + - * / = < >
bool isOperator(char ch) {
    switch (ch) {
        case '+':
        case '-':
        case '*':
        case '/':
        case '=':
        case '<':
        case '>':
            return true;
        default:
            return false;
    }
}
// Returns true when `ch` is a decimal digit ('0'..'9').
//
// The argument is converted to unsigned char before calling isdigit(),
// because passing a negative char value to the <cctype> classifiers is
// undefined behaviour.
bool isDigit(char ch) {
    return std::isdigit(static_cast<unsigned char>(ch)) != 0;
}
// Lexical analyzer function.
//
// Scans `input` from left to right and returns the tokens found, in order.
// Whitespace (spaces, tabs, newlines) only separates tokens and is never
// emitted. Characters that fit no token class are reported on standard output
// as "Unknown token: <char>" and skipped.
//
// Token classes:
//   KEYWORD / IDENTIFIER - a letter or '_' followed by letters, digits or '_'
//   NUMBER               - a run of decimal digits
//   OPERATOR             - one character accepted by isOperator()
//   DELIMITER            - one of ( ) { } ;
std::vector<Token> lexicalAnalyzer(const std::string &input) {
    std::vector<Token> tokens;
    const std::size_t length = input.size();
    std::size_t i = 0;

    // <cctype> classifiers must receive an unsigned char value; a negative
    // plain char is undefined behaviour.
    const auto isBlank = [](char c) {
        return std::isspace(static_cast<unsigned char>(c)) != 0;
    };
    const auto isWordChar = [](char c) {
        return c == '_' || std::isalnum(static_cast<unsigned char>(c)) != 0;
    };

    while (i < length) {
        const char ch = input[i];

        // Skip spaces, tabs, and newlines.
        if (isBlank(ch)) {
            ++i;
            continue;
        }

        if (isIdentifier(ch)) {
            // Keywords and identifiers share one shape; decide after reading
            // the whole word.
            const std::size_t start = i;
            while (i < length && isWordChar(input[i])) {
                ++i;
            }
            const std::string word = input.substr(start, i - start);
            tokens.push_back({isKeyword(word) ? "KEYWORD" : "IDENTIFIER", word});
        } else if (isDigit(ch)) {
            const std::size_t start = i;
            while (i < length && isDigit(input[i])) {
                ++i;
            }
            tokens.push_back({"NUMBER", input.substr(start, i - start)});
        } else if (isOperator(ch)) {
            tokens.push_back({"OPERATOR", std::string(1, ch)});
            ++i;
        } else if (ch == '(' || ch == ')' || ch == '{' || ch == '}' || ch == ';') {
            tokens.push_back({"DELIMITER", std::string(1, ch)});
            ++i;
        } else {
            // Error handling: report the character and keep scanning.
            std::cout << "Unknown token: " << ch << std::endl;
            ++i;
        }
    }
    return tokens;
}
// Main function
int main() {
string code = "int x = 10; while (x > 0) { x = x - 1; }";
vector<Token> tokens = lexicalAnalyzer(code);
// Output tokens
for (const auto &token : tokens) {
cout << "Token Type: " << token.type << ", Value: " << token.value << endl;
}
return 0;
}
INPUT :
int x = 10; while (x > 0) { x = x - 1; }
OUTPUT :
Token Type: KEYWORD, Value: int
Token Type: IDENTIFIER, Value: x
Token Type: OPERATOR, Value: =
Token Type: NUMBER, Value: 10
Token Type: DELIMITER, Value: ;
Token Type: KEYWORD, Value: while
Token Type: DELIMITER, Value: (
Token Type: IDENTIFIER, Value: x
Token Type: OPERATOR, Value: >
Token Type: NUMBER, Value: 0
Token Type: DELIMITER, Value: )
Token Type: DELIMITER, Value: {
Token Type: IDENTIFIER, Value: x
Token Type: OPERATOR, Value: =
Token Type: IDENTIFIER, Value: x
Token Type: OPERATOR, Value: -
Token Type: NUMBER, Value: 1
Token Type: DELIMITER, Value: ;
Token Type: DELIMITER, Value: }
This lexical analyzer can be extended by adding more operators, keywords, and handling
more complex rules.
Practical 02
OBJECTIVE : Write a C++ program to identify whether a given line is a comment or not and
whether a given identifier is valid or not
THEORY : Below is a C++ program that checks whether a given line is a comment and
whether a given identifier is valid according to C++ identifier naming rules:
1. Comment Check: In C++, a line is a comment if it starts with // for a single-line comment
or /* and ends with */ for a multi-line comment.
2. Identifier Check: A valid identifier in C++:
Must start with an alphabetic character (a-z, A-Z) or an underscore (_).
After the first character, it can contain alphanumeric characters (a-z, A-Z, 0-9) or
underscores (_).
It cannot be a keyword.
PROGRAM :
#include <iostream>
#include <cctype>
#include <string>
#include <vector>
using namespace std;
// Function to check if a line is a comment.
//
// Returns true when `line`
//   * starts with "//" (single-line comment), or
//   * starts with "/*" and contains the closing "*/" somewhere after the
//     opener (block comment written on one line).
// Leading whitespace is not skipped: the comment marker must be the first
// two characters of the line.
bool isComment(const std::string& line) {
    // compare() clips the range to the string length, so short and empty
    // lines are handled without a temporary substring.
    if (line.compare(0, 2, "//") == 0) {
        return true;
    }
    // Search for the terminator from index 2 so the opener's own '*' cannot
    // be reused as part of "*/" (i.e. "/*/" is not a complete comment).
    if (line.compare(0, 2, "/*") == 0 && line.find("*/", 2) != std::string::npos) {
        return true;
    }
    return false;
}
// List of C++ keywords.
// Covers the full reserved-word set of C++ (including the alternative
// operator spellings such as "and"/"not" and the C++20 additions), not just
// the subset inherited from C, so that names like "class" or "namespace" are
// rejected as identifiers.
std::vector<std::string> cpp_keywords = {
    "alignas", "alignof", "and", "and_eq", "asm", "auto", "bitand", "bitor",
    "bool", "break", "case", "catch", "char", "char8_t", "char16_t", "char32_t",
    "class", "compl", "concept", "const", "consteval", "constexpr", "constinit",
    "const_cast", "continue", "co_await", "co_return", "co_yield", "decltype",
    "default", "delete", "do", "double", "dynamic_cast", "else", "enum",
    "explicit", "export", "extern", "false", "float", "for", "friend", "goto",
    "if", "inline", "int", "long", "mutable", "namespace", "new", "noexcept",
    "not", "not_eq", "nullptr", "operator", "or", "or_eq", "private",
    "protected", "public", "register", "reinterpret_cast", "requires", "return",
    "short", "signed", "sizeof", "static", "static_assert", "static_cast",
    "struct", "switch", "template", "this", "thread_local", "throw", "true",
    "try", "typedef", "typeid", "typename", "union", "unsigned", "using",
    "virtual", "void", "volatile", "wchar_t", "while", "xor", "xor_eq"
};

// Function to check if a string is a C++ keyword.
// The comparison is exact and case-sensitive ("While" is not a keyword).
bool isKeyword(const std::string& identifier) {
    for (const std::string& keyword : cpp_keywords) {
        if (identifier == keyword) {
            return true;
        }
    }
    return false;
}
// Function to check if an identifier is valid.
//
// An identifier is accepted when it is non-empty, starts with a letter or
// '_', continues with letters, digits or '_' only, and is not rejected by
// isKeyword().
bool isValidIdentifier(const std::string& identifier) {
    if (identifier.empty()) {
        return false;
    }

    // The <cctype> classifiers need an unsigned char value; passing a
    // negative plain char is undefined behaviour.
    const auto isLeadChar = [](char c) {
        return c == '_' || std::isalpha(static_cast<unsigned char>(c)) != 0;
    };
    const auto isBodyChar = [](char c) {
        return c == '_' || std::isalnum(static_cast<unsigned char>(c)) != 0;
    };

    if (!isLeadChar(identifier[0])) {
        return false;
    }
    for (std::size_t i = 1; i < identifier.size(); ++i) {
        if (!isBodyChar(identifier[i])) {
            return false;
        }
    }

    // A well-formed name is still invalid when it is a reserved word.
    return !isKeyword(identifier);
}
// Reads one full line and classifies it as comment / not comment, then reads
// one whitespace-delimited word and reports whether it is a valid identifier.
int main() {
    std::string line;
    std::cout << "Enter a line of code: ";
    std::getline(std::cin, line);

    const char* commentVerdict =
        isComment(line) ? "The line is a comment." : "The line is not a comment.";
    std::cout << commentVerdict << std::endl;

    std::string identifier;
    std::cout << "Enter an identifier to check: ";
    std::cin >> identifier;

    const char* identifierVerdict =
        isValidIdentifier(identifier) ? "\" is valid." : "\" is not valid.";
    std::cout << "The identifier \"" << identifier << identifierVerdict << std::endl;
    return 0;
}
INPUT :
int x = 10;
_ValidIdentifier123
OUTPUT :
Enter a line of code:
The line is not a comment.
Enter an identifier to check:
The identifier "_ValidIdentifier123" is valid.
Practical 03
PROGRAMS :
a) Lex program to count characters, lines, and words from a text file (in C++)
%{
/* Counts characters, lines and words in the scanner's input. */
#include <iostream>
using namespace std;
int chars = 0, lines = 0, words = 0;
%}
%%
[^ \t\n]+ { words++; chars += yyleng; } /* A word is any run of non-whitespace characters, so "don't" or "abc123" counts once */
\n { lines++; chars++; } /* Newline: one more line, and it is a character too */
. { chars++; } /* Whatever is left is a single space or tab */
%%
int main() {
    yylex(); // Run the scanner until end of input
    cout << "Number of characters: " << chars << endl;
    cout << "Number of lines: " << lines << endl;
    cout << "Number of words: " << words << endl;
    return 0;
}
/* Returning 1 tells the scanner there is no further input after EOF. */
int yywrap() {
    return 1;
}
INPUT :
Hello World
This is a test.
OUTPUT :
Number of characters: 28
Number of lines: 2
Number of words: 6
b) Lex program to count vowels and consonants in a given input string (in C++)
%{
/* Counts vowels and consonants in the scanner's input. */
#include <iostream>
using namespace std;
int vowels = 0, consonants = 0;
%}
%%
[aeiouAEIOU] { vowels++; } /* Vowels; listed first so this rule wins over the letter rule below */
[a-zA-Z] { consonants++; } /* Any remaining letter is a consonant */
.|\n { /* Ignore everything else. Newline is listed explicitly because '.' does not match it and the default rule would echo it to the output */ }
%%
int main() {
    cout << "Enter a string: ";
    yylex(); // Run the scanner until end of input
    cout << "Number of vowels: " << vowels << endl;
    cout << "Number of consonants: " << consonants << endl;
    return 0;
}
/* Returning 1 tells the scanner there is no further input after EOF. */
int yywrap() {
    return 1;
}
INPUT :
Hello World
OUTPUT :
Enter a string:
Number of vowels: 3
Number of consonants: 7
Practical 04
PROGRAMS :
INPUT :
I have 2 apples and 15 oranges.
OUTPUT :
Number: 2
Number: 15
b) Lex program to print out all HTML tags from a given file:
%{
/* Prints every HTML tag (text between '<' and the next '>') found in the input. */
#include <iostream>
using namespace std;
%}
%%
"<"[^>]+">" { cout << "HTML Tag: " << yytext << endl; } /* The angle brackets are quoted: an unquoted '<' would start a start-condition list */
.|\n ; /* Skip all other characters */
%%
/* Returning 1 tells the scanner there is no further input after EOF. */
int yywrap() {
    return 1;
}
int main() {
    yylex(); // Run the scanner until end of input
    return 0;
}
INPUT :
<html>
Hello World
<body>
OUTPUT :
HTML Tag: <html>
HTML Tag: <body>
Practical 05
THEORY:
1. Grammar: In this program, the grammar is hardcoded. For example, S -> A | B, A -> aA | ε,
and B -> b.
2. First and Follow Sets: The computeFirst and computeFollow functions compute the First
and Follow sets for the grammar.
3. Parsing Table: The createParsingTable function uses the First and Follow sets to construct
the parsing table.
4. Parsing: The parseInput function simulates the LL(1) parsing process using a stack and the
parsing table. It checks if the input string can be derived from the start symbol according to
the grammar rules.
Below is a simplified C++ program that constructs an LL(1) parsing table and performs
parsing based on a hardcoded CFG.
C++ Program: LL(1) Parser
PROGRAM:
#include <iostream>
#include <map>
#include <set>
#include <vector>
#include <stack>
#include <string>
using namespace std;
INPUT :
Enter number of productions: 3
Enter the productions (e.g., S->AB or S->ε):
S->AB
A->a
B->b
Enter the start symbol: S
Enter string to parse: ab
OUTPUT :
First Sets:
S: { a }
A: { a }
B: { b }
Follow Sets:
S: { $ }
A: { b }
B: { $ }
Parsing Table:
S, a -> AB
A, a -> a
B, b -> b
String is accepted!
Practical 06
THEORY :
Grammar: We use the standard arithmetic expression grammar:
E -> E + T ; E -> T ; T -> T * F ; T -> F ; F -> ( E ) ; F -> id
Action Table: Determines whether to shift, reduce, or accept based on the current state and
the lookahead symbol.
Goto Table: Used during a reduction to determine the next state based on the non-terminal
symbol.
reduce() function: Handles reductions, logging the rule applied and the stack updates.
parse() function: Implements the core parsing loop. It prints detailed steps of the process,
including the current state, symbol, action taken (shift, reduce, accept, or error), and the
resulting state stack after each action.
PROGRAM :
#include <iostream>
#include <stack>
#include <vector>
#include <map>
#include <string>
using namespace std;
// One entry of the parser's action table.
struct Action {
string type; // "shift", "reduce", or "accept"
// State pushed on a shift; the table stores -1 here for reduce/accept entries.
int state;
// 1-based rule number used on a reduce (reduce() reads grammar[rule - 1]);
// the table stores -1 here for shift/accept entries.
int rule;
};
// Grammar rules (for reduction).
// Each entry pairs the production text with the number of symbols on its
// right-hand side, which is how many stack entries reduce() pops.
// Rules are numbered from 1, so rule k is stored at index k - 1.
vector<pair<string, int>> grammar = {
{"E -> E + T", 3}, // Rule 1
{"E -> T", 1}, // Rule 2
{"T -> T * F", 3}, // Rule 3
{"T -> F", 1}, // Rule 4
{"F -> ( E )", 3}, // Rule 5
{"F -> id", 1} // Rule 6
};
// Action and Goto tables
// actionTable maps (state, input symbol) to the shift/reduce/accept entry.
map<pair<int, string>, Action> actionTable;
// gotoTable maps (state, non-terminal) to the state pushed after a reduction.
map<pair<int, string>, int> gotoTable;
// Stack for states and symbols
// parse() and reduce() push and pop both stacks together.
stack<int> stateStack;
stack<string> symbolStack;
// Initialize the LALR table for the simplified grammar.
//
// Fills actionTable and gotoTable with the complete 12-state (0..11) table
// for
//   1: E -> E + T   2: E -> T   3: T -> T * F
//   4: T -> F       5: F -> ( E )   6: F -> id
// The previous version stopped at state 7 and had no goto entries for states
// 6 and 7, so even "id + id * id" ended in a syntax error.
void initLALRTable() {
    // Start from empty tables so calling this more than once is harmless.
    actionTable.clear();
    gotoTable.clear();

    // --- Shift actions -----------------------------------------------------
    // States that expect the start of an operand: shift "id" or "(".
    const int operandStates[] = {0, 4, 6, 7};
    for (int s : operandStates) {
        actionTable[{s, "id"}] = {"shift", 5, -1};
        actionTable[{s, "("}] = {"shift", 4, -1};
    }
    actionTable[{1, "+"}] = {"shift", 6, -1};
    actionTable[{8, "+"}] = {"shift", 6, -1};
    actionTable[{2, "*"}] = {"shift", 7, -1};
    actionTable[{9, "*"}] = {"shift", 7, -1};
    actionTable[{8, ")"}] = {"shift", 11, -1};

    // --- Accept -------------------------------------------------------------
    actionTable[{1, "$"}] = {"accept", -1, -1};

    // --- Reduce actions -----------------------------------------------------
    // Reductions to E happen on the symbols that may follow E.
    const char *const afterExpr[] = {"+", ")", "$"};
    for (const char *la : afterExpr) {
        actionTable[{2, la}] = {"reduce", -1, 2}; // E -> T
        actionTable[{9, la}] = {"reduce", -1, 1}; // E -> E + T
    }
    // Reductions to T and F happen on the symbols that may follow T / F.
    const char *const afterTerm[] = {"+", "*", ")", "$"};
    for (const char *la : afterTerm) {
        actionTable[{3, la}] = {"reduce", -1, 4};  // T -> F
        actionTable[{5, la}] = {"reduce", -1, 6};  // F -> id
        actionTable[{10, la}] = {"reduce", -1, 3}; // T -> T * F
        actionTable[{11, la}] = {"reduce", -1, 5}; // F -> ( E )
    }

    // --- Goto table ---------------------------------------------------------
    gotoTable[{0, "E"}] = 1;
    gotoTable[{0, "T"}] = 2;
    gotoTable[{0, "F"}] = 3;
    gotoTable[{4, "E"}] = 8;
    gotoTable[{4, "T"}] = 2;
    gotoTable[{4, "F"}] = 3;
    gotoTable[{6, "T"}] = 9;
    gotoTable[{6, "F"}] = 3;
    gotoTable[{7, "F"}] = 10;
}
// Applies grammar rule number `rule` (1-based) to the parser stacks: pops one
// symbol and one state per right-hand-side symbol, pushes the rule's
// left-hand-side non-terminal, then pushes the state the goto table gives for
// the exposed state and that non-terminal.
// Note: the goto lookup uses operator[], so a missing entry yields state 0.
void reduce(int rule) {
    const std::pair<std::string, int> &entry = grammar[rule - 1];
    const std::string &production = entry.first;
    // The left-hand side is the first character of the production text.
    const std::string nonTerminal = production.substr(0, 1);

    std::cout << "Reducing using rule: " << production << std::endl;

    // Discard the right-hand side from both stacks.
    int remaining = entry.second;
    while (remaining > 0) {
        symbolStack.pop();
        stateStack.pop();
        --remaining;
    }

    symbolStack.push(nonTerminal);
    const int exposedState = stateStack.top();
    const int nextState = gotoTable[{exposedState, nonTerminal}];
    stateStack.push(nextState);

    std::cout << "Goto state " << stateStack.top() << " after reducing to " << nonTerminal << std::endl;
}
// Function to simulate the LALR parsing process
void parse(vector<string> input) {
stateStack.push(0);
int idx = 0;
cout << "Starting parsing process..." << endl;
while (true) {
int state = stateStack.top();
string symbol = (idx < input.size()) ? input[idx] : "$";
cout << "\nCurrent state: " << state << ", current input: " << symbol << endl;
auto action = actionTable[{state, symbol}];
if (action.type == "shift") {
// Shift: Push the symbol and new state onto the stack
cout << "Shifting symbol: " << symbol << " and going to state: " << action.state <<
endl;
symbolStack.push(symbol);
stateStack.push(action.state);
idx++;
} else if (action.type == "reduce") {
// Reduce: Apply a grammar rule
reduce(action.rule);
} else if (action.type == "accept") {
// Accept: Successful parse
cout << "Input accepted!" << endl;
return;
} else { // Error: No valid action
cout << "Syntax error at symbol: " << symbol << endl;
return;
}
}}
// Builds the parser tables and parses the fixed token sequence id + id * id.
int main() {
    initLALRTable();

    const std::vector<std::string> tokens = {"id", "+", "id", "*", "id"};
    parse(tokens);

    return 0;
}
INPUT :
id + id * id
OUTPUT :
Current state: 0, current input: id
Shifting symbol: id and going to state: 5
THEORY :
Here's an example program that demonstrates three basic optimization techniques:
constant folding, dead code elimination, and loop optimization.
1. Constant Folding:
The compiler evaluates expressions with constant values during compile-time, instead of
calculating them at runtime.
2. Dead Code Elimination:
The compiler removes code that does not affect the program output (e.g., unused variables
or unreachable code).
3. Loop Optimization (Loop Invariant Code Motion):
The compiler optimizes loops by moving constant computations outside the loop.
PROGRAM :
#include <iostream>
using namespace std;
// Demonstrates constant folding: prints the value of the same expression
// once written with variables and once written with literal constants.
void constantFolding() {
    std::cout << "\n### Constant Folding Optimization ###" << std::endl;

    int lhs = 5;
    int rhs = 10;

    // Unfolded form: the expression is written in terms of variables.
    const int unfolded = lhs * rhs + 20;
    std::cout << "Before Optimization: c = " << unfolded << std::endl;

    // Folded form: only literals, so the value is a compile-time constant.
    const int folded = 5 * 10 + 20;
    std::cout << "After Optimization: c = " << folded << std::endl;
}
// Demonstrates dead code elimination: declares a variable whose value is
// never read and prints a before/after description.
void deadCodeElimination() {
    std::cout << "\n### Dead Code Elimination Optimization ###" << std::endl;

    int x{10};
    int y{20};

    // 'z' is the dead code of this demo: it is computed but never used.
    int z{x + y};
    std::cout << "Before Optimization: Unused variable 'z' declared but not used"
              << std::endl;

    // Nothing reads 'z', so removing it cannot change the program's output.
    std::cout << "After Optimization: Unused variable 'z' is removed"
              << std::endl;
}
// Example of loop optimization.
//
// Both loops compute sum(i * factor) for i in [0, n). The first multiplies by
// the loop-invariant factor on every iteration; the second moves that
// multiplication out of the loop and applies it once to the accumulated
// total. The two printed sums are equal.
void loopOptimization() {
    std::cout << "\n### Loop Optimization ###" << std::endl;

    const int n = 100;
    const int factor = 5; // Does not change inside the loop (loop-invariant).

    // Before optimization: one multiplication by the invariant factor per
    // iteration.
    int sum = 0;
    for (int i = 0; i < n; ++i) {
        sum += i * factor;
    }
    std::cout << "Before Optimization: sum = " << sum << std::endl;

    // After optimization: the loop only accumulates the indices; the
    // multiplication by the invariant factor is hoisted out and done once.
    // Valid because factor * (0 + 1 + ... + n-1) == sum of (i * factor).
    int indexTotal = 0;
    for (int i = 0; i < n; ++i) {
        indexTotal += i;
    }
    sum = indexTotal * factor;
    std::cout << "After Optimization: sum = " << sum << std::endl;
}
// Prints a title and runs the three optimization demos in order.
int main() {
    std::cout << "Code Optimization Techniques in Compiler Design\n";

    void (*const demos[])() = {constantFolding, deadCodeElimination, loopOptimization};
    for (void (*demo)() : demos) {
        demo();
    }
    return 0;
}
INPUT :
The program takes no external input; all values are hard-coded in the source.
OUTPUT :
THEORY :
Error recovery in a compiler is a technique used to continue parsing after an error has
occurred, enabling the compiler to detect multiple errors in a single run. There are several
error recovery methods used in compilers, such as:
1. Panic Mode Recovery: The parser discards input symbols until a synchronizing token is
found.
2. Phrase-Level Recovery: The parser replaces or inserts tokens to recover from the error
and continue parsing.
3. Error Productions: The parser includes special productions for common errors.
4. Global Correction: The parser makes minimal changes to the input string to correct the
error.
WORKING :
1. Input: The user enters a string of tokens. The valid tokens are a, b, and c.
2. Panic Mode Recovery: If an invalid token is found, the program reports the error, skips
invalid tokens, and continues parsing once a valid token is found.
3. Error Reporting: The program counts and reports errors in the input and shows where
the parsing has recovered.
PROGRAM :
#include <iostream>
#include <string>
#include <vector>
// A simple function to check if a character is a valid token.
// The valid tokens are exactly 'a', 'b' and 'c'.
bool isValidToken(char token) {
    return token == 'a' || token == 'b' || token == 'c';
}

// Function to simulate panic mode error recovery.
//
// Walks `input` one character at a time and prints a line per event:
//   * a valid token is reported as valid;
//   * an invalid token is reported as an error (counted once per run of
//     invalid characters), then characters are discarded until the next
//     valid token, which is reported as the recovery point.
// The recovery token itself is then processed like any other valid token.
// (Previously the loop increment stepped over it, so it was never reported.)
// A final line gives either "No errors found" or the total error count.
void panicModeRecovery(const std::string& input) {
    std::cout << "Parsing input: " << input << std::endl;

    const std::size_t length = input.length();
    int errorCount = 0;
    std::size_t i = 0;

    while (i < length) {
        if (isValidToken(input[i])) {
            std::cout << "Token '" << input[i] << "' is valid." << std::endl;
            ++i;
            continue;
        }

        std::cout << "Error: Invalid token '" << input[i] << "' at position " << i << std::endl;
        ++errorCount;

        // Panic mode: skip tokens until a valid one is found.
        while (i < length && !isValidToken(input[i])) {
            ++i;
        }
        if (i < length) {
            std::cout << "Recovered at position " << i << " with valid token '" << input[i] << "'"
                      << std::endl;
        }
        // No increment here: `i` rests on the synchronizing token so the
        // next iteration handles it as a normal valid token.
    }

    if (errorCount == 0) {
        std::cout << "No errors found in the input." << std::endl;
    } else {
        std::cout << "Total errors encountered: " << errorCount << std::endl;
    }
}
// Prompts for one whitespace-delimited string of tokens and runs the
// panic-mode recovery demo on it.
int main() {
    std::cout << "Enter a string of tokens (valid tokens are 'a', 'b', 'c'): ";

    std::string tokens;
    std::cin >> tokens;

    panicModeRecovery(tokens);
    return 0;
}
INPUT :
abxdce
OUTPUT :
Enter a string of tokens (valid tokens are 'a', 'b', 'c'): abxdce
Parsing input: abxdce
Token 'a' is valid.
Token 'b' is valid.
Error: Invalid token 'x' at position 2
Recovered at position 4 with valid token 'c'
Token 'c' is valid.
Error: Invalid token 'e' at position 5
Total errors encountered: 2
CONCLUSION : This is a simplified demonstration of panic mode error recovery. Other error
recovery methods, like phrase-level recovery, can be implemented similarly by modifying
how the errors are handled and corrected.
Practical 09
THEORY : YACC (Yet Another Compiler Compiler) is a tool used to generate parsers, which
can parse given grammars and generate syntax trees. The idea is to define a grammar in
YACC and then create a C program that processes input based on the grammar. The YACC
tool generates a parser in C, which is then compiled and linked with the lexical analyzer
(usually created with Lex or Flex).
PROGRAM :
OUTPUT :
Enter an expression: 3 + 5 * 2
13
Practical 10
PROGRAMS :
1. Lex Specification
This file defines the tokens like numbers and operators.
%{
/* Scanner for the arithmetic-expression parser: returns NUMBER and the four
   operator tokens to the YACC-generated parser and skips whitespace. */
#include <stdio.h>  /* printf */
#include <stdlib.h> /* atoi */
#include "y.tab.h" /* For token definitions generated by YACC */
%}
%%
[0-9]+ { yylval = atoi(yytext); return NUMBER; } /* Decimal integer; its value travels in yylval */
"+" { return PLUS; }
"-" { return MINUS; }
"*" { return MULT; }
"/" { return DIV; }
[ \t\n]+ ; /* Ignore whitespace */
. { printf("Unknown character: %s\n", yytext); } /* Report and skip anything else */
%%
/* Returning 1 tells the scanner there is no further input after EOF. */
int yywrap() {
    return 1;
}
This Lex specification defines how to recognize numbers ([0-9]+) and operators
(+, -, *, /), while ignoring whitespace.
2. YACC Specification
This file defines the grammar for the arithmetic expressions and how the tokens
are combined.
%{
/* Parser for integer arithmetic expressions with + - * /.
   Each reduction prints the step it evaluated; the value of the complete
   expression is printed on its own line once the input has been parsed. */
#include <stdio.h>
#include <stdlib.h>
void yyerror(const char *s);
int yylex(void);
%}
%token NUMBER
%token PLUS MINUS MULT DIV
/* Both levels are left-associative; MULT/DIV are declared later and
   therefore bind tighter than PLUS/MINUS. */
%left PLUS MINUS
%left MULT DIV
%%
/* Start symbol: a complete expression. Prints the final value. */
input:
    expr { printf("%d\n", $1); }
    ;
/* Every action assigns $$ explicitly. Without that, the default
   "$$ = $1" made each operation evaluate to its left operand. */
expr:
    expr PLUS expr { $$ = $1 + $3; printf("%d + %d = %d\n", $1, $3, $$); }
    | expr MINUS expr { $$ = $1 - $3; printf("%d - %d = %d\n", $1, $3, $$); }
    | expr MULT expr { $$ = $1 * $3; printf("%d * %d = %d\n", $1, $3, $$); }
    | expr DIV expr {
        if ($3 == 0) {
            /* There is no meaningful value to carry on with: report the
               error and stop parsing. */
            yyerror("Division by zero!");
            YYABORT;
        }
        $$ = $1 / $3;
        printf("%d / %d = %d\n", $1, $3, $$);
    }
    | NUMBER { $$ = $1; }
    ;
%%
/* Error callback used by the parser and by the division-by-zero check. */
void yyerror(const char *s) {
    fprintf(stderr, "Error: %s\n", s);
}
int main() {
    printf("Enter an arithmetic expression: ");
    yyparse();
    return 0;
}
INPUT :
2+3*4
OUTPUT :
14