0% found this document useful (0 votes)
27 views6 pages

C2ex Java

Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
27 views6 pages

C2ex Java

Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 6

Date: Ex – 1(b)Lexical analyser

Aim:
To develop a Lexical Analyzer that processes C code to identify and classify keywords,
identifiers, operators, punctuation, constants, and lexemes from a source file.

Algorithm:

1. Initialize Data Structures:

● Use LinkedHashSet to store identifiers, preserving insertion order.


● Define lists for keywords, operators, punctuation, constants, and lexemes.
● Initialize predefined sets of keywords, operators, and punctuation symbols.

2. Read File Line by Line:

● Open and read the file using BufferedReader.


● Process each line by splitting it into tokens based on whitespace and non-word characters.

3. Handle Special Tokens:

● Skip preprocessor directives and headers (e.g., #include <stdio.h>).


● Process string literals ("..."), character literals ('A'), and function calls (func()).

4. Classify Tokens:

● Add tokens to the respective lists (keywords, operators, punctuation, constants) based on
their type.
● Add single alphabetical characters as identifiers.

5. Store Lexemes:

● Store any token that doesn't fit into keywords, operators, punctuation, constants, or
identifiers into the lexemes list.

6. Display Symbol Table:


● After processing all lines, print the contents of the symbol table including keywords,
identifiers, operators, punctuation, constants, and lexemes.

Code:

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Set;

public class LexicalAnalyzer2 {


static Set<String> identifiers = new LinkedHashSet<>(); // Use LinkedHashSet to maintain
insertion order
static ArrayList<String> keywordsList = new ArrayList<>();
static ArrayList<String> operatorsList = new ArrayList<>();
static ArrayList<Character> punctuationList = new ArrayList<>();
static ArrayList<String> constantsList = new ArrayList<>();
static ArrayList<String> lexemes = new ArrayList<>(); // New array for function names and
others

// Define initial keywords and operators


static Set<String> keywords = new HashSet<>(Arrays.asList(
"int", "float", "char", "void", "if", "else", "while", "return",
"for", "do", "switch", "case", "include", "stdio", "main"
));
static Set<String> operators = new HashSet<>(Arrays.asList(
"+", "-", "*", "/", "=", "++", "--", "==", "!=", ">", "<", ">=", "<=", "&&", "||"
));
static Set<Character> punctuations = new HashSet<>(Arrays.asList(
';', ',', '(', ')', '{', '}', '[', ']'
));

static void processLine(String line) {


// Handle multi-character tokens like strings and function calls
String[] tokens = line.split("(?=\\W)|(?<=\\W)");

for (String token : tokens) {


token = token.trim();

if (token.isEmpty()) {
continue; // Skip empty tokens
}

// Skip preprocessor directives


if (token.startsWith("#")) {
continue;
}

// Skip header files or anything in angle brackets (e.g., <stdio.h>)


if (token.startsWith("<") && token.endsWith(">")) {
continue;
}

// Handle string literals (e.g., "Hello, World!\n")


if (token.startsWith("\"") && token.endsWith("\"")) {
lexemes.add(token);
continue;
}

// Handle character literals (e.g., 'A')


if (token.startsWith("'") && token.endsWith("'") && token.length() == 3) {
lexemes.add(token);
continue;
}

// Handle function calls


if (token.contains("(") && token.contains(")")) {
lexemes.add(token);
continue;
}

// Process other tokens


if (keywords.contains(token)) {
if (!keywordsList.contains(token)) {
keywordsList.add(token);
}
} else if (operators.contains(token)) {
operatorsList.add(token);
} else if (punctuations.contains(token.charAt(0))) {
punctuationList.add(token.charAt(0));
} else if (Character.isDigit(token.charAt(0))) {
constantsList.add(token);
} else if (isSingleAlphabetic(token)) {
// Ensure only single alphabetical tokens are added as identifiers
identifiers.add(token);
} else {
// Tokens that are not identifiers or constants might be part of lexemes
lexemes.add(token);
}
}
}

// Helper method to check if a token is a single alphabetic character


private static boolean isSingleAlphabetic(String token) {
return token.length() == 1 && Character.isLetter(token.charAt(0));
}

public static void main(String[] args) {


// Hardcoded file path
String filePath = "C:\\4025 CSA\\dio2.c";

try (BufferedReader br = new BufferedReader(new FileReader(filePath))) {


String line;
while ((line = br.readLine()) != null) {
processLine(line);
}
} catch (IOException e) {
System.out.println("An error occurred while reading the file.");
e.printStackTrace();
}

// Display the symbol table after processing the entire file


System.out.println("Symbol Table:");
System.out.println("Keywords: " + String.join(", ", keywordsList));
System.out.println("Identifiers: " + String.join(", ", identifiers));
System.out.println("Operators: " + String.join(", ", operatorsList));
System.out.println("Punctuations: " + punctuationList.toString());
System.out.println("Constants: " + String.join(", ", constantsList));
System.out.println("Lexemes: " + String.join(", ", lexemes)); // New output for lexemes
}
}

Dio2.c
#include <stdio.h>
int main() {
int a = 10;
float b = 20.5;
char c = 'A';

a = a + 1;
b = b * 2;
printf("Hello, World!\n");

return 0;
}

Output

Symbol Table:
Keywords: include, stdio, int, main, float, char, return
Identifiers: a, b, c
Operators: <, >, =, =, =, =, +, =, *
Punctuations: [(, ), {, ;, ;, ;, ;, ;, (, ,, ), ;, ;, }]
Constants: 10, 20, 5, 1, 2, 0
Lexemes: ., ., ', ', printf, ", Hello, World, !, \, "

Result:
Hence a Lexical Analyzer that processes C code to identify and classify keywords has been
successfully written, executed and its output verified successfully.

You might also like