0% found this document useful (0 votes)
17 views

C2ex Java

Copyright
© © All Rights Reserved
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
17 views

C2ex Java

Copyright
© © All Rights Reserved
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 6

Date: Ex – 1(b)Lexical analyser

Aim:
To develop a Lexical Analyzer that processes C code to identify and classify keywords,
identifiers, operators, punctuation, constants, and lexemes from a source file.

Algorithm:

1. Initialize Data Structures:

● Use LinkedHashSet to store identifiers, preserving insertion order.


● Define lists for keywords, operators, punctuation, constants, and lexemes.
● Initialize predefined sets of keywords, operators, and punctuation symbols.

2. Read File Line by Line:

● Open and read the file using BufferedReader.


● Process each line by splitting it into tokens based on whitespace and non-word characters.

3. Handle Special Tokens:

● Skip preprocessor directives and headers (e.g., #include <stdio.h>).


● Process string literals ("..."), character literals ('A'), and function calls (func()).

4. Classify Tokens:

● Add tokens to the respective lists (keywords, operators, punctuation, constants) based on
their type.
● Add single alphabetical characters as identifiers.

5. Store Lexemes:

● Store any token that doesn't fit into keywords, operators, punctuation, constants, or
identifiers into the lexemes list.

6. Display Symbol Table:


● After processing all lines, print the contents of the symbol table including keywords,
identifiers, operators, punctuation, constants, and lexemes.

Code:

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Set;

public class LexicalAnalyzer2 {


static Set<String> identifiers = new LinkedHashSet<>(); // Use LinkedHashSet to maintain
insertion order
static ArrayList<String> keywordsList = new ArrayList<>();
static ArrayList<String> operatorsList = new ArrayList<>();
static ArrayList<Character> punctuationList = new ArrayList<>();
static ArrayList<String> constantsList = new ArrayList<>();
static ArrayList<String> lexemes = new ArrayList<>(); // New array for function names and
others

// Define initial keywords and operators


static Set<String> keywords = new HashSet<>(Arrays.asList(
"int", "float", "char", "void", "if", "else", "while", "return",
"for", "do", "switch", "case", "include", "stdio", "main"
));
static Set<String> operators = new HashSet<>(Arrays.asList(
"+", "-", "*", "/", "=", "++", "--", "==", "!=", ">", "<", ">=", "<=", "&&", "||"
));
static Set<Character> punctuations = new HashSet<>(Arrays.asList(
';', ',', '(', ')', '{', '}', '[', ']'
));

static void processLine(String line) {


// Handle multi-character tokens like strings and function calls
String[] tokens = line.split("(?=\\W)|(?<=\\W)");

for (String token : tokens) {


token = token.trim();

if (token.isEmpty()) {
continue; // Skip empty tokens
}

// Skip preprocessor directives


if (token.startsWith("#")) {
continue;
}

// Skip header files or anything in angle brackets (e.g., <stdio.h>)


if (token.startsWith("<") && token.endsWith(">")) {
continue;
}

// Handle string literals (e.g., "Hello, World!\n")


if (token.startsWith("\"") && token.endsWith("\"")) {
lexemes.add(token);
continue;
}

// Handle character literals (e.g., 'A')


if (token.startsWith("'") && token.endsWith("'") && token.length() == 3) {
lexemes.add(token);
continue;
}

// Handle function calls


if (token.contains("(") && token.contains(")")) {
lexemes.add(token);
continue;
}

// Process other tokens


if (keywords.contains(token)) {
if (!keywordsList.contains(token)) {
keywordsList.add(token);
}
} else if (operators.contains(token)) {
operatorsList.add(token);
} else if (punctuations.contains(token.charAt(0))) {
punctuationList.add(token.charAt(0));
} else if (Character.isDigit(token.charAt(0))) {
constantsList.add(token);
} else if (isSingleAlphabetic(token)) {
// Ensure only single alphabetical tokens are added as identifiers
identifiers.add(token);
} else {
// Tokens that are not identifiers or constants might be part of lexemes
lexemes.add(token);
}
}
}

// Helper method to check if a token is a single alphabetic character


private static boolean isSingleAlphabetic(String token) {
return token.length() == 1 && Character.isLetter(token.charAt(0));
}

public static void main(String[] args) {


// Hardcoded file path
String filePath = "C:\\4025 CSA\\dio2.c";

try (BufferedReader br = new BufferedReader(new FileReader(filePath))) {


String line;
while ((line = br.readLine()) != null) {
processLine(line);
}
} catch (IOException e) {
System.out.println("An error occurred while reading the file.");
e.printStackTrace();
}

// Display the symbol table after processing the entire file


System.out.println("Symbol Table:");
System.out.println("Keywords: " + String.join(", ", keywordsList));
System.out.println("Identifiers: " + String.join(", ", identifiers));
System.out.println("Operators: " + String.join(", ", operatorsList));
System.out.println("Punctuations: " + punctuationList.toString());
System.out.println("Constants: " + String.join(", ", constantsList));
System.out.println("Lexemes: " + String.join(", ", lexemes)); // New output for lexemes
}
}

Dio2.c
#include <stdio.h>
int main() {
int a = 10;
float b = 20.5;
char c = 'A';

a = a + 1;
b = b * 2;
printf("Hello, World!\n");

return 0;
}

Output

Symbol Table:
Keywords: include, stdio, int, main, float, char, return
Identifiers: a, b, c
Operators: <, >, =, =, =, =, +, =, *
Punctuations: [(, ), {, ;, ;, ;, ;, ;, (, ,, ), ;, ;, }]
Constants: 10, 20, 5, 1, 2, 0
Lexemes: ., ., ', ', printf, ", Hello, World, !, \, "

Result:
Hence a Lexical Analyzer that processes C code to identify and classify keywords has been
successfully written, executed and its output verified successfully.

You might also like