0% found this document useful (0 votes)
36 views6 pages

CD Lab5

This document describes a C program that implements a lexical analyzer to parse code and classify tokens. The lexical analyzer ignores whitespace, comments, and restricts identifier length. It contains functions to check if a token is a delimiter, operator, keyword, integer, real number, or valid identifier. The main function passes a sample string to the parse function, which extracts tokens and prints their classification.

Uploaded by

Adil Danad
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
36 views6 pages

CD Lab5

This document describes a C program that implements a lexical analyzer to parse code and classify tokens. The lexical analyzer ignores whitespace, comments, and restricts identifier length. It contains functions to check if a token is a delimiter, operator, keyword, integer, real number, or valid identifier. The main function passes a sample string to the parse function, which extracts tokens and prints their classification.

Uploaded by

Adil Danad
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 6

Lab 5

Design a lex program that extracts all HTML tags from an HTML file whose name
is given at run time and stores them in a text file whose name is also given at run time.
Code
%{
/*
 * Extracts every HTML tag from an input file and writes one tag per line
 * to an output file.
 *
 * Usage: prog [input.html [output.txt]]
 * When an argument is omitted the historical defaults "html.html" and
 * "tag.txt" are used.
 */
#include <stdio.h>
%}
%%
"<"[^>]*">"   { /* a complete tag, from '<' through the matching '>' */
                fprintf(yyout, "%s\n", yytext); }
.|\n          { /* everything outside a tag is discarded */ }
%%
/* Tells the scanner there is no further input once yyin hits end of file. */
int yywrap(void)
{
    return 1;
}

int main(int argc, char *argv[])
{
    const char *inPath = (argc > 1) ? argv[1] : "html.html";
    const char *outPath = (argc > 2) ? argv[2] : "tag.txt";

    yyin = fopen(inPath, "r");
    if (yyin == NULL) {
        perror(inPath);
        return 1;
    }

    yyout = fopen(outPath, "w");
    if (yyout == NULL) {
        perror(outPath);
        fclose(yyin);
        return 1;
    }

    yylex();

    fclose(yyin);
    /* fclose flushes buffered output, so a failure here means lost data. */
    if (fclose(yyout) != 0) {
        perror(outPath);
        return 1;
    }
    return 0;
}

html file
<!DOCTYPE html>
<html lang="en">
<head>
<title>CG Lab5</title>
</head>
<body>
<h1>This is First program</h1>
<p>I am executing.</p>
</body>
</html>
Output

<!DOCTYPE html
<html lang="en"
<head
<titleCG Lab5</title
</head
<body
<h1This is First program</h1
<pI am executing.</p
</body
</html

Design a lexical analyzer for a given language and the lexical analyzer
should ignore redundant spaces, tabs and new lines. It should also ignore
comments. Although, the syntax specification states that identifiers can be
arbitrarily long, you may restrict the length to some reasonable value.
Simulate the same in C language.
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Reports whether ch terminates the token currently being scanned.
 *
 * Delimiters are the blank characters (space, tab, newline, carriage
 * return), the operator characters + - * / > < =, and the punctuation
 * , ; ( ) [ ] { }.  Tab, newline and carriage return are included so that
 * redundant whitespace of any kind separates tokens instead of being glued
 * into them.
 *
 * Returns true for a delimiter, false for any other character
 * (including the terminating '\0').
 */
bool isDelimiter(char ch)
{
    switch (ch) {
    case ' ': case '\t': case '\n': case '\r':
    case '+': case '-': case '*': case '/':
    case ',': case ';': case '>': case '<': case '=':
    case '(': case ')': case '[': case ']': case '{': case '}':
        return true;
    default:
        return false;
    }
}
/*
 * Reports whether ch is one of the single-character operators
 * + - * / > < =.
 */
bool isOperator(char ch)
{
    static const char operators[] = "+-*/><=";

    /* strchr would also match the terminating NUL, so rule it out first. */
    if (ch == '\0') {
        return false;
    }
    return strchr(operators, ch) != NULL;
}
/*
 * Reports whether str is a well-formed identifier.
 *
 * An identifier starts with a letter or underscore, continues with
 * letters, digits or underscores, and is at most maxLength characters
 * long (the language allows arbitrary length; this analyzer restricts it
 * to a reasonable value).
 *
 * Returns false for NULL, for the empty string, for an over-long name and
 * for a name containing any other character; true otherwise.
 */
bool validIdentifier(char* str)
{
    enum { maxLength = 31 };
    size_t i;

    if (str == NULL) {
        return false;
    }

    /* <ctype.h> functions need an unsigned char value to avoid UB. */
    if (!(isalpha((unsigned char)str[0]) || str[0] == '_')) {
        return false;
    }

    for (i = 1; str[i] != '\0'; i++) {
        if (i >= maxLength) {
            return false;
        }
        if (!(isalnum((unsigned char)str[i]) || str[i] == '_')) {
            return false;
        }
    }
    return true;
}
/*
 * Reports whether str exactly matches one of the reserved words this
 * analyzer recognizes.  The comparison is case-sensitive, and the set is
 * limited to the words listed in the table below.
 */
bool isKeyword(char* str)
{
    static const char *const reserved[] = {
        "if", "else", "while", "do", "break", "continue",
        "int", "double", "float", "return", "char", "case",
        "sizeof", "long", "short", "typedef", "switch",
        "unsigned", "void", "static", "struct", "goto"
    };
    const size_t count = sizeof reserved / sizeof reserved[0];
    size_t k;

    for (k = 0; k < count; k++) {
        if (strcmp(str, reserved[k]) == 0) {
            return true;
        }
    }
    return false;
}
/*
 * Reports whether str is a decimal integer literal: an optional leading
 * '-' followed by one or more digits.
 *
 * Returns false for NULL, the empty string, a lone "-", or a string
 * containing any non-digit character after the optional sign.
 */
bool isInteger(char* str)
{
    size_t i = 0;

    if (str == NULL) {
        return false;
    }

    /* A minus sign is only meaningful in the first position. */
    if (str[0] == '-') {
        i = 1;
    }

    /* At least one digit is required. */
    if (str[i] == '\0') {
        return false;
    }

    for (; str[i] != '\0'; i++) {
        if (!isdigit((unsigned char)str[i])) {
            return false;
        }
    }
    return true;
}
/*
 * Reports whether str is a real-number literal: an optional leading '-',
 * then digits containing exactly one decimal point and at least one digit
 * (so "3.14", "5." and ".5" qualify; ".", "1.2.3" and "314" do not).
 *
 * Returns false for NULL and for the empty string.
 */
bool isRealNumber(char* str)
{
    size_t i = 0;
    bool seenPoint = false;
    bool seenDigit = false;

    if (str == NULL) {
        return false;
    }

    /* A minus sign is only meaningful in the first position. */
    if (str[0] == '-') {
        i = 1;
    }

    for (; str[i] != '\0'; i++) {
        unsigned char c = (unsigned char)str[i];

        if (c == '.') {
            /* A second decimal point makes the literal malformed. */
            if (seenPoint) {
                return false;
            }
            seenPoint = true;
        } else if (isdigit(c)) {
            seenDigit = true;
        } else {
            return false;
        }
    }
    return seenPoint && seenDigit;
}
/*
 * Returns a newly allocated, NUL-terminated copy of str[left..right]
 * (both bounds inclusive).
 *
 * An empty string is returned when str is NULL, left is negative, or
 * right < left.  The caller owns the result and must free() it.
 * Returns NULL if the allocation fails.
 */
char* subString(char* str, int left, int right)
{
    size_t count = 0;
    char *copy;

    if (str != NULL && left >= 0 && right >= left) {
        count = (size_t)(right - left) + 1;
    }

    copy = malloc(count + 1);
    if (copy == NULL) {
        return NULL;
    }

    if (count > 0) {
        memcpy(copy, str + left, count);
    }
    copy[count] = '\0';
    return copy;
}
/* True when ch ends a token: any delimiter, or tab/newline/carriage return. */
static bool isTokenBoundary(char ch)
{
    return isDelimiter(ch) || ch == '\t' || ch == '\n' || ch == '\r';
}

/*
 * If a comment starts at text[pos], returns the index just past it;
 * otherwise returns pos unchanged.  A line comment ends before the next
 * newline, a block comment after its closing star-slash; an unterminated
 * comment runs to the end of the text.
 */
static int skipComment(const char *text, int pos, int len)
{
    if (text[pos] != '/' || pos + 1 >= len) {
        return pos;
    }

    if (text[pos + 1] == '/') {
        pos += 2;
        while (pos < len && text[pos] != '\n') {
            pos++;
        }
        return pos;
    }

    if (text[pos + 1] == '*') {
        pos += 2;
        while (pos + 1 < len && !(text[pos] == '*' && text[pos + 1] == '/')) {
            pos++;
        }
        return (pos + 1 < len) ? pos + 2 : len;
    }

    return pos;
}

/* Prints the classification line for one non-empty token. */
static void printTokenClass(char *token)
{
    if (isKeyword(token) == true) {
        printf("'%s' IS A KEYWORD\n", token);
    } else if (isInteger(token) == true) {
        printf("'%s' IS AN INTEGER\n", token);
    } else if (isRealNumber(token) == true) {
        printf("'%s' IS A REAL NUMBER\n", token);
    } else if (validIdentifier(token) == true) {
        printf("'%s' IS A VALID IDENTIFIER\n", token);
    } else {
        printf("'%s' IS NOT A VALID IDENTIFIER\n", token);
    }
}

/*
 * Splits str into tokens and prints one classification line per token
 * to stdout.
 *
 * Spaces, tabs, newlines and comments are skipped.  Operator characters
 * are reported individually; other delimiters (such as ';' or ',') are
 * consumed silently.  Every maximal run of non-delimiter characters is
 * classified as a keyword, integer, real number, or valid/invalid
 * identifier.
 *
 * str must be a NUL-terminated string (NULL is ignored).  Scanning never
 * reads past the terminator, and each temporary token copy is freed.
 */
void parse(char* str)
{
    int len;
    int pos = 0;

    if (str == NULL) {
        return;
    }
    len = (int)strlen(str);

    while (pos < len) {
        int afterComment = skipComment(str, pos, len);
        int start;
        char *token;

        if (afterComment != pos) {
            pos = afterComment;
            continue;
        }

        if (isTokenBoundary(str[pos])) {
            if (isOperator(str[pos]) == true) {
                printf("'%c' IS AN OPERATOR\n", str[pos]);
            }
            pos++;
            continue;
        }

        /* Collect the maximal run of non-delimiter characters. */
        start = pos;
        while (pos < len && !isTokenBoundary(str[pos])) {
            pos++;
        }

        token = subString(str, start, pos - 1);
        if (token == NULL) {
            /* Out of memory: nothing sensible can be reported. */
            return;
        }
        printTokenClass(token);
        free(token);
    }
}
/*
 * Entry point: runs the analyzer on a fixed sample statement and lets
 * parse() print the classification of each token.
 */
int main()
{
    char sample[100] = "int a = b + 20;";

    parse(sample);

    return 0;
}

Output

You might also like