0% found this document useful (0 votes)
9 views

XML Parser

The document contains code for an XML parser written in lex and yacc. The lex file defines regular expressions to tokenize elements of an XML document like tags, attributes, comments, and data. The yacc file specifies a context-free grammar for the XML language and uses the tokens to implement a parser. It prints parsed elements to stdout and checks that opening and closing tags are properly nested.

Uploaded by

Reethu
Copyright
© All Rights Reserved
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
9 views

XML Parser

The document contains code for an XML parser written in lex and yacc. The lex file defines regular expressions to tokenize elements of an XML document like tags, attributes, comments, and data. The yacc file specifies a context-free grammar for the XML language and uses the tokens to implement a parser. It prints parsed elements to stdout and checks that opening and closing tags are properly nested.

Uploaded by

Reethu
Copyright
© All Rights Reserved
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 5

Lex code: xml_parser.l

%{
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
#include "y.tab.h"
/* Start condition that was active when an "<?XML-ATT" declaration began:
   saved by the attdef rule and restored by the "?>" rule. */
static int keep;
/* Defined in the yacc file; the START rule stores a copy of the tag name here. */
extern char* abc;

/*
 * Extract the first whitespace-delimited word from s.
 *
 * Leading whitespace and '<' characters are skipped; the word then runs up
 * to (not including) the next whitespace character or the end of the string.
 *
 * Returns a newly malloc'ed, NUL-terminated copy that the caller must free().
 * Terminates the program if the copy cannot be allocated.
 */
static char* word(char *s)
{
    size_t start = 0;
    size_t end;
    size_t len;
    char *buf;

    /* The <ctype.h> functions require unsigned char values; tag names may
       contain bytes in the range \200-\377, which are negative as plain char
       on many platforms. */
    while (isspace((unsigned char)s[start]) || s[start] == '<')
        start++;

    for (end = start; s[end] != '\0' && !isspace((unsigned char)s[end]); end++)
        ;

    len = end - start;
    buf = malloc(len + 1);
    if (buf == NULL) {
        perror("malloc");
        exit(EXIT_FAILURE);
    }
    memcpy(buf, s + start, len);
    buf[len] = '\0';
    return buf;
}
%}

/*
 * Named patterns used by the rules section.
 *   nl         one line break: CRLF, CR or LF
 *   ws         a run of blanks, tabs and line-break characters
 *   open       "<", optionally preceded by one line break
 *   close      ">", optionally followed by one line break
 *   namestart  first character of a name: letter, underscore or byte \200-\377
 *   namechar   later characters of a name: namestart plus digits, "." and "-"
 *   esc        numeric character reference, decimal or hexadecimal
 *   name       a namestart character followed by any number of namechar
 *   data       a run of text with no "<" and no "&" except inside esc
 *   comment    "<!--" ... "-->"
 *   string     attribute value in double or single quotes
 *   version    the literal "<?XML-VERSION 1.0?>" declaration
 *   encoding   "<?XML-ENCODING", a name, then "?>"
 *   attdef     "<?XML-ATT", the start of an attribute declaration
 */
nl (\r\n|\r|\n)
ws [ \t\r\n]+
open {nl}?"<"
close ">"{nl}?
namestart [A-Za-z\200-\377_]
namechar [A-Za-z\200-\377_0-9.-]
esc "&#"[0-9]+";"|"&#x"[0-9a-fA-F]+";"
name {namestart}{namechar}*
data ([^<\n&]|\n[^<&]|\n{esc}|{esc})+
comment {open}"!--"([^-]|"-"[^-])*"--"{close}
string \"([^"&]|{esc})*\"|\'([^'&]|{esc})*\'
version {open}"?XML-VERSION 1.0?"{close}

encoding {open}"?XML-ENCODING"{ws}{name}{ws}?"?"{close}
attdef {open}"?XML-ATT"

%s CONTENT
%%
    /*
     * Start conditions:
     *   INITIAL  the default state; also entered inside tags and "<?...?>"
     *            declarations, where names, "=", quoted values, "/" and ">"
     *            are recognised and whitespace is skipped.
     *   CONTENT  entered after a ">"; character data is returned as DATA.
     * CONTENT is declared with %s (inclusive), so rules without a start
     * condition are active in both states.
     * Every string stored in yylval.s is heap-allocated (strdup or word).
     */
<INITIAL>{ws} {/* skip */}
<INITIAL>{version} {return VERSION;}
<INITIAL>{encoding} {
    /* The open pattern may have matched a leading line break, so the
       keyword does not always end at a fixed offset in yytext; locate it
       instead.  The pattern guarantees the keyword is present. */
    yylval.s = word(strstr(yytext, "XML-ENCODING") + strlen("XML-ENCODING"));
    return ENCODING;}
<INITIAL>"/" {return SLASH;}
<INITIAL>"=" {return EQ;}
<INITIAL>{close} {BEGIN(CONTENT); return CLOSE;}
<INITIAL>{name} {yylval.s = strdup(yytext); return NAME;}
<INITIAL>{string} {yylval.s = strdup(yytext); return VALUE;}
<INITIAL>"?"{close} {BEGIN(keep); return ENDDEF;}
{attdef} {keep = YY_START; BEGIN(INITIAL); return ATTDEF;}
{open}{ws}?{name} {BEGIN(INITIAL); yylval.s= word(yytext);abc=word(yytext);
return START;}
{open}{ws}?"/" {BEGIN(INITIAL); return END;}
{comment} {yylval.s = strdup(yytext); return COMMENT;}
<CONTENT>{data} {yylval.s = strdup(yytext); return DATA;}
. {fprintf(stderr, "!error due to (%c)\n", *yytext);}
{nl} {/* skip, must be an extra one at EOF */;}

Yacc code: xml_parser.y

%{
#include <string.h>
#include <stdio.h>
#include <stdlib.h>

extern FILE *yyin;


/* abc:  tag name handed over by the lexer's START rule.
   list: names of the elements in the order their start tags were parsed;
         main() prints the first c entries.  Room for 50 names. */
char *abc, *list[50];
/* c: number of entries used in list.
   t: count of elements the grammar actions currently consider open. */
int c = 0, t = 0;

void yyerror(char *msg);


int yylex();

%}
%union {char *s;}
%token VERSION ATTDEF ENDDEF EQ SLASH CLOSE END
%token <s> ENCODING NAME VALUE DATA COMMENT START
%type <s> name_opt empty_or_content

%%
/*
 * Grammar for the XML subset recognised by the scanner.  The actions echo
 * the document to stdout as it is parsed and verify that every closing tag
 * names the element it closes.
 *
 * Every <s> value is a heap-allocated string produced by the scanner (or by
 * name_opt); the action that consumes a value is responsible for freeing it.
 */
document
: prolog element misc_seq_opt
;
prolog
: version_opt encoding_opt
misc_seq_opt
;
version_opt
: VERSION {printf("<?XML-VERSION 1.0?>\n");}
| /*empty*/
;
encoding_opt
: ENCODING {printf("<?XML-ENCODING %s\n",$1); free($1);}
| /*empty*/
;
misc_seq_opt
: misc_seq_opt misc
| /*empty*/
;
misc
: COMMENT {printf("%s", $1); free($1);}
| attribute_decl
;
attribute_decl
: ATTDEF NAME {printf("\n<?XML-ATT %s", $2); free($2);}
attribute_seq_opt ENDDEF {printf("?>\n");}
;
/*
 * element: the opening name ($1) is kept until the whole element has been
 * parsed and is then compared with the closing name delivered by
 * empty_or_content ($4).  The parser's own value stack therefore tracks the
 * nesting, which stays correct for sibling elements and for <x/> elements.
 */
element
: START {
    printf("\n<%s", $1);
    /* abc is the scanner's second copy of this tag name; list takes
       ownership of it so main() can print it later. */
    if (c < (int)(sizeof list / sizeof list[0]))
        list[c++] = abc;
    else
        free(abc); /* table full: drop the name instead of overrunning list */
    abc = NULL;
    t++;
}
attribute_seq_opt
empty_or_content {
    if ($4 != NULL) {
        if (strcmp($4, $1) != 0)
        {
            printf("\n\nERROR : '%s' Opened but '%s' closed. Terminated.\n", $1, $4);
            exit(EXIT_FAILURE);
        }
        free($4);
    }
    t--;
    free($1);
}
;

/* Value: the name found in the closing tag, or NULL for an empty element. */
empty_or_content
: SLASH CLOSE {printf("/>\n"); $$ = NULL;}
| CLOSE {printf(">\n");}
content END name_opt CLOSE {
    printf("\n</%s>\n", $5);
    $$ = $5;
}
;
content
: content DATA {printf("%s", $2); free($2);}
| content misc
| content element
| /*empty*/
;
name_opt
: NAME {$$ = $1;}
| /*empty*/ {$$ = strdup("");}
;
attribute_seq_opt
: attribute_seq_opt attribute
| /*empty*/
;
attribute
: NAME {printf(" %s", $1); free($1);}
| NAME EQ VALUE {printf(" %s=%s", $1, $3); free($1); free($3);}
;
%%

/* End-of-input hook for the generated scanner.  Always returns 1; this
   program never switches to another input file. */
int yywrap(void)
{
    const int no_more_input = 1;

    return no_more_input;
}

/* Error callback for the generated parser: writes msg to stdout with two
   newlines before it and two after it. */
void yyerror(char *msg)
{
    fprintf(stdout, "\n\n%s\n\n", msg);
}

/*
 * Parse an XML file and print a summary.
 *
 * The file named by the first command-line argument is parsed; without an
 * argument "input.xml" in the current directory is used.  After parsing, the
 * yyparse() result and the element names collected in list[] are printed.
 *
 * Returns EXIT_SUCCESS when the file was opened and yyparse() returned 0,
 * EXIT_FAILURE otherwise.
 */
int main(int argc, char *argv[])
{
    const char *path = (argc > 1) ? argv[1] : "input.xml";
    int status;
    int i;

    /* Fail loudly if the file is missing: a NULL yyin would make the
       scanner fall back to reading standard input. */
    yyin = fopen(path, "r");
    if (yyin == NULL) {
        perror(path);
        return EXIT_FAILURE;
    }

    status = yyparse();
    fclose(yyin);

    /* NOTE: this is the yyparse() status (0 on success), not a count. */
    printf("total number of errors is %d\n", status);
    printf("\n**Metadata (%d)** \n", c);
    for (i = 0; i < c; i++)
        printf("%s\n", list[i]);

    return status == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
}
Input file: input.xml

<?XML-VERSION 1.0?><?XML-ENCODING utg-8?>


<book>
harry potter
<totalCost>1500/-
<gst>70/-
<ugst>30/-
<cgst>20/-
<sgst>15/-
<igst>
5/-
</igst>
</sgst>
</cgst>
</ugst>
</gst>
</totalCost>
</book>

Compile:

flex xml_parser.l

bison -d -y xml_parser.y

gcc -o xml_parser y.tab.c lex.yy.c

./xml_parser

You might also like