XML Parser
XML Parser
%{
/*
 * C prologue of the scanner: headers, scanner-wide state and the word()
 * helper called from the rule actions.
 */
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
#include "y.tab.h"

/* Start condition saved by the {attdef} rule and re-entered by the "?>" rule. */
static int keep;

/* Set by the start-tag rule to its own copy of the tag name; defined outside the scanner. */
extern char* abc;

/*
 * word - return a freshly allocated copy of the first word in s.
 *
 * Leading whitespace and '<' characters are skipped; the word then runs up
 * to (not including) the next whitespace character or the end of s, so any
 * punctuation glued to the word is part of the copy.
 *
 * s must be a NUL-terminated string. The caller owns the result and
 * releases it with free(). The process exits if memory cannot be obtained.
 */
static char *word(char *s)
{
    size_t start = 0;
    size_t end;
    size_t len;
    char *copy;

    /* Cast to unsigned char: <ctype.h> is undefined for negative char values. */
    while (isspace((unsigned char)s[start]) || s[start] == '<')
        start++;

    end = start;
    while (s[end] != '\0' && !isspace((unsigned char)s[end]))
        end++;

    len = end - start;
    copy = malloc(len + 1);
    if (copy == NULL) {
        perror("malloc");
        exit(EXIT_FAILURE);
    }
    memcpy(copy, s + start, len);
    copy[len] = '\0';
    return copy;
}
%}
/* Named patterns used by the scanner rules. */
/* nl: one line terminator (CRLF, CR or LF). */
nl (\r\n|\r|\n)
/* ws: a non-empty run of spaces, tabs and line-terminator characters. */
ws [ \t\r\n]+
/* open, close: the tag delimiters; each also absorbs one adjacent line terminator. */
open {nl}?"<"
close ">"{nl}?
/* namestart, namechar: letters, underscore and bytes 0200-0377; later characters may also be digits, '.' or '-'. */
namestart [A-Za-z\200-\377_]
namechar [A-Za-z\200-\377_0-9.-]
/* esc: numeric character reference, decimal (&#NNN;) or hexadecimal (&#xHHH;). */
esc "&#"[0-9]+";"|"&#x"[0-9a-fA-F]+";"
name {namestart}{namechar}*
/* data: text between tags. '<' and '&' never match (except '&' inside an esc),
   and a newline is only taken when what follows it is neither '<' nor '&',
   or is an esc. */
data ([^<\n&]|\n[^<&]|\n{esc}|{esc})+
/* comment: a complete comment; the body may hold single hyphens but not two in a row. */
comment {open}"!--"([^-]|"-"[^-])*"--"{close}
/* string: a value in double or single quotes; '&' is accepted only as part of an esc. */
string \"([^"&]|{esc})*\"|\'([^'&]|{esc})*\'
/* version: the fixed version declaration. */
version {open}"?XML-VERSION 1.0?"{close}
/* encoding: the encoding declaration, carrying one name. */
encoding {open}"?XML-ENCODING"{ws}{name}{ws}?"?"{close}
/* attdef: the opening of an attribute declaration. */
attdef {open}"?XML-ATT"
/* CONTENT is an inclusive start condition: rules without a <...> prefix are active in it too. */
%s CONTENT
%%
    /*
     * Scanner rules.
     * INITIAL (also the state at start-up) is entered by the start-tag,
     * end-tag and attribute-declaration rules; CONTENT is entered after a
     * closing ">". Rules without a start-condition prefix apply in both.
     */
<INITIAL>{ws} {/* skip */}
<INITIAL>{version} {return VERSION;}
<INITIAL>{encoding} {
    /* {open} may begin with a line terminator, so find the keyword in the
       match rather than assuming it starts at yytext[1]. The pattern
       contains the keyword literally, so strstr() cannot fail here. */
    yylval.s = word(strstr(yytext, "?XML-ENCODING") + 13);
    return ENCODING;
}
<INITIAL>"/" {return SLASH;}
<INITIAL>"=" {return EQ;}
<INITIAL>{close} {BEGIN(CONTENT); return CLOSE;}
<INITIAL>{name} {yylval.s = strdup(yytext); return NAME;}
<INITIAL>{string} {yylval.s = strdup(yytext); return VALUE;}
<INITIAL>"?"{close} {
    /* End of an attribute declaration: go back to the state saved by {attdef}. */
    BEGIN(keep);
    return ENDDEF;
}
{attdef} {keep = YY_START; BEGIN(INITIAL); return ATTDEF;}
{open}{ws}?{name} {
    /* Two separate copies of the tag name: one travels to the parser as the
       token value, the other is left in abc. */
    BEGIN(INITIAL);
    yylval.s = word(yytext);
    abc = word(yytext);
    return START;
}
{open}{ws}?"/" {BEGIN(INITIAL); return END;}
{comment} {yylval.s = strdup(yytext); return COMMENT;}
<CONTENT>{data} {yylval.s = strdup(yytext); return DATA;}
. {fprintf(stderr, "!error due to (%c)\n", *yytext);}
{nl} {/* skip, must be an extra one at EOF */;}
%{
/*
 * C prologue of the grammar: headers, the globals shared by the grammar
 * actions, the scanner and main(), and the parser's error callback.
 */
#include <string.h>
#include <stdio.h>
#include <stdlib.h>

/* Capacity of list[]. */
enum { MAX_ELEMENTS = 4096 };

/* Scanner entry point, generated from the lex specification. */
int yylex(void);
void yyerror(const char *msg);

/* Tag name left behind by the scanner's start-tag rule. */
char *abc;

/* Names of all start tags seen so far, in document order.
   NOTE(review): the element rule appends here without checking c against
   MAX_ELEMENTS. */
char *list[MAX_ELEMENTS];

/* Number of entries used in list[]. */
int c;

/* Current element nesting depth. */
int t;

/* yyerror - report a parser diagnostic on stderr. */
void yyerror(const char *msg)
{
    fprintf(stderr, "%s\n", msg);
}
%}
/* Semantic values are C strings. */
%union {char *s;}
/* Tokens that carry no semantic value. */
%token VERSION ATTDEF ENDDEF EQ SLASH CLOSE END
/* Tokens whose text the scanner passes in yylval.s. */
%token <s> ENCODING NAME VALUE DATA COMMENT START
/* name_opt yields the name written in an end tag. */
%type <s> name_opt
%%
/* A document: the prolog, exactly one root element, then any number of
   trailing comments or attribute declarations. */
document
: prolog element misc_seq_opt
;
/* The prolog: version and encoding declarations followed by any number of
   comments or attribute declarations. */
prolog
: version_opt encoding_opt
misc_seq_opt
;
/*
 * Optional version declaration, echoed in its fixed form when present.
 * The empty alternative makes the rule optional, as its name says and as
 * encoding_opt already is; without it every document had to start with a
 * version declaration.
 */
version_opt
    : VERSION {printf("<?XML-VERSION 1.0?>\n");}
    | /*empty*/
    ;
/* Optional encoding declaration: print the keyword followed by the token's
   text, then release that text. */
encoding_opt
    : ENCODING
        {
            printf("<?XML-ENCODING %s\n", $1);
            free($1);
        }
    | /*empty*/
    ;
/* Zero or more misc items (left-recursive list). */
misc_seq_opt
: misc_seq_opt misc
| /*empty*/
;
/* A comment (echoed verbatim) or an attribute declaration. */
misc
    : COMMENT
        {
            printf("%s", $1);
            /* The scanner strdup()s the comment text; release it once printed. */
            free($1);
        }
    | attribute_decl
    ;
/* Attribute declaration: the opening keyword and a name, any attributes,
   then the closing "?>". */
attribute_decl
    : ATTDEF NAME
        {
            printf("\n<?XML-ATT %s", $2);
            /* The scanner strdup()s the name and it is not used after this
               point; release it. */
            free($2);
        }
      attribute_seq_opt ENDDEF {printf("?>\n");}
    ;
/*
 * An element: start tag, attributes, then either "/>" or content followed
 * by an end tag.
 *
 * The start-tag name ($1) stays on the parser stack until the whole element
 * has been parsed, and the end-tag name is checked against it there.
 * list[] only ever grows (it is the record of every start tag), so it
 * cannot double as the stack of currently open elements: indexing it by
 * the nesting depth t picks the wrong entry as soon as an element has a
 * second child.
 */
element
    : START
        {
            printf("\n<%s", $1);
            list[c++] = abc;    /* abc is the scanner's own copy of this tag name */
            t++;
        }
      attribute_seq_opt
      empty_or_content
        {
            /* $<s>4 is the end-tag name, or NULL for an empty element. */
            if ($<s>4 != NULL && strcmp($<s>4, $1) != 0)
            {
                printf("\n\nERROR : '%s' Opened but '%s' closed. Terminated.\n", $1, $<s>4);
                exit(0);
            }
            t--;    /* leave this element, whichever way it was closed */
            free($1);
        }
    ;
/*
 * The remainder of an element after its attributes. The semantic value
 * (accessed with an explicit <s> tag) is the end-tag name, or NULL when the
 * element was closed with "/>".
 */
empty_or_content
    : SLASH CLOSE
        {
            printf("/>\n");
            $<s>$ = NULL;
        }
    | CLOSE {printf(">\n");}
      content END name_opt CLOSE
        {
            printf("\n</%s>\n", $5);
            abc = $5;
            $<s>$ = $5;
        }
    ;
/* Element content: any mix of character data, misc items and child
   elements. Character data is echoed and then released. */
content
    : content DATA
        {
            printf("%s", $2);
            free($2);
        }
    | content misc
    | content element
    | /*empty*/
    ;
/* The name in an end tag; when it is omitted the value is a freshly
   allocated empty string. */
name_opt
    : NAME
        {
            $$ = $1;
        }
    | /*empty*/
        {
            $$ = strdup("");
        }
    ;
/* Zero or more attributes (left-recursive list). */
attribute_seq_opt
: attribute_seq_opt attribute
| /*empty*/
;
/* One attribute, either a bare name or name=value. It is echoed with a
   leading space and its strings are released. */
attribute
    : NAME
        {
            printf(" %s", $1);
            free($1);
        }
    | NAME EQ VALUE
        {
            printf(" %s=%s", $1, $3);
            free($1);
            free($3);
        }
    ;
%%
/*
 * yywrap - end-of-input hook for the scanner.
 *
 * Always returns 1, i.e. there is no further input to switch to.
 */
int yywrap(void)
{
    const int no_further_input = 1;

    return no_further_input;
}
int x = yyparse();
printf("total number of errors is %d\n", x);
printf("\n**Metadata (%d)** \n", c);
for (i = 0; i < c; i++)
printf("%s\n", list[i]);
return 0;
Input.xml
Compile and run:
flex xml_parser.l
bison -d -y xml_parser.y
cc lex.yy.c y.tab.c -o xml_parser
./xml_parser < Input.xml