0% found this document useful (0 votes)
4 views29 pages

Compiler Construction Lecture 8

The document outlines the construction of a lexical analyzer using Flex for C++. It includes specifications for token definitions, the invocation of Flex, and the implementation of a scanner that tokenizes input code. Additionally, it provides details on handling various data types and operators in the C++ language.

Uploaded by

programmerareeba
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
4 views29 pages

Compiler Construction Lecture 8

The document outlines the construction of a lexical analyzer using Flex for C++. It includes specifications for token definitions, the invocation of Flex, and the implementation of a scanner that tokenizes input code. Additionally, it provides details on handling various data types and operators in the C++ language.

Uploaded by

programmerareeba
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 29

1

COMPILER CONSTRUCTION(CS-636)

Gul Sher Ali


MS(CS & Tech.)-China

______________________________________________________
GIMS- PMAS Arid Agriculture University, Gujrat Campus
2
Specification File lex.l 3
%{
/* Token codes returned by the rules below. */
#include "tokdefs.h"
%}
D [0-9]
L [a-zA-Z_]
id {L}({L}|{D})*
%%
"void" {return(TOK_VOID);}
"int" {return(TOK_INT);}
"if" {return(TOK_IF);}
Specification File lex.l 4

"else" {return(TOK_ELSE);}
"while" {return(TOK_WHILE);}
"<=" {return(TOK_LE);}
">=" {return(TOK_GE);}
"==" {return(TOK_EQ);}
"!=" {return(TOK_NE);}
{D}+ {return(TOK_INTCONST);}
{id} {return(TOK_ID);}
[\n]|[\t]|[ ] ;
%%
File tokdefs.h 5
/*
 * Token codes shared by lex.l and its callers.
 * TOK_INT is the `int' keyword; TOK_INTCONST is an integer literal ({D}+).
 * The two must stay distinct so a caller can tell them apart.
 */
#define TOK_VOID 1
#define TOK_INT 2
#define TOK_IF 3
#define TOK_ELSE 4
#define TOK_WHILE 5
#define TOK_LE 6
#define TOK_GE 7
#define TOK_EQ 8
#define TOK_NE 9
#define TOK_INTCONST 10
#define TOK_ID 111
Invoking Flex 6

lex.l → flex → lex.cpp


Using Generated Scanner 7
/*
 * Driver for the generated scanner: prints every token code together with
 * its lexeme as "code,lexeme", one pair per line, until yylex() returns 0.
 *
 * NOTE(review): in the stock flex <FlexLexer.h>, FlexLexer is an abstract
 * base class and yyFlexLexer is the concrete scanner - confirm which class
 * this toolchain expects to be instantiated here.
 */
int main()
{
    FlexLexer lex;
    int tc = lex.yylex();

    while (tc != 0) {
        cout << tc << "," << lex.YYText() << endl;
        /* Fetch the next token inside the loop; otherwise tc never changes. */
        tc = lex.yylex();
    }
    return 0;
}
Creating Scanner EXE 8

flex -+ -olex.cpp lex.l
g++ -c lex.cpp
g++ -c main.cpp
g++ -o lex.exe lex.o main.o

lex <main.cpp
Using Generated Scanner 9
/*
 * Driver for the generated scanner: prints every token code together with
 * its lexeme as "code,lexeme", one pair per line, until yylex() returns 0.
 *
 * NOTE(review): in the stock flex <FlexLexer.h>, FlexLexer is an abstract
 * base class and yyFlexLexer is the concrete scanner - confirm which class
 * this toolchain expects to be instantiated here.
 */
int main()
{
    FlexLexer lex;
    int tc = lex.yylex();

    while (tc != 0) {
        cout << tc << "," << lex.YYText() << endl;
        /* Fetch the next token inside the loop; otherwise tc never changes. */
        tc = lex.yylex();
    }
    return 0;
}
Input Tokenized 10

dos> .\lex < main.cpp


259,void
258,main
283,(
284,)
285,{
258,FlexLexer
258,lex
290,;
260,int
Input Tokenized 11

258,tc
266,=
258,lex
291,.
258,yylex
283,(
284,)
290,;
263,while
Input Tokenized 12

283,(
258,tc
276,!=
257,0
284,)
258,cout
279,<<
258,tc
Input Tokenized 13

279,<<
292,","
279,<<
258,lex
291,.
258,YYText
283,(
284,)
279,<<
Input Tokenized 14
258,endl
290,;
258,tc
266,=
258,lex
291,.
258,yylex
283,(
284,)
290,;
286,}
Flex Input for C++ 15
/*
 * ISO C++ lexical analyzer.
 * Based on the ISO C++ draft standard of December '96.
 */

%{
#include <ctype.h>
#include <stdio.h>
#include "tokdefs.h"

/*
 * Line counter, incremented by the newline rule and by the comment-skipping
 * helpers. It has no initializer, so it starts at 0.
 * NOTE(review): diagnostics on the first input line therefore print 0 -
 * confirm whether 1-based numbering is intended.
 */
int lineno;

/* Helpers defined in the user-code section after the rules. */
static int yywrap(void);
static void skip_until_eol(void);
static void skip_comment(void);
static int check_identifier(const char *);
%}
/* Building blocks for the numeric, character and string literal rules. */
intsuffix ([uU][lL]?)|([lL][uU]?)
fracconst ([0-9]*\.[0-9]+)|([0-9]+\.)
exppart [eE][-+]?[0-9]+
floatsuffix [fFlL]
/*
 * A literal body is either an escape pair (backslash + any character) or a
 * single character that is neither the delimiter nor a backslash. Excluding
 * the backslash from the character class matters: otherwise longest-match
 * can read '\\' + 'x' as one literal by treating the second backslash and
 * the closing quote as an escape pair.
 */
chartext ([^'\\])|(\\.)
stringtext ([^"\\])|(\\.)
%%
	/* Layout: count newlines, drop every other kind of whitespace. */
"\n" { ++lineno; }
[\t\f\v\r ]+ { /* Ignore whitespace. */ }

	/* Comments are consumed by hand-written helpers, no token is returned. */
"/*" { skip_comment(); }
"//" { skip_until_eol(); }

	/* Brackets; the digraphs <% %> <: :> map to the same tokens. */
"{" { return '{'; }
"<%" { return '{'; }
"}" { return '}'; }
"%>" { return '}'; }
"[" { return '['; }
"<:" { return '['; }
"]" { return ']'; }
":>" { return ']'; }
"(" { return '('; }
")" { return ')'; }
";" { return ';'; }
":" { return ':'; }
"..." { return ELLIPSIS; }
"?" { return '?'; }
"::" { return COLONCOLON; }
"." { return '.'; }
".*" { return DOTSTAR; }

	/* Single-character operators and their alternative spellings. */
"+" { return '+'; }
"-" { return '-'; }
"*" { return '*'; }
"/" { return '/'; }
"%" { return '%'; }
"^" { return '^'; }
"xor" { return '^'; }
"&" { return '&'; }
"bitand" { return '&'; }
"|" { return '|'; }
"bitor" { return '|'; }
"~" { return '~'; }
"compl" { return '~'; }
"!" { return '!'; }
"not" { return '!'; }
"=" { return '='; }
"<" { return '<'; }
">" { return '>'; }

	/* Compound assignment operators. */
"+=" { return ADDEQ; }
"-=" { return SUBEQ; }
"*=" { return MULEQ; }
"/=" { return DIVEQ; }
"%=" { return MODEQ; }
"^=" { return XOREQ; }
"xor_eq" { return XOREQ; }
"&=" { return ANDEQ; }
"and_eq" { return ANDEQ; }
"|=" { return OREQ; }
"or_eq" { return OREQ; }
"<<" { return SL; }
">>" { return SR; }
"<<=" { return SLEQ; }
">>=" { return SREQ; }
"==" { return EQ; }
"!=" { return NOTEQ; }
"not_eq" { return NOTEQ; }
"<=" { return LTEQ; }
">=" { return GTEQ; }
"&&" { return ANDAND; }
"and" { return ANDAND; }
"||" { return OROR; }
"or" { return OROR; }
"++" { return PLUSPLUS; }
"--" { return MINUSMINUS; }
"," { return ','; }
"->*" { return ARROWSTAR; }
"->" { return ARROW; }

	/* Keywords, one token code each. */
"asm" { return ASM; }
"auto" { return AUTO; }
"bool" { return BOOL; }
"break" { return BREAK; }
"case" { return CASE; }
"catch" { return CATCH; }
"char" { return CHAR; }
"class" { return CLASS; }
"const" { return CONST; }
"const_cast" { return CONST_CAST; }
"continue" { return CONTINUE; }
"default" { return DEFAULT; }
"delete" { return DELETE; }
"do" { return DO; }
"double" { return DOUBLE; }
"dynamic_cast" { return DYNAMIC_CAST; }
"else" { return ELSE; }
"enum" { return ENUM; }
"explicit" { return EXPLICIT; }
"export" { return EXPORT; }
"extern" { return EXTERN; }
"false" { return FALSE; }
"float" { return FLOAT; }
"for" { return FOR; }
"friend" { return FRIEND; }
"goto" { return GOTO; }
"if" { return IF; }
"inline" { return INLINE; }
"int" { return INT; }
"long" { return LONG; }
"mutable" { return MUTABLE; }
"namespace" { return NAMESPACE; }
"new" { return NEW; }
"operator" { return OPERATOR; }
"private" { return PRIVATE; }
"protected" { return PROTECTED; }
"public" { return PUBLIC; }
"register" { return REGISTER; }
"reinterpret_cast" { return REINTERPRET_CAST; }
"return" { return RETURN; }
"short" { return SHORT; }
"signed" { return SIGNED; }
"sizeof" { return SIZEOF; }
"static" { return STATIC; }
"static_cast" { return STATIC_CAST; }
"struct" { return STRUCT; }
"switch" { return SWITCH; }
"template" { return TEMPLATE; }
"this" { return THIS; }
"throw" { return THROW; }
"true" { return TRUE; }
"try" { return TRY; }
"typedef" { return TYPEDEF; }
"typeid" { return TYPEID; }
"typename" { return TYPENAME; }
"union" { return UNION; }
"unsigned" { return UNSIGNED; }
"using" { return USING; }
"virtual" { return VIRTUAL; }
"void" { return VOID; }
"volatile" { return VOLATILE; }
"wchar_t" { return WCHAR_T; }
"while" { return WHILE; }

[a-zA-Z_][a-zA-Z_0-9]* { /* The keyword rules come first, so only non-keywords get here. */ return check_identifier(yytext); }

	/* Integer literals: hexadecimal, octal, decimal. */
"0"[xX][0-9a-fA-F]+{intsuffix}? { return INTEGER; }
"0"[0-7]+{intsuffix}? { return INTEGER; }
[0-9]+{intsuffix}? { return INTEGER; }

	/* Floating literals: with a fractional part, or digits plus exponent. */
{fracconst}{exppart}?{floatsuffix}? { return FLOATING; }
[0-9]+{exppart}{floatsuffix}? { return FLOATING; }

	/* Character and string literals, narrow and wide (L prefix). */
"'"{chartext}*"'" { return CHARACTER; }
"L'"{chartext}*"'" { return CHARACTER; }
"\""{stringtext}*"\"" { return STRING; }
"L\""{stringtext}*"\"" { return STRING; }

	/* Anything else: report it on stderr and keep scanning. */
. { fprintf(stderr, "%d: unexpected character `%c'\n", lineno, yytext[0]); }

%%

/*
 * End-of-input hook for the scanner. Always answers 1; per the flex manual
 * a nonzero result means there is no further input to switch to.
 */
static int
yywrap(void)
{
	enum { NO_MORE_INPUT = 1 };

	return NO_MORE_INPUT;
}
static void
27
skip_comment(void)
{
int c1, c2;

c1 = input();
c2 = input();

while(c2 != EOF && !(c1 == '*' && c2 == '/'))


{
if (c1 == '\n')
++lineno;
c1 = c2;
c2 = input();
}
}
28
/*
 * Discard the remainder of the current line, including its newline.
 * Used for "//" comments.
 *
 * lineno is incremented only when a newline was actually consumed, so a
 * comment that runs into the end of input does not add a phantom line.
 */
static void
skip_until_eol(void)
{
	int c;

	/*
	 * Older flex releases return EOF from input() at end of input, newer
	 * ones return 0; stop on either.
	 */
	do {
		c = input();
	} while (c != EOF && c != 0 && c != '\n');

	if (c == '\n')
		++lineno;
}
static int 29
check_identifier(const char *s)
{
/*
* This function should check if `s' is a
* typedef name or a class
* name, or a enum name, ... etc. or
* an identifier.
*/
switch (s[0]) {
case 'D': return TYPEDEF_NAME;
case 'N': return NAMESPACE_NAME;
case 'C': return CLASS_NAME;
case 'E': return ENUM_NAME;
case 'T': return TEMPLATE_NAME;
}
return IDENTIFIER;
}

You might also like