Yacc PDF
Yacc PDF
Essentially, Yacc generates an LALR parser, given a grammar description.

A Yacc program has four (or three?) parts (much like a Lex program):

%{
C declarations
%}
Yacc declarations
%%
translation rules
%%
supporting C functions

(As with Lex, not all parts are necessary.)

Yacc takes a Yacc program and translates it into a C program y.tab.c.

Included in this program is an LALR parsing table and an LR parse driver.

%{
C declarations
%}
Yacc declarations
%%
translation rules
%%
supporting C functions

C declarations are copied as is. Here declare variables used in translation rules and elsewhere. Include files. Define constants.

Yacc declarations — declare tokens of the grammar, and precedence and associativity of tokens.

Translation rules are essentially grammar productions. Each production can be followed by a “semantic action” which is essentially C code to be executed whenever the production is used in a reduction.

Supporting C functions — the user must provide a lexical analyzer yylex(), an error function yyerror(), and a main() function that calls the generated parser yyparse().
1 2
Here is an example Yacc program for a simple calculator:
%{
#include <ctype.h>
#include <stdio.h>
%}
%token DIGIT
%%
line : expr '\n' { printf("%d\n", $1); }
;
expr : expr '+' term { $$ = $1 + $3; }
| term { $$ = $1; }
;
term : term '*' factor { $$ = $1 * $3; }
| factor { $$ = $1; }
;
factor : '(' expr ')' { $$ = $2; }
| DIGIT { $$ = $1; }
;
%%
yylex() {
int c;
while ((c=getchar()) == ' ');
if (isdigit(c)) {
yylval = c - '0';
return DIGIT;
}
return c;
}
...

yyerror(char *s) {
printf("%s\n", s);
}
main() {
yyparse();
}

> yacc -v d1.y
> cc -o d1y y.tab.c
> d1y
1+2*3*(4+5)
55
> d1y
1++1
syntax error

The -v option produces a human-readable version of the LALR parsing table in a file y.output. Let’s look at it. . .
3 4
The dotted action in state 1 is the default action. So we see that Yacc may allow reductions that are not warranted by the LALR parsing table. But unwarranted shifts are not allowed.
0 $accept : line $end
1 line : expr '\n'
DIGIT shift 1
state 0 ’(’ shift 2
$accept : . line $end (0) . error
line goto 3
expr goto 4 state 3
term goto 5 $accept : line . $end (0)
factor goto 6
$end accept
’\n’ shift 8
’+’ shift 9
And what is goto(0, DIGIT) ? . error
5 6
state 5 state 9
expr : term . (3) expr : expr ’+’ . term (2)
term : term . ’*’ factor (4)
DIGIT shift 1
’*’ shift 10 ’(’ shift 2
’\n’ reduce 3 . error
’+’ reduce 3
’)’ reduce 3 term goto 12
factor goto 6
state 6
term : factor . (5) state 10
term : term ’*’ . factor (4)
. reduce 5
DIGIT shift 1
’(’ shift 2
state 7 . error
expr : expr . ’+’ term (2)
factor : ’(’ expr . ’)’ (6) factor goto 13
’+’ shift 9
’)’ shift 11 state 11
. error factor : ’(’ expr ’)’ . (6)
. reduce 6
state 8
line : expr ’\n’ . (1)
. reduce 1
7 8
state 12
expr : expr '+' term . (2)
term : term . '*' factor (4)
'*' shift 10
'\n' reduce 2
'+' reduce 2
')' reduce 2

state 13
term : term '*' factor . (4)
8 terminals, 5 nonterminals
8 grammar rules, 14 states

Yacc tries very hard to build a parser — when the grammar is not LALR, Yacc uses various strategies to resolve shift/reduce conflicts and reduce/reduce conflicts.

To begin:
• Yacc resolves shift/reduce conflicts in favor of shifts (all else being equal).
• Yacc resolves reduce/reduce conflicts in favor of the production that occurs earlier in the grammar specification.

Recall, for instance, that the ambiguities in the grammar
E → E + E | E ∗ E | (E) | a
9 10
%{
#include <ctype.h> > yacc -v d2.y
#include <stdio.h> conflicts: 4 shift/reduce
%} > cat y.output
%token DIGIT
%% state 0
line : expr ’\n’ { printf("%d\n", $1); } $accept : _line $end
;
expr : expr ’+’ expr { $$ = $1 + $3; } DIGIT shift 4
| expr ’*’ expr { $$ = $1 * $3; } ( shift 3
| ’(’ expr ’)’ { $$ = $2; } . error
| DIGIT
; line goto 1
%% expr goto 2
yylex() {
int c; state 1
while ((c=getchar()) == ’ ’); $accept : line_$end
if (isdigit(c)) {
yylval = c - ’0’; $end accept
return DIGIT; . error
}
return c;
} state 2
yyerror(char *s) { line : expr_\n
printf("%s\n", s); expr : expr_+ expr
} expr : expr_* expr
main() {
yyparse(); \n shift 5
} + shift 6
* shift 7
. error
11 12
state 7
state 3 expr : expr *_expr
expr : (_expr )
DIGIT shift 4
DIGIT shift 4 ( shift 3
( shift 3 . error
. error
expr goto 10
expr goto 8
state 8
state 4 expr : expr_+ expr
expr : DIGIT_ (5) expr : expr_* expr
expr : ( expr_)
. reduce 5
+ shift 6
* shift 7
state 5 ) shift 11
line : expr \n_ (1) . error
. reduce 1
9: shift/reduce conflict (shift 6, red’n 2) on +
9: shift/reduce conflict (shift 7, red’n 2) on *
state 6 state 9
expr : expr +_expr expr : expr_+ expr
expr : expr + expr_ (2)
DIGIT shift 4 expr : expr_* expr
( shift 3
. error + shift 6
* shift 7
expr goto 9 . reduce 2
13 14
%{
#include <ctype.h>
10: shift/reduce conflict (shift 6, red’n 3) on + #include <stdio.h>
10: shift/reduce conflict (shift 7, red’n 3) on * %}
state 10 %token DIGIT
expr : expr_+ expr %left ’+’
expr : expr_* expr %left ’*’
expr : expr * expr_ (3) %%
line : expr ’\n’ { printf("%d\n", $1); }
+ shift 6 ;
* shift 7 expr : expr ’+’ expr { $$ = $1 + $3; }
. reduce 3 | expr ’*’ expr { $$ = $1 * $3; }
| ’(’ expr ’)’ { $$ = $2; }
| DIGIT
state 11 ;
expr : ( expr )_ (4) %%
yylex() {
. reduce 4 int c;
while ((c=getchar()) == ’ ’);
if (isdigit(c)) {
8 terminals, 2 nonterminals yylval = c - ’0’;
6 grammar rules, 12 states return DIGIT;
4 shift/reduce, 0 reduce/reduce conflicts reported }
return c;
}
yyerror(char *s) {
printf("%s\n", s);
}
main() {
yyparse();
}
15 16
state 2
> yacc -v d3.y line : expr_\n
> gcc -o d3y y.tab.c expr : expr_+ expr
> d3y expr : expr_* expr
1+2*3*(4+5)
55 + shift 6
> d3y * shift 7
1++ \n shift 5
syntax error . error
> cat y.output
state 0 state 3
$accept : _line $end expr : (_expr )
. reduce 1
17 18
state 9
state 6 expr : expr_+ expr
expr : expr +_expr expr : expr + expr_ (2)
expr : expr_* expr
DIGIT shift 4
( shift 3 * shift 7
. error . reduce 2
expr goto 9
state 10
state 7 expr : expr_+ expr
expr : expr *_expr expr : expr_* expr
expr : expr * expr_ (3)
DIGIT shift 4
( shift 3 . reduce 3
. error
19 20
Recall (from a while back) the following grammar that is LR(1) but not LALR(1):
S → aAa | bAb | aBb | bBa
A → a
B → a
This unambiguous grammar generates the language {aaa, bab, aab, baa}.

0 S′ → S
1, 2, 3, 4 S → aAa | bAb | aBb | bBa
5 A → a
6 B → a
I0: {[S′ → ·S, $], [S → ·aAa, $], [S → ·bAb, $], [S → ·aBb, $], [S → ·bBa, $]}
I1: {[S′ → S·, $]}
I2: {[S → a·Aa, $], [S → a·Bb, $], [A → ·a, a], [B → ·a, b]}
I3: {[S → b·Ab, $], [S → b·Ba, $], [A → ·a, b], [B → ·a, a]}
I4: {[S → aA·a, $]} I5: {[S → aB·b, $]} I6: {[A → a·, a], [B → a·, b]}
I7: {[S → bA·b, $]} I8: {[S → bB·a, $]} I9: {[A → a·, b], [B → a·, a]}
I10: {[S → aAa·, $]} I11: {[S → aBb·, $]}
I12: {[S → bAb·, $]} I13: {[S → bBa·, $]}
Here’s the LR(1) parsing table. . .
      |     action      |   goto
STATE |  a     b     $  |  S   A   B
  0   |  s2    s3       |  1
  1   |             acc |
  2   |  s6             |      4   5
  3   |  s9             |      7   8
  4   |  s10            |
  5   |        s11      |
  6   |  r5    r6       |
  7   |        s12      |
  8   |  s13            |
  9   |  r6    r5       |
 10   |             r1  |
 11   |             r3  |
 12   |             r2  |
 13   |             r4  |
21 22
Now consider the corresponding Yacc program. . . state 0
$accept : _S $end
%%
a shift 2
S : ’a’ A ’a’
b shift 3
| ’b’ A ’b’
. error
| ’a’ B ’b’
| ’b’ B ’a’
S goto 1
;
A : ’a’ ;
state 1
B : ’a’ ;
$accept : S_$end
%%
yylex() {
$end accept
int c;
. error
while ((c=getchar()) == ’ ’);
if (c == ’\n’) return 0;
return c;
state 2
}
S : a_A a
S : a_B b
yyerror(char *s) {
printf("%s\n", s);
a shift 6
}
. error
main() {
A goto 4
if (yyparse() == 0) printf("ok\n");
B goto 5
}
23 24
6: reduce/reduce conflict (red’ns 5 and 6 ) on a
state 3 6: reduce/reduce conflict (red’ns 5 and 6 ) on b
S : b_A b state 6
S : b_B a A : a_ (5)
B : a_ (6)
a shift 6
. error . reduce 5
A goto 7
B goto 8 state 7
S : b A_b
state 4
S : a A_a b shift 11
. error
a shift 9
. error
state 8
S : b B_a
state 5
S : a B_b a shift 12
. error
b shift 10
. error
state 9
S : a A a_ (1)
. reduce 1
25 26
state 10
S : a B b_ (3)
. reduce 3

state 11
S : b A b_ (2)
. reduce 2

state 12
S : b B a_ (4)
. reduce 4

Rule not reduced: B : a

4 terminals, 3 nonterminals
7 grammar rules, 13 states
0 shift/reduce, 2 reduce/reduce conflicts reported

Lex and Yacc work well together. . .
%token NUMBER
%left '+'
%left '*'
%%
line : line expr '\n' { printf("%d\n", $2); }
|
;
expr : expr '+' expr { $$ = $1 + $3; }
| expr '*' expr { $$ = $1 * $3; }
| '(' expr ')' { $$ = $2; }
| NUMBER
;
%%
#include "lex.yy.c"
yyerror(char *s) {
printf("%s\n", s);
}
main() {
yyparse();
}
number 0|[1-9][0-9]*
%%
[ ] {}
{number} { sscanf(yytext, "%d", &yylval);
return NUMBER; }
\n|. { return yytext[0]; }
%%
/*
 * yywrap: unconditionally returns 1.
 * NOTE(review): in lex a nonzero yywrap() result conventionally means
 * "no further input follows" -- confirm against the lex documentation.
 */
int yywrap()
{
    const int no_more_input = 1;

    return no_more_input;
}
27 28
> lex d3.l
> yacc d3l.y
> cc -o d3ly y.tab.c
> d3ly
1+2*3*(4+5)+45
100
11111+22222
33333
1++2
syntax error
29