;;; Generate and interpret LL parsing tables

;;; Assumes scanner is loaded

;;; ***************************************************************

;;; Top level: (sample)

;;; Sample top-level driver.  The form is quoted, so loading this file
;;; evaluates it to a list and defines nothing.  Unquoted, it would build
;;; the LL table for grammar-1 once (start symbol `command') and return a
;;; procedure that scans a string and hands the resulting item list to
;;; parse-whole.
;;; NOTE(review): scan-string, automaton-1 and cooking-function-1 are not
;;; defined in this file; presumably they come from the scanner -- confirm.
'(define parse
  (let ([table (build-LL-table 'command grammar-1)])
    (lambda (string)
      (parse-whole table 'command
	(scan-string automaton-1 cooking-function-1 string)))))


;;; Sample (quoted, hence not evaluated as a definition).
;;; parse-whole : table non-terminal item-list -> tree
;;; Parses item-list as `non-terminal' with parse-nt and checks that the
;;; first unused item has class end-marker.  If so the tree is returned;
;;; otherwise an error is signalled (under the name parse-top-level)
;;; reporting the first left-over item.
'(define parse-whole
  (lambda (table non-terminal item-list)
    (let ([result (parse-nt table non-terminal item-list)])
      (if (eq? (lexical-item->class
		 (car (parser-result->unused result)))
	       'end-marker)
	  (parser-result->tree result)
	  (error 'parse-top-level
	    "symbols left over: ~s"
	    (car (parser-result->unused result)))))))

;;; *************************************************************

;;; Extraction operators for grammars:

;;; production ::= (lhs rhs action)

;;; Field selectors for a production, a three-element list
;;; (lhs rhs action).
(define production->lhs
  (lambda (production) (car production)))
(define production->rhs
  (lambda (production) (cadr production)))
(define production->action
  (lambda (production) (caddr production)))

;;; rhs ::= (rhs-entry ...)
;;; rhs-entry ::= (token item-class) or (data-item item-class) or
;;; (non-terminal non-terminal)

;;; Field selectors for an rhs entry, a two-element list whose first
;;; element is the tag and whose second is the item class (or, for a
;;; non-terminal entry, the non-terminal's name).
(define rhs-entry->item-class
  (lambda (entry) (cadr entry)))
(define rhs-entry->tag
  (lambda (entry) (car entry)))

;;; ****************************************************************

;;; Creating LL tables

;;; Our general approach is to generate the first and follow sets by
;;; treating the grammar as a set of axioms and inference rules:

;;; Assertions:
;;; assertion ::= (first non-term term)  means term is in first(non-term).
;;; assertion ::= (follow non-term term) --similar

;;; rule ::= ((first B) (follow A)) --means (first B x) => (follow A x)

;;; build-LL-table : start-symbol grammar -> LL table
;;; Turns the grammar into a mixed list of axioms and inference rules,
;;; separates the two, closes the axioms under the rules, and assembles
;;; the table from the resulting first/follow assertions.
(define build-LL-table
  (lambda (start-symbol grammar)
    (let ([mixed (grammar->rules start-symbol grammar)])
      (assemble-LL-table
        (deductive-closure apply-rule
          (rules&axioms->rules mixed)
          (rules&axioms->axioms mixed))
        grammar))))

;;; grammar->rules : start-symbol grammar -> list of rules and axioms
;;; The result starts with the axiom (follow start-symbol end-marker),
;;; followed by the rules and axioms that production->rules yields for
;;; each production, in grammar order.
(define grammar->rules
  (lambda (start-symbol grammar)
    (let ([start-axiom (list 'follow start-symbol 'end-marker)]
          [per-production (map production->rules grammar)])
      (cons start-axiom
            (let flatten ([groups per-production])
              (if (null? groups)
                  '()
                  (append (car groups)
                          (flatten (cdr groups)))))))))

;;; Here's where we generate the rules:

;;; production->rules : production -> list of rules and axioms
;;;
;;; For a production with left side A:
;;;  * empty rhs            => rule  ((follow A) (first A))
;;;  * rhs starts with non-terminal B => rule  ((first B) (first A))
;;;  * rhs starts with token/data-item x => axiom (first A x)
;;; and, walking along a non-empty rhs, for every non-terminal B:
;;;  * B followed by non-terminal C   => rule  ((first C) (follow B))
;;;  * B followed by token/data-item x => axiom (follow B x)
;;;  * B last in the rhs              => rule  ((follow B) (follow A))
;;; Any other kind of rhs entry signals an error.
(define production->rules
  (lambda (production)
    (let ([a (production->lhs production)]
          [rhs (production->rhs production)])

      ;; report the entry at the head of `entries' as unsupported
      (define complain
        (lambda (entries)
          (error 'production->rules
            "can't treat entry ~s"
            (car entries))))

      ;; what the entry at the head of `rest' contributes to follow(b)
      (define follow-item
        (lambda (b rest)
          (record-case (car rest)
            [non-terminal (c) `((first ,c) (follow ,b))]
            [data-item (x) `(follow ,b ,x)]
            [token (x) `(follow ,b ,x)]
            [else (complain rest)])))

      ;; walk a non-empty rhs, collecting the follow rules/axioms
      ;; debug: (printf "inner loop: rhs = ~s~%" entries)
      (define scan
        (lambda (entries)
          (let ([rest (cdr entries)])
            (record-case (car entries)
              [non-terminal (b)
                (if (null? rest)
                    (list `((follow ,b) (follow ,a)))
                    (let ([item (follow-item b rest)])
                      (cons item (scan rest))))]
              [data-item (i)
                (if (null? rest) '() (scan rest))]
              [token (i)
                (if (null? rest) '() (scan rest))]
              [else (complain entries)]))))

      (if (null? rhs)
          (list `((follow ,a) (first ,a)))
          (let ([first-rule
                  (record-case (car rhs)
                    [non-terminal (b) `((first ,b) (first ,a))]
                    [data-item (x) `(first ,a ,x)]
                    [token (x) `(first ,a ,x)]
                    [else (complain rhs)])])
            (cons first-rule (scan rhs)))))))
	    
;;; Exercise:  extend this to handle while-lookahead-not and
;;; parse-separated-list 
		 
;;; rules&axioms->rules : list of rules and axioms -> list of rules
;;; An axiom starts with a symbol (its tag); a rule starts with a list.
;;; Keeps only the rules, preserving their order.
(define rules&axioms->rules
  (lambda (l)
    (let keep ([items l] [kept '()])
      (cond
        [(null? items) (reverse kept)]
        [(symbol? (car (car items))) (keep (cdr items) kept)]
        [else (keep (cdr items) (cons (car items) kept))]))))

;;; rules&axioms->axioms : list of rules and axioms -> list of axioms
;;; Keeps only the entries whose first element is a symbol (the axioms),
;;; preserving their order.
(define rules&axioms->axioms
  (lambda (l)
    (let keep ([items l] [kept '()])
      (cond
        [(null? items) (reverse kept)]
        [(symbol? (car (car items)))
         (keep (cdr items) (cons (car items) kept))]
        [else (keep (cdr items) kept)]))))

;;; apply-rule : rule assertion succeed fail -> result of succeed or fail
;;;   rule      ::= ((tag1 nt1) (tag2 nt2))
;;;   assertion ::= (tag nt x)
;;; If the assertion's tag and non-terminal are eq? to tag1 and nt1, calls
;;; succeed with the conclusion (tag2 nt2 x); otherwise calls fail with no
;;; arguments.
(define apply-rule
  (lambda (rule assertion succeed fail)
    (let ([premise (car rule)])
      (cond
        [(not (eq? (car premise) (car assertion))) (fail)]
        [(not (eq? (cadr premise) (cadr assertion))) (fail)]
        [else
          (let ([conclusion (cadr rule)])
            (succeed
              (list (car conclusion)
                    (cadr conclusion)
                    (caddr assertion))))]))))

;;; remove-duplicate-assertions : list of assertions -> list of assertions
;;; Keeps the first occurrence of each assertion (compared with equal?,
;;; via member), preserving order.
(define remove-duplicate-assertions
  (lambda (assertions)
    (let loop ([remaining assertions] [seen '()])
      (cond
        [(null? remaining) (reverse seen)]
        [(member (car remaining) seen) (loop (cdr remaining) seen)]
        [else (loop (cdr remaining) (cons (car remaining) seen))]))))

;;; deductive-closure : apply-rule rules axioms -> list of assertions
;;;
;;; Returns the axioms together with every assertion derivable from them
;;; by the rules.  apply-rule is called as
;;;   (apply-rule rule assertion succeed fail)
;;; and must call (succeed conclusion) or (fail).
;;;
;;; The work proceeds in generations: `old' holds assertions that have
;;; already been run against every rule, `new' the generation being
;;; processed, and `newer' the conclusions found so far that are in none
;;; of the three lists.  The closure is complete when a generation
;;; produces nothing new.
;;;
;;; The axioms are de-duplicated before use.  The same axiom can be
;;; generated by several productions, and without this step each copy
;;; would survive into the result (and into the lookahead lists built
;;; from it).
(define deductive-closure
  (lambda (apply-rule rules axioms)
    (let outer-loop ([old '()]
                     [new (remove-duplicate-assertions axioms)])
      (if (null? new)
        old
        (let assertion-loop ([newer '()] [assertions new])
          (if (null? assertions)
            ;; this generation is exhausted: retire it and start on the
            ;; conclusions it produced
            (outer-loop (append new old) newer)
            (let rule-loop ([newer newer] [rules rules])
              (if (null? rules)
                (assertion-loop newer (cdr assertions))
                (apply-rule (car rules) (car assertions)
                  (lambda (conclusion)
                    (if (already-deduced? conclusion
                          newer new old)
                      (rule-loop newer (cdr rules))
                      (rule-loop (cons conclusion newer)
                        (cdr rules))))
                  (lambda () (rule-loop newer (cdr rules))))))))))))

;;; this should be replaced by some kind of hash table

;;; already-deduced? : conclusion newer new old -> true value or #f
;;; True when conclusion is equal? to a member of any of the three lists.
(define already-deduced?
  (lambda (conclusion newer new old)
    (or (member conclusion newer)
	(member conclusion new)
	(member conclusion old))))

;;; filter : predicate list -> list
;;; Returns the elements of l that satisfy p, in their original order.
(define filter
  (lambda (p l)
    (let walk ([remaining l] [kept '()])
      (if (null? remaining)
          (reverse kept)
          (walk (cdr remaining)
                (if (p (car remaining))
                    (cons (car remaining) kept)
                    kept))))))

;;; Now we start constructing the LL tables
;;; LL tables look just like productions, except we have
;;;
;;; lhs ::= (nt list-of-terminals)
;;;
;;; The list of terminals tells find-production which terminals this
;;; production is applicable to.

;;; assemble-LL-table : assertions grammar -> LL table
;;; Builds one table entry per production, in grammar order, using
;;; assemble-LL-production.
(define assemble-LL-table
  (lambda (assertions grammar)
    (let walk ([productions grammar])
      (if (null? productions)
          '()
          (let ([entry (assemble-LL-production
                         assertions (car productions))])
            (cons entry (walk (cdr productions))))))))

;;; assemble-LL-production : assertions production -> LL table entry
;;; The entry has the shape ((lhs terminals) rhs action), where terminals
;;; is the list computed by extract-leading-terminals for this production.
(define assemble-LL-production
  (lambda (assertions production)
    (let* ([nt (production->lhs production)]
           [entries (production->rhs production)]
           [act (production->action production)]
           [lookaheads (extract-leading-terminals nt entries assertions)])
      (list (list nt lookaheads) entries act))))

;;; filter-assertions : tag non-terminal assertions -> list of terminals
;;; Collects, in order, the third element of every assertion whose tag
;;; and non-terminal are eq? to the given ones.
(define filter-assertions
  (lambda (tag non-terminal assertions)
    (let collect ([rest assertions])
      (cond
        [(null? rest) '()]
        [(and (eq? (car (car rest)) tag)
              (eq? (cadr (car rest)) non-terminal))
         (cons (caddr (car rest)) (collect (cdr rest)))]
        [else (collect (cdr rest))]))))

;;; extract-leading-terminals : lhs rhs assertions -> list of terminals
;;; Computes the lookahead terminals that select a production:
;;;  * empty rhs: the terminals asserted to be in follow(lhs);
;;;  * rhs starting with a non-terminal: the terminals asserted to be in
;;;    first of that non-terminal;
;;;  * rhs starting with a token or data-item: just that item class.
;;; Any other leading entry signals an error.
(define extract-leading-terminals
  (lambda (lhs rhs assertions)
    (cond
      [(null? rhs) (filter-assertions 'follow lhs assertions)]
      [else
        (let ([leader (car rhs)])
          (record-case leader
            [non-terminal (nt)
              (filter-assertions 'first nt assertions)]
            [token (class) (list class)]
            [data-item (class) (list class)]
            [else (error 'extract-leading-terminals
                    "unsupported rhs entry ~s" leader)]))])))

  
;;; ****************************************************************

;;; Interpretation of LL tables

;;; Parser-result:

;;; make-parser-result : tree unused -> parser-result
;;; A parser-result is the two-element list (tree unused).
(define (make-parser-result tree unused)
  (cons tree (cons unused '())))

;;; Field selectors for a parser-result, the list (tree unused).
(define parser-result->tree
  (lambda (result) (car result)))
(define parser-result->unused
  (lambda (result) (cadr result)))

;;; For making records:

;;; tag&fields->record : tag list-of-fields -> record
;;; A record is the list whose head is the tag and whose tail is the
;;; list of fields.
(define tag&fields->record
  (lambda (tag fields) (cons tag fields)))

;;; *************************************************************

;;; Procedures for interpreting parse tables:

;;; The engine for interpreting parse tables is unchanged from the one
;;; in the book (as revised), except that find-production has changed
;;; to use the LL left sides.


;;; parse-nt : table non-terminal item-list -> parser-result
;;; Looks up the production for nt that is applicable to the class of the
;;; first item, and parses item-list with it.  Signals an error when no
;;; production applies.
(define parse-nt
  (lambda (grammar nt item-list)
    (let* ([lookahead (car item-list)]
           [lookahead-class (lexical-item->class lookahead)]
           [production (find-production grammar nt lookahead-class)])
      (cond
        [production
          (parse-production grammar production item-list)]
        [else
          (error 'parse-nt
            "Nonterminal ~s can't start with ~s:~%~s"
            nt lookahead-class lookahead)]))))

;;; find-production : table non-terminal item-class -> whatever
;;; linear-search returns
;;; Searches the table with a predicate that accepts an entry whose left
;;; side (nt terminals) names non-terminal and lists item-class among its
;;; terminals.
;;; NOTE(review): linear-search is not defined in this file; parse-nt
;;; treats a false result as "no production" -- confirm its contract.
(define find-production
  (lambda (grammar non-terminal item-class)
    (let ([applicable?
            (lambda (production)
              (let ([lhs (production->lhs production)])
                ;; debug: (printf "find-production: lhs = ~s~%" lhs)
                (if (eq? (car lhs) non-terminal)
                    (memq item-class (cadr lhs))
                    #f)))])
      (linear-search grammar applicable?))))


;;; parse-production : table production item-list -> parser-result
;;;   production ::= (lhs rhs action)
;;; Parses item-list against the production's rhs with parse-rhs, which
;;; yields a parser-result holding the list of subtrees, then combines
;;; the subtrees according to the action:
;;;  * if the action is a procedure, it is applied to the subtrees;
;;;  * otherwise (e.g. a symbol taken from a quoted grammar such as
;;;    grammar-1) the action is used as a tag and the tree is the record
;;;    (action subtree ...), built with tag&fields->record.
;;; The second case is needed because `apply' requires a procedure, so a
;;; quoted grammar whose actions are symbols could not be interpreted
;;; otherwise.
(define parse-production
  (lambda (grammar production item-list)
    (let* ([result (parse-rhs grammar
                     (production->rhs production)
                     item-list)]
           [action (production->action production)]
           [subtrees (parser-result->tree result)])
      (make-parser-result
        (if (procedure? action)
            (apply action subtrees)
            (tag&fields->record action subtrees))
        (parser-result->unused result)))))

;;; parse-rhs : table rhs item-list -> parser-result
;;; Parses item-list against the rhs entries in turn and returns a
;;; parser-result whose tree is the list of subtrees contributed by the
;;; entries.  An empty rhs consumes nothing and contributes no subtrees.
(define parse-rhs
  (lambda (grammar rhs item-list)
    ;; debug: (printf "parse-rhs: rhs = ~s ; (car item-list) = ~s~%"
    ;;          rhs (car item-list))
    (cond
      [(null? rhs) (make-parser-result '() item-list)]
      [else
        ;; hand the result for the first entry to finish-rhs, which
        ;; parses the remaining entries and conses the trees together
        (let ([continue
                (lambda (result1)
                  (finish-rhs result1 grammar (cdr rhs)))])
          (record-case (car rhs)
            ;; (token item-class): the first item must have this class;
            ;; it is consumed and contributes no subtree.
            [token (item-class)
              (let ([found (lexical-item->class (car item-list))])
                (if (eq? found item-class)
                  (parse-rhs grammar (cdr rhs) (cdr item-list))
                  (error 'parse-rhs "Looking for: ~s found: ~s"
                    item-class
                    found)))]
            ;; (data-item item-class): parse-data-item checks and
            ;; consumes the first item.
            [data-item (item-class)
              (continue (parse-data-item item-class item-list))]
            ;; (non-terminal nt): parse the non-terminal.
            [non-terminal (nt)
              (continue (parse-nt grammar nt item-list))]
            [while-lookahead-not (item-class nt)
              (continue
                (parse-while-lookahead-not
                  item-class nt grammar item-list))]
            [separated-list (nt item-class)
              (continue
                (parse-separated-list
                  item-class nt grammar item-list))]
            [else (error 'parse-rhs "bad rhs: ~s" rhs)]))])))

;;; finish-rhs : parser-result table rhs -> parser-result
;;; Parses the rest of an rhs against the items left unused by result1,
;;; and returns a parser-result whose tree is result1's tree consed onto
;;; the list of trees for the rest.
(define finish-rhs
  (lambda (result1 grammar rhs)
    (let* ([leftover (parser-result->unused result1)]
           [result2 (parse-rhs grammar rhs leftover)]
           [trees (cons (parser-result->tree result1)
                        (parser-result->tree result2))])
      (make-parser-result trees (parser-result->unused result2)))))

;;; parse-data-item : item-class item-list -> parser-result
;;; If the first item has class item-class, returns a parser-result
;;; holding that item's data field and the remaining items; otherwise
;;; signals an error.
(define parse-data-item
  (lambda (item-class item-list)
    (let* ([item (car item-list)]
           [found (lexical-item->class item)])
      (if (not (eq? found item-class))
        (error 'parse-rhs-data-item "Looking for: ~s found: ~s"
          item-class
          found)
        (make-parser-result
          (lexical-item->data item)
          (cdr item-list))))))

;;; parse-while-lookahead-not : item-class nt table item-list
;;;                               -> parser-result
;;; Parses nt repeatedly until the lookahead item has class item-class.
;;; The tree of the result is the list of the trees parsed; the
;;; terminating item is left unconsumed.
(define parse-while-lookahead-not
  (lambda (item-class nt grammar item-list)
    (let collect ([items item-list])
      (if (eq? (lexical-item->class (car items)) item-class)
        ;; lookahead is the terminator: no (more) elements
        (make-parser-result '() items)
        ;; otherwise parse one nt, then collect the rest
        (let* ([head (parse-nt grammar nt items)]
               [tail (collect (parser-result->unused head))])
          (make-parser-result
            (cons (parser-result->tree head)
                  (parser-result->tree tail))
            (parser-result->unused tail)))))))

;;; parse-separated-list : item-class nt table item-list -> parser-result
;;; Parses  nt {item-class nt}*  -- one or more nt's separated by items
;;; of class item-class.  The tree of the result is the list of the nt
;;; trees; separators are consumed and contribute nothing.
(define parse-separated-list
  (lambda (item-class nt grammar item-list)
    (let elements ([items item-list])
      (let* ([head (parse-nt grammar nt items)]
             [after (parser-result->unused head)]
             [tail
               (if (eq? (lexical-item->class (car after)) item-class)
                 ;; separator found: skip it and parse another element
                 (elements (cdr after))
                 ;; no separator: the list ends here
                 (make-parser-result '() after))])
        (make-parser-result
          (cons (parser-result->tree head)
                (parser-result->tree tail))
          (parser-result->unused tail))))))

;;; ****************************************************

;;; Sample grammar

;;; Keyword symbols of the sample language.  Each one appears as a token
;;; class in grammar-1.
;;; NOTE(review): nothing in this file reads this list; presumably the
;;; scanner uses it to tell keywords from identifiers -- confirm.
(define **keywords-list** '(begin2 begin end while do if then else))

;;; grammar-1: sample grammar with non-terminals command, command-list
;;; and expression.  Each production is (lhs rhs action).  Because the
;;; whole grammar is quoted, each action here is a symbol; the sample
;;; outputs under "Tests" show parse trees tagged with these symbols.
(define grammar-1
  '((command
      ((token begin2) (non-terminal command) 
       (token semicolon) (non-terminal command)
       (token semicolon) (token end))
      compound-command)
    (command
      ((token begin)
       (non-terminal command-list)
       (token end))
      sequence-command)
    (command-list			; this makes semicolon a terminator
      ()
      *empty-list)
    (command-list
      ((non-terminal command)
       (token semicolon)
       (non-terminal command-list))
      *command-list)
    (command
      ((token while) (non-terminal expression)
       (token do) (non-terminal command))
      while-command)
    (command
      ((token if) (non-terminal expression)
       (token then) (non-terminal command)
       (token else) (non-terminal command))
      if-command)
    (command
      ((data-item identifier) (token assign-sym)
       (non-terminal expression))
      assignment-command)
    (expression
      ((data-item identifier)) identifier-expression)
    (expression
      ((token lparen) (non-terminal expression)
       (token plus-sym) (non-terminal expression)
       (token rparen))
      sum-expression)))

;;; Tests

;;; test1: parse a single assignment command.
(define (test1)
  (parse "x := y"))

;;; test2: parse a begin2 ... end compound of two assignments.
(define (test2)
  (parse "begin2 x:= y ; y := zz; end"))

;;; test3: parse a begin ... end sequence of three assignments, each
;;; terminated by a semicolon.
(define (test3)
  (parse "begin x:=y; y:=z; z:=x; end"))

; > (test1)
; (assignment-command x (identifier-expression y))
; > (test2)
; (compound-command
;    (assignment-command x (identifier-expression y))
;    (assignment-command y (identifier-expression zz)))
; > (test3)
; (sequence-command
;    (*command-list
;       (assignment-command x (identifier-expression y))
;       (*command-list
;          (assignment-command y (identifier-expression z))
;          (*command-list
;             (assignment-command z (identifier-expression x))
;             (*empty-list)))))

