Newsgroups: comp.lang.scheme
Path: cantaloupe.srv.cs.cmu.edu!das-news2.harvard.edu!news2.near.net!news.mathworks.com!udel!gatech!howland.reston.ans.net!news.sprintlink.net!EU.net!sun4nl!phcoms4.seri.philips.nl!hpcssg!batelaan
From: batelaan@ukpsshp1.serigate.philips.nl (Wouter Batelaan)
Subject: A utility function to parse lists using backtracking.
Sender: news@ukpsshp1.serigate.philips.nl (CNews account)
Message-ID: <CzGnKI.J3K@ukpsshp1.serigate.philips.nl>
Date: Fri, 18 Nov 1994 11:21:54 GMT
Lines: 22
Organization: Philips Semiconductors, Southampton, UK
X-Newsreader: TIN [version 1.2 PL2]

A basic first version of a backtracking parser.

--
 Wouter Batelaan, SDC, Philips Semiconductors, Southampton, U.K.
 email: batelaan@ukpsshp1.serigate.philips.nl (seri: batelaan@ukpsshp1)
 tel  : +44 703 316556; fax : +44 703 316303

;=============================================================================
; BTPARSE - BackTracking Parser.
; This file implements a parser using backtracking,
; like you often see used in Prolog (DCGs or Definite Clause Grammars).
; 
; Main functions in this file:
; (btparse <input-list> <goal> <syntax-table> <success-function>)
; 
; A <syntax-table> is an association list between non-terminal names
; and their definitions (called right-hand sides);
; <goal> is a non-terminal specified by the syntax table;
; <success-function> is a function called when the <goal> has
; been matched. It is given as first argument the parse tree,
; and as second argument the remaining, unparsed input.
; If the <success-function> returns a true value, then parsing continues, using
; backtracking to find potentially more solutions.
; If it returns #f, then parsing stops.
;
; A sample <syntax-table>:
;  `(
;    (statement         (lvalue '= expr))
;    (statement         (or-expr))
;    (or-expr           (and-expr (zero-or-more '|| and-expr)))
;    (and-expr          (not-expr (zero-or-more '&& not-expr)))
;    (not-expr          ((optional '!) equality-expr))
;    (equality-expr     (cmp-expr (optional equality-op cmp-expr)))
;    (equality-op       ('==))
;    (equality-op       ('!=))
;    (cmp-expr          (add-expr (optional cmp-op add-expr)))
;    (cmp-op            ('<))
;    (cmp-op            ('<=))
;    (cmp-op            ('>))
;    (cmp-op            ('>=))
;    (add-expr		(term (zero-or-more add-op term)))
;    (add-op             ('+))
;    (add-op             ('-))
;    (term		(factor (zero-or-more mult-op factor)))
;    (mult-op		('*))
;    (mult-op		('/))
;    (lvalue		((true? ,symbol?)))
;    (factor		((true? ,symbol?)))
;    (factor		((true? ,number?)))
;    (factor		((list expr)))
;   )
; A right hand side is a list containing one or more of:
; - a non-terminal symbol;
; - a literal, given by a quoted expression;
; - (optional ...); this indicates an optional occurrence of ...
; - (zero-or-more ...); this indicates zero or more occurrences of ...
; - (one-or-more ...); this indicates one or more occurrences of ...
; - (true? <one-argument-function>); matches if the function applied 
;   to the input item returns true.
; - (list <non-terminal>); this matches a sublist whose contents is
;   given by the <non-terminal>.
; - (call <function>); invokes the function with first argument
;   the current parse tree so far, and second argument the current pending input.
;   If the function returns false then backtracking will occur.
;
; btparse itself does not return a significant value.
; 
; Requires: format.
; Developed using Elk.
; Tested under: Elk.
;-------------------------------------------------------------------------

; When true, btparse prints a trace of its progress through `format'.
(define btparse:debug #t)

; The `format' package is only needed for the debug trace, so it is loaded
; only when tracing is enabled and `format' is not already bound.
(if (and btparse:debug (not (bound? 'format)))
    (require 'format))

;-------------------------------------------------------------------------

; (btparse <input-list> <goal> <syntax-table> <success-function>)
;
; Parses <input-list> against the non-terminal <goal>, trying every production
; of <syntax-table> in order and backtracking on failure.
;
;   <input-list>       the list of items to parse.
;   <goal>             the non-terminal to start from.
;   <syntax-table>     list of productions (<non-terminal> <right-hand-side>).
;   <success-function> called as (<success-function> parse-tree rest-input)
;                      each time <goal> has been matched; a true result makes
;                      the parser backtrack to look for further solutions,
;                      #f stops the parse.
;
; The parse tree is built destructively (set-cdr!) and is taken apart again
; while backtracking, so <success-function> must copy the tree if it wants
; to keep it.  The value returned by btparse itself is not significant.
(define (btparse <input-list> <goal> <syntax-table> <success-function>)

  ; `result' is a header cell; the parse tree of <goal> is (cadr result).
  (define result (list '()))
  ; `accu' always names the last cell of the list being extended.
  (define accu result)
  ; Set from the value returned by <success-function>; once it is #f no
  ; further alternatives are tried and the tree is left untouched.
  (define continue #t)

  ; Like `format', but silent unless btparse:debug is true.
  (define (format-dbg . args)
    (if btparse:debug (apply format args) #f))

  ; Match `input' against the grammar items in `syntax-list'.
  ; `pending-rules' is a linked stack of continuations describing what to
  ; resume once `syntax-list' is exhausted:
  ;   (non-terminal   accu syntax-list pending-rules)
  ;   (restore-syntax syntax-list pending-rules)
  ;   (sub-input      accu syntax-list input pending-rules)
  ; `accu' is the cell after which the next matched item is appended.
  (define (match input syntax-list pending-rules accu)
    ;(format-dbg #t "match input = ~a syntax-list = ~a accu = ~a~%" input syntax-list accu)
    (if (null? syntax-list)
        (if (null? pending-rules)
            (begin
              ; Nothing left to match: the goal has been parsed.
              ;(format-dbg #t "Calling goal result = ~a (input = ~a)~%" result input)
              (set! continue (<success-function> (cadr result) input))
              input)
            (begin
              (format-dbg #t "Restore from pending-rules: ~a~%" pending-rules)
              (case (car pending-rules)

                ((non-terminal)
                 ; A production is complete: carry on in the enclosing rule,
                 ; appending after the cell that holds the finished subtree.
                 (match input
                        (list-ref pending-rules 2)
                        (list-ref pending-rules 3)
                        (cdr (list-ref pending-rules 1))))

                ((restore-syntax)
                 ; The body of optional / zero-or-more / one-or-more is
                 ; complete: carry on with the saved syntax-list.
                 (match input
                        (list-ref pending-rules 1)
                        (list-ref pending-rules 2)
                        accu))

                ((sub-input)
                 ; A (list <non-terminal>) item is complete: switch back to
                 ; the outer input.  As for `non-terminal', continue after
                 ; the cell holding the sublist's subtree so that following
                 ; items are appended instead of overwriting it.
                 ; NOTE(review): input left over inside the sublist is
                 ; discarded here without a check -- confirm whether a
                 ; sublist should be required to be consumed completely.
                 (match (list-ref pending-rules 3)
                        (list-ref pending-rules 2)
                        (list-ref pending-rules 4)
                        (cdr (list-ref pending-rules 1))))

                (else
                 (error 'btparse "Unexpected type in pending-rules: ~a~%" pending-rules)))))

        ; There are grammar items left.  This branch always yields #f.
        (begin
          (let ((s (car syntax-list)))
            (if (symbol? s)
                (begin
                  (format-dbg #t "Non-terminal ~a (input = ~a)~%" s input)
                  (try-all input s
                           (list 'non-terminal accu (cdr syntax-list) pending-rules)
                           accu))
                ; else a list, decode:
                (case (car s)

                  ((quote)
                   ; a quoted expression: matches an equal? input item.
                   (if (and (pair? input) (equal? (car input) (cadr s)))
                       (matched-terminal input s syntax-list pending-rules accu)
                       (begin
                         (format-dbg #t "Not-Match terminal ~a (input = ~a)~%" (cadr s) input)
                         #f)))

                  ((list)
                   ; grammar item: (list <non-terminal>)
                   ; The next input item must itself be a list, whose
                   ; contents are matched using the non-terminal specified.
                   ; Exhausted input simply fails to match.
                   (when (and (pair? input) (list? (car input)))
                         (try-all (car input) (cadr s)
                                  (list 'sub-input accu (cdr syntax-list) (cdr input) pending-rules)
                                  accu)))

                  ((zero-or-more)
                   ; grammar item: ... (zero-or-more ...)
                   (format-dbg #t "zero-or-more ~a (input = ~a)~%" (cdr s) input)
                   ; Same as optional, except that the construct itself is
                   ; kept in the syntax-list to resume with, so that it can
                   ; match again.
                   (match input (cdr s) (list 'restore-syntax syntax-list pending-rules) accu)
                   ; Back here: now try without the construct, after
                   ; removing whatever the attempt above added to the tree.
                   (when continue
                         (set-cdr! accu '())
                         (match input (cdr syntax-list) pending-rules accu)))

                  ((optional)
                   ; grammar item: ... (optional ...)
                   (format-dbg #t "Optional ~a (input = ~a)~%" (cdr s) input)
                   ; Try with the optional construct:
                   (match input (cdr s) (list 'restore-syntax (cdr syntax-list) pending-rules) accu)
                   ; Back here: now try without it, after removing whatever
                   ; the attempt above added to the tree.
                   (when continue
                         (set-cdr! accu '())
                         (match input (cdr syntax-list) pending-rules accu)))

                  ((one-or-more)
                   ; grammar item: ... (one-or-more items...)
                   (format-dbg #t "one-or-more ~a (input = ~a)~%" (cdr s) input)
                   ; Match items... once, followed by a
                   ; (zero-or-more items...) construct, followed by the rest
                   ; of the syntax-list:
                   (match input (cdr s)
                          (list 'restore-syntax
                                (cons (cons 'zero-or-more (cdr s)) (cdr syntax-list))
                                pending-rules)
                          accu))

                  ((true?)
                   ; grammar item: (true? <function> extra-args...)
                   ; Matches when the function, applied to the next input
                   ; item followed by the extra arguments, returns true.
                   ; Exhausted input simply fails to match.
                   (when (and (pair? input)
                              (apply (cadr s) (cons (car input) (cddr s))))
                         (matched-terminal input s syntax-list pending-rules accu)))

                  ((call)
                   ; grammar item: (call <function>)
                   ; Consumes no input; parsing proceeds only when the
                   ; function accepts the tree so far and the pending input.
                   (and ((cadr s) (cadr result) input)
                        (match input (cdr syntax-list) pending-rules accu))))))
          #f)))

  ; Append the input item just matched to the tree, continue with the rest
  ; of the rule and, once that returns, remove the item again (backtracking)
  ; unless the parse has been stopped.
  (define (matched-terminal input s syntax-list pending-rules accu)
    (set-cdr! accu (cons (car input) '()))
    (match (cdr input) (cdr syntax-list) pending-rules (cdr accu))
    ; Backtracking:
    (when continue (set-cdr! accu '())))

  ; Try, in table order, every production whose left-hand side is
  ; `non-terminal'.  Each attempt hangs a fresh (non-terminal) subtree after
  ; `accu'.  An empty <syntax-table> just yields no match.
  (define (try-all input non-terminal pending-rules accu)
    (format-dbg #t "try-all non-terminal = ~a accu = ~a~%" non-terminal accu)
    (let loop ((productions <syntax-table>))
      (when (and continue (pair? productions))
            (let ((production (car productions)))
              ;(format-dbg #t "Checking production ~a~%" production)
              (when (equal? non-terminal (car production))
                    ;(format-dbg #t "Trying production ~a~%" production)
                    (set-cdr! accu (list (list non-terminal)))
                    (match input (cadr production) pending-rules (cadr accu))
                    (format-dbg #t "After production ~a; accu = ~a~%" production accu)))
            (loop (cdr productions))))
    ; Backtracking:
    (when continue
          ;(format-dbg #t "Fails:  non-terminal = ~a~%" non-terminal)
          (set-cdr! accu '())))

  ; The body of btparse:
  (try-all <input-list> <goal> '() accu)
)

; Register the feature name `btparse' for this file.
(provide 'btparse)
