Readable Lisp S-expressions Git Repository

Readable Lisp/S-expressions with infix, functions, and indentation

Brought to you by: dwheeler
[660311]: / neoteric.lisp Maximize Restore History
477 lines (417 with data), 21.9 kB

;;; neoteric.lisp
;;; Implements neoteric-expressions from the "readable" approach for Lisp.

;;; Copyright (C) 2007-2013 by David A. Wheeler
;;;
;;; This software is released as open source software under the "MIT" license:
;;;
;;; Permission is hereby granted, free of charge, to any person obtaining a
;;; copy of this software and associated documentation files (the "Software"),
;;; to deal in the Software without restriction, including without limitation
;;; the rights to use, copy, modify, merge, publish, distribute, sublicense,
;;; and/or sell copies of the Software, and to permit persons to whom the
;;; Software is furnished to do so, subject to the following conditions:
;;;
;;; The above copyright notice and this permission notice shall be included
;;; in all copies or substantial portions of the Software.
;;;
;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
;;; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
;;; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
;;; THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
;;; OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
;;; ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
;;; OTHER DEALINGS IN THE SOFTWARE.

;;; Neoteric-expressions themselves are a very simple notation and their
;;; basic implementation is also simple.
;;; Unfortunately, some quirks in Common Lisp (CL) mean that we have to do
;;; some work-arounds as compared to other Lisps like Scheme:
;;; 1. CL "read" by default consumes trailing whitespace, even if the
;;;    whitespace is NOT part of the datum being read at all.
;;;    This is an unfortunate quirk, and in my view, a bug.  As a result,
;;;    if you just do a "read", a later datum may look
;;;    like a neoteric tail.  E.G., given: "x (y)", a simple read of "x"
;;;    will consume the space after "x"; naively checking for a "tail"
;;;    would then make it look like "x(y)" instead, wrongly producing "(x y)".
;;;    For example, if we naively pass down "recursive" as "t" in
;;;    all places, it'll use this default and consume trailing whitespace.
;;;    Thus, we have to be careful about calling read or calling any
;;;    reads with recursive=t. Instead, we'll typically call
;;;    "read-preserving-whitespace", call with recursive=nil, or specially
;;;    extract characters into a string for reading.
;;; 2. There's no portable way to directly replace the "read" procedure,
;;;    so we must manipulate the readtable to do what we want.
;;;    We end up wrapping all constituents so that we can prevent consuming
;;;    trailing whitespace, to distinguish "x(y)" from "x (y)".



(cl:in-package :readable)

; Readtable used to read plain atoms (tokens, strings, "#" constructs)
; without the neoteric wrappers.  It starts as a copy of the readtable
; current at load time; enable-neoteric-real replaces it with a fresh copy
; of the then-current readtable before installing the wrappers.
(defvar *neoteric-underlying-readtable* (copy-readtable)
        "Use this table when reading neoteric atoms")

; Readtable that has neoteric processing installed.  enable-neoteric-real
; stores the active readtable here once it has finished wrapping it.
(defvar *neoteric-readtable* (copy-readtable)
        "Use this table when about to read a neoteric expression")


; Consulted by read-error: when true, the message is printed to
; *standard-output* before the error is signaled.
(defvar *noisy* t
        "If true, a parse error prints an error message to standard out")

; Work around SBCL nonsense that makes its "defconstant" useless.
; See: http://www.sbcl.org/manual/Defining-Constants.html
; This is disabled, because we already define it in "print".
; (defmacro define-constant (name value &optional doc)
;  `(defconstant ,name (if (boundp ',name) (symbol-value ',name) ,value)
;                      ,@(when doc (list doc))))

; Marker for eof: a fresh uninterned symbol that is handed to peek-char as
; its eof-value and compared with EQ, so it can never be confused with a
; character taken from the stream.
; NOTE: define-constant is not defined in this file; the commented-out macro
; above says it is already defined in "print".
(define-constant my-eof-marker (make-symbol "my-eof-marker"))

; Condition signaled by read-error when readable input is malformed.
; Slot TEXT (initarg :text, reader TEXT) holds the message describing the
; problem.  It defaults to NIL so that the report function never has to
; touch an unbound slot.
; A :report is supplied so that printing the condition (in the debugger,
; via princ, or with format ~A) shows the message rather than an opaque
; implementation-specific object representation.
(define-condition readable-parse-error (parse-error)
  ((text :initarg :text :initform nil :reader text))
  (:report (lambda (condition stream)
             ; "~@[...~]" emits the ": message" part only when TEXT is non-NIL.
             (format stream "Readable parse error~@[: ~A~]"
                     (text condition)))))

(defun read-error (message)
  ; Report a parse problem and signal it.
  ;   message - object describing the problem; printed with princ.
  ; When *noisy* is true, "Error: <message>" is written on its own line to
  ; *standard-output* first.  In all cases a readable-parse-error carrying
  ; MESSAGE in its :text slot is then signaled with ERROR.
  (when *noisy*
    (let ((out *standard-output*))
      (terpri out)
      (write-string "Error: " out)
      (princ message out)
      (terpri out)))
  (error 'readable-parse-error :text message))

; Unfortunately, clisp will write all symbols with |...| around them
; when neoteric- or sweet-expressions are enabled.
; The issue is in clisp file "src/io.d" function "pr_symbol_part".
; We hope to be able to modify clisp in the future so it can be disabled,
; but for now, just carry on.

; NOTE: clisp's "peek-char" has a serious bug; it defaults to CONSUME
; a following whitespace, contravening the Common Lisp spec:
;    http://www.lispworks.com/documentation/HyperSpec/Body/f_peek_c.htm
; We work around this by ALWAYS providing peek-char with 2 parameters
; ("nil" and the stream name) when we don't want to skip whitespace.

; Test to ensure that peek-char (as we use it) works.
; Load-time sanity check: after reading one token with
; read-preserving-whitespace, a non-skipping peek-char must still see the
; space that followed the token.  If not, print a banner and abort the load.
(with-input-from-string (probe "Q56 T78")
  ; Read "Q56"; this should NOT consume the space after it.
  (read-preserving-whitespace probe t nil)
  (unless (eql (peek-char nil probe) #\space)
    (loop repeat 3 do (terpri))
    (dolist (line '("*** WARNING WARNING WARNING ***"
                    "Procedure read-preserving-whitespace or peek-char"
                    "FAIL to preserve whitespace following expressions."
                    "*** WARNING WARNING WARNING ***"))
      (princ line)
      (terpri))
    (loop repeat 3 do (terpri))
    (error "peek-char BUG")))

; Set of constituents - these will be overridden.
; We should support arbitrary UTF-8 characters, but it can be complicated
; to do so portably.  For now, we'll define this as a variable so it
; can be overridden.
; List of the characters that enable-neoteric-real wraps with
; wrap-read-n-tail.  Written as strings so each group can be checked at a
; glance; the resulting list has the same elements in the same order as a
; character-by-character listing would.
(defvar *constituents*
    (append
      (coerce "!$%&*+-./" 'list)
      (coerce "0123456789" 'list)
      (coerce ":<=>?@" 'list)
      (coerce "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 'list)
      (coerce "^_" 'list)
      (coerce "abcdefghijklmnopqrstuvwxyz~" 'list)
      (list #\rubout))) ; Rubout, really?!?  Yup, it's in the spec.

; TODO: Add UTF-8 constituents to *constituents* if the underlying
; system supports them in the readtable.

;;; Key procedures to implement neoteric-expressions

; These delimiting characters stop reading of symbols or non-datums
; (e.g., after ".").  We'll define these as a "variable" so that other
; routines can reach in and change them, if truly necessary.
(defvar neoteric-delimiters
  (list #\( #\) #\[ #\] #\{ #\}              ; paired grouping characters
        #\space #\tab #\newline #\return     ; whitespace
        #\# #\' #\` #\,))                    ; dispatch / quoting characters

(defun string-before-delimiter (input-stream start)
  ; Return a string made of START followed by every character read from
  ; INPUT-STREAM up to, but not including, the next member of
  ; neoteric-delimiters or end of file.  The delimiter itself is only
  ; peeked at and remains in the stream.
  (let ((collected '()))
    (loop
      (let ((next (peek-char nil input-stream nil my-eof-marker)))
        (when (or (eq next my-eof-marker)
                  (member next neoteric-delimiters))
          (return))
        (push (read-char input-stream) collected)))
    (concatenate 'string start (nreverse collected))))

; Note: In traditional Common Lisp, symbols with just multiple "."
; are illegal, especially "...".  For the moment we'll allow it.

; Read a datum and ALLOW "." as a possible value:
(defun my-read-to-delimiter (input-stream start)
  ; Collect START plus the characters up to the next delimiter and read the
  ; resulting text as one datum using the underlying (non-neoteric)
  ; readtable.  A text of exactly "." is returned as the symbol |.| instead
  ; of being handed to the reader.
  (let ((token (string-before-delimiter input-stream start))
        (*readtable* *neoteric-underlying-readtable*)) ; Temporary switch
    (cond
      ((string= token ".") '|.|)
      (t (read-from-string token)))))

(defun my-read-datum (input-stream)
  ; Skip leading whitespace, then read one datum without consuming the
  ; whitespace that follows it.  A datum that begins with "." goes through
  ; my-read-to-delimiter, which allows a lone "." as a value.
  (if (eql (peek-char t input-stream) #\.) ; peek-char t skips whitespace
      (my-read-to-delimiter input-stream "")
      (read-preserving-whitespace input-stream t nil)))

(defun my-read-delimited-list (stop-char input-stream)
  ; Read datums from INPUT-STREAM until STOP-CHAR and return them as a list.
  ; The opening character must already have been consumed; STOP-CHAR itself
  ; is consumed before returning.
  ;   "(a b . c)" style input yields a dotted list: a "." datum that really
  ;   started with "." makes the next datum the final cdr, and STOP-CHAR
  ;   must follow it.
  ; Signals (via read-error) "Bad closing character." when a closer other
  ; than STOP-CHAR is found, and "Bad closing character after . datum."
  ; when the datum after "." is not followed by STOP-CHAR.
  ;
  ; The elements are accumulated iteratively, so the depth of the control
  ; stack does not grow with the number of elements in the list.
  (let ((items '())) ; Elements read so far, most recent first.
    (loop
      (let ((c (peek-char t input-stream))) ; First consume leading whitespace
        (cond
          ((eql c stop-char)
            (read-char input-stream)
            (return (nreverse items)))
          ; Balance ([{
          ((or (eql c #\)) (eql c #\]) (eql c #\}))
            (read-char input-stream)
            (read-error "Bad closing character."))
          (t
            ; Must preserve whitespace so "a ()" isn't read as "a()"
            (let ((datum (my-read-datum input-stream)))
              (cond
                ; Note: "." only counts as cdr-setting if it begins with "."
                ((and (eq datum '|.|) (eql c #\.))
                  (let ((datum2
                          (read-preserving-whitespace input-stream t nil)))
                    (cond
                      ; The following peek-char has side-effect of skipping
                      ; whitespace after last datum, so "(a . b )" works.
                      ((not (eql (peek-char t input-stream) stop-char))
                        (read-error "Bad closing character after . datum."))
                      (t
                        (read-char input-stream)
                        ; Reverse what we have and attach datum2 as the
                        ; final cdr; with no elements this is just datum2.
                        (return (nreconc items datum2))))))
                (t
                  (push datum items))))))))))


; Implement neoteric-expression's prefixed (), [], and {}.
; At this point, we have just finished reading some expression, which
; MIGHT be a prefix of some longer expression.  Examine the next
; character to be consumed; if it's an opening paren, bracket, or brace,
; then the expression "prefix" is actually a prefix.
; Otherwise, just return the prefix and do not consume that next char.
; This recurses, to handle formats like f(x)(y).
(defun neoteric-process-tail (input-stream prefix)
  ; PREFIX is an expression that has just been read.  While the very next
  ; character (no whitespace skipped) opens a group, consume the group and
  ; fold it into the expression:
  ;   f(x ...)  =>  (f x ...)
  ;   f[x ...]  =>  ($bracket-apply$ f x ...)
  ;   f{...}    =>  (f <process-curly result>), or (f) when that is empty
  ; Repeats so that forms like f(x)(y) work.  Any other next character, or
  ; end of file, is left unread and the expression built so far is returned.
  (let ((expr prefix))
    (loop
      (case (peek-char nil input-stream nil my-eof-marker)
        (#\( ; Implement f(x).
          (read-char input-stream nil nil t) ; consume opening char
          (setq expr
                (cons expr (my-read-delimited-list #\) input-stream))))
        (#\[ ; Implement f[x]
          (read-char input-stream nil nil t) ; consume opening char
          (setq expr
                (list* '$bracket-apply$
                       expr
                       (my-read-delimited-list #\] input-stream))))
        (#\{ ; Implement f{x}.
          (read-char input-stream nil nil t) ; consume opening char
          (let ((tail (process-curly
                        (my-read-delimited-list #\} input-stream))))
            (setq expr
                  (if (null tail)
                      (list expr) ; Map f{} to (f), not (f ()).
                      (list expr tail)))))
        ; Anything else, including the eof marker: no (further) tail.
        (otherwise (return expr))))))


;;; Dispatch procedures.

; Read until }, then process list as infix list.
(defun neoteric-curly-brace (stream char)
  ; Reader-macro function for "{" in neoteric mode: read the elements up to
  ; the matching "}", convert them with process-curly, then check whether
  ; the result is itself the prefix of a neoteric tail.
  (declare (ignore char)) ; {
  (neoteric-process-tail stream
    (process-curly (my-read-delimited-list #\} stream))))

; Read preserving whitespace using the underlying readtable, then
; apply a neoteric tail if necessary.  This is necessary for handling
; various situations (including constituent characters)
; to ensure that trailing whitespace is NEVER consumed before looking for
; the tail.  Otherwise '{a + {b * c}} will be incorrectly
; interpreted as (A (+ (* B C))) instead of the correct (+ A (* B C)).
; That's because if the whitespace after "+" is (incorrectly)
; consumed, it will be interpreted as '{a +{b * c}}.

(defun wrap-read-n-tail (stream char)
  ; Reader-macro function installed on every wrapped constituent (and on
  ; backslash, vertical bar and double quote).  Puts CHAR back, reads one
  ; atom with the underlying readtable WITHOUT consuming trailing
  ; whitespace, then looks for a neoteric tail.
  ;
  ; *readtable* is switched with a dynamic binding rather than setq, so the
  ; caller's readtable is restored even when the read exits non-locally
  ; (end of file, reader error).  With setq, such an exit would leave the
  ; underlying readtable installed and silently turn neoteric reading off.
  ; The binding ends before neoteric-process-tail runs, so the tail is read
  ; with the caller's readtable.
  (unread-char char stream)
  (neoteric-process-tail stream
    (let ((*readtable* *neoteric-underlying-readtable*)) ; temporary switch.
      ; Do NOT make recursive, or spaces after atoms will be consumed.
      (read-preserving-whitespace stream t nil))))

(defun wrap-continue (stream char)
  ; Run the reader-macro function that the underlying readtable has for
  ; CHAR (the current readtable stays in effect while it runs), then treat
  ; its result as a possible prefix of a neoteric tail.
  (let* ((original-reader
           (get-macro-character char *neoteric-underlying-readtable*))
         (datum (funcall original-reader stream char)))
    (neoteric-process-tail stream datum)))

(defun wrap-dispatch-tail (stream sub-char int)
  ; Run the "#" dispatch function that the underlying readtable has for
  ; SUB-CHAR, leaving our readtable in place while it runs, then treat its
  ; result as a possible prefix of a neoteric tail.
  (let* ((original-reader
           (get-dispatch-macro-character #\# sub-char
                                         *neoteric-underlying-readtable*))
         (datum (funcall original-reader stream sub-char int)))
    (neoteric-process-tail stream datum)))

(defun wrap-dispatch-disabled-tail (stream sub-char int)
  ; Run the underlying readtable's "#" dispatch function for SUB-CHAR with
  ; our readtable temporarily disabled, then look for a neoteric tail.
  ;
  ; This is more convoluted than you'd expect, because Common Lisp's "read"
  ; provides no simple way to *prevent* consuming trailing whitespace if
  ; the read is at the top level.  When that happens, trailing whitespace
  ; is consumed *BEFORE* the neoteric-tail check is performed; if something
  ; that looks like a tail follows the whitespace, the wrong result occurs.
  ; E.G., the neoteric expression:
  ;   '#B101 (quote x)
  ; would in the "obvious" implementation be the incorrect: (5 |QUOTE| |X|)
  ; instead of the correct 5 followed by x.
  ; So: collect all the characters before the next delimiter into a string,
  ; and let the original dispatch function read from that string instead.
  (let* ((token (string-before-delimiter stream ""))
         (original-reader
           (get-dispatch-macro-character #\# sub-char
                                         *neoteric-underlying-readtable*))
         (datum
           (let ((*readtable* *neoteric-underlying-readtable*)) ; temp switch
             (with-input-from-string (token-stream token)
               (funcall original-reader token-stream sub-char int)))))
    (neoteric-process-tail stream datum)))

(defun wrap-dispatch-disabled-notail (stream sub-char int)
  ; Run the underlying readtable's "#" dispatch function for SUB-CHAR
  ; directly on STREAM with our readtable temporarily disabled, and return
  ; whatever it returns.  Does NOT invoke neoteric-process-tail.
  ; There's no obvious way to *prevent* this from consuming trailing
  ; whitespace if the top-level routine consumed trailing whitespace.
  (let* ((plain-table *neoteric-underlying-readtable*)
         (original-reader
           (get-dispatch-macro-character #\# sub-char plain-table))
         (*readtable* plain-table)) ; temporary switch.
    (funcall original-reader stream sub-char int)))

(defun wrap-dispatch-special-read-tail (stream sub-char int)
  ; Rebuild the text of a "#<sub-char>..." construct up to the next
  ; delimiter, read that text with our readtable temporarily disabled, then
  ; invoke neoteric-process-tail on the result.  INT is ignored.
  ;
  ; Character syntax needs care: in "#\x" the character right after the
  ; backslash is ALWAYS part of the construct, even when it is one of
  ; neoteric-delimiters, as in #\( #\) #\# #\' #\, or "#\" followed by a
  ; space.  Stopping at that character would hand just "#\" to the reader,
  ; which then fails with an end-of-file error.  So for a backslash
  ; sub-char the following character is taken unconditionally, and only the
  ; characters after it (e.g., the "pace" of #\space) are collected up to a
  ; delimiter.  End of file right after "#\" still signals an error.
  (declare (ignore int))
  (neoteric-process-tail stream
    (let ((*readtable* *neoteric-underlying-readtable*)) ; temporary switch.
      (cond
        ((eql sub-char #\\)
          (my-read-to-delimiter stream
            (coerce (list #\# #\\ (read-char stream t nil t)) 'string)))
        (t
          ; Other sub-chars: put SUB-CHAR back and collect from there.
          (unread-char sub-char stream)
          (my-read-to-delimiter stream "#"))))))

(defun wrap-paren (stream char)
  ; Reader-macro function for an opening "(" or "[": read the elements up
  ; to the matching closer, then check for a neoteric tail after it.
  ; CHAR is the opening character; "[" is closed by "]", and anything else
  ; is closed by ")".
  (let ((closer (case char
                  (#\[ #\])
                  (otherwise #\) )))) ; (
    (neoteric-process-tail stream
      (my-read-delimited-list closer stream))))


;;; Enablers

(defun enable-neoteric-real ()
  ; Install neoteric-expression reading into the current readtable, provided
  ; (setup-enable 'neoteric) returns true.  Steps, in order:
  ;   1. Snapshot the current readtable as *neoteric-underlying-readtable*
  ;      and teach that snapshot about {} (and [] if not already macros).
  ;   2. Wrap constituents, escapes, strings and the paired characters in
  ;      the current readtable so each datum is checked for a neoteric tail.
  ;   3. Wrap the "special-meaning" "#" dispatch characters.
  ;   4. Record the resulting readtable in *neoteric-readtable*.
  ; Always returns no values.
  (when (setup-enable 'neoteric)
    ; --- Step 1: the underlying readtable.
    (let ((underlying (copy-readtable)))
      (setq *neoteric-underlying-readtable* underlying)
      (set-macro-character #\{ #'neoteric-curly-brace nil underlying) ; (
      (set-macro-character #\} (get-macro-character #\)) nil underlying)
      ; Only claim [ and ] when the current readtable has no macro on them.
      (unless (get-macro-character #\[ )
        (set-macro-character #\[ #'wrap-paren nil underlying))
      (unless (get-macro-character #\] ) ; (
        (set-macro-character #\] (get-macro-character #\) ) nil underlying)))

    ; --- Step 2: wrappers in the current readtable.
    ; Wrap all constituents.  Presume ASCII for now.
    ; TODO: Don't wrap if they aren't constituents any more.
    ; After the constituents come:
    ;   \ and |  - so symbols starting with an escape will work;
    ;   "        - this ensures that "hi"(5) => ("hi" 5).
    (dolist (c (append *constituents* (list #\\ #\| #\")))
      (set-macro-character c #'wrap-read-n-tail nil))

    ; Wrap character pairs.
    (set-macro-character #\( #'wrap-paren nil) ; )
    (set-macro-character #\{ #'neoteric-curly-brace nil) ; (
    (set-macro-character #\} (get-macro-character #\) ) nil)
    (unless (get-macro-character #\[ )
      (set-macro-character #\[ #'wrap-paren nil)) ; (
    (unless (get-macro-character #\] )
      (set-macro-character #\] (get-macro-character #\) ) nil))

    ; --- Step 3: dispatch macro characters; only the default "#" is handled.
    ; set-dispatch-macro-character disp-char sub-char function
    ;                              &optional readtable
    ;    Where "function" takes parameters (stream char arg).
    ; get-dispatch-macro-character disp-char sub-char &optional readtable
    ; See: http://www.cs.cmu.edu/Groups/AI/html/cltl/clm/node191.html
    ;
    ; How we wrap it depends on what will follow the macro char construct:
    ; - Datums-to-follow like #+ and #;.  No change; the default
    ;   neoteric readtable already handles datums.
    ; - Undefined or "signals error" - no change.
    ; - "Special-meaning" like #x.  These aren't followed by a datum. Instead,
    ;   this is a sequence of characters that represents some special value.
    ;   These characters (including the characters that started them)
    ;   are read until a delimiter, put in a string, and read from the string.
    ;   The result is then processed specially to look for
    ;   a neoteric tail.  That way, constructs like "#xa(#xb)" work and are
    ;   distinguished from  "#xa (#xb)". Use #'wrap-dispatch-disabled-tail.
    ;
    ; See Common Lisp hyperspec section 2.4.8
    ; http://www.lispworks.com/documentation/HyperSpec/Body/02_dh.htm
    ; Below is every standard # macro character syntax (except undefined
    ; and signals error), in order (note the "debatable" ones):
    ;
    ;   ##      = reference to #= label    - Intentionally not wrapped
    ;   #'      = function abbreviation    - Intentionally not wrapped
    ;   #(...)  = vector                   - Intentionally not wrapped
    ;   #*      = bit-vector               - Special-meaning, wrapped
    ;   #,      = (was) load-time eval [Steele] - Intentionally not wrapped
    ;   #0..9   = used for infix arguments - Can't really wrap anyway.
    ;   #:      = uninterned symbol        - Special-meaning, wrapped
    ;   #;      = datum comment (extension)- Intentionally not wrapped
    ;   #=      = label following object   - Intentionally not wrapped
    ;   #\char  = character object         - Special-meaning, wrapped
    ;   #|...|# = balanced comment         - Intentionally not wrapped
    ;   #+      = read-time conditional    - Intentionally not wrapped
    ;   #-      = read-time conditional    - Intentionally not wrapped
    ;   #.      = read-time evaluation     - Intentionally not wrapped
    ;   #A,#a   = array                    - Not currently wrapped (debatable).
    ;   #B,#b   = binary rational          - Special-meaning, wrapped
    ;   #C,#c   = complex number           - Not currently wrapped (debatable).
    ;             Complex numbers, because of their format, are tricky to wrap,
    ;             and there's no compelling reason to do so.
    ;   #O,#o   = octal rational           - Special-meaning, wrapped
    ;   #P,#p   = pathname                 - Not wrapped currently (debatable).
    ;             In the future this might be wrapped for #p"hi"(5), but
    ;             it's not obvious it would ever be used that way.
    ;   #R,#r   = radix-n rational         - Special-meaning, wrapped
    ;   #S,#s   = structure                - Not currently wrapped (debatable).
    ;   #X,#x   = hexadecimal rational     - Special-meaning, wrapped
    (dolist (sub-char '(#\* #\: #\B #\b #\O #\o #\R #\r #\X #\x))
      (set-dispatch-macro-character #\# sub-char
                                    #'wrap-dispatch-disabled-tail))
    ; Character objects use their own wrapper.
    (set-dispatch-macro-character #\# #\\ #'wrap-dispatch-special-read-tail)

    ; --- Step 4.
    ; Save in separate variable, so "sweet" can just create its own if needed
    (setq *neoteric-readtable* *readtable*))

  (values))


; Read until }, then process list as infix list.
(defun full-curly-brace-infix-reader (stream char)
  ; Reader-macro function for "{" in full curly-infix mode.  Enables
  ; neoteric reading for the duration of this call, reads the elements up
  ; to the matching "}", and returns them converted by process-curly.
  ; *readtable* and *readable-active* are rebound here so that whatever
  ; enable-neoteric-real assigns to them is undone on return.
  (declare (ignore char))
  (let ((*readtable* *readtable*) ; Setup to restore on return.
        (*readable-active* *readable-active*))
    (enable-neoteric-real)
    ; VALUES keeps this to exactly one return value.
    (values (process-curly (my-read-delimited-list #\} stream)))))

(defun enable-full-curly-infix-real ()
  ; Install full curly-infix reading into the current readtable, provided
  ; (setup-enable 'full-curly-infix) returns true.  Always returns no values.
  (when (setup-enable 'full-curly-infix)
    ; Invoke full-curly-brace-infix-reader when an opening curly brace is
    ; read in:
    (set-macro-character #\{ #'full-curly-brace-infix-reader) ; (
    ; This is necessary, else a cuddled closing brace will be part of an
    ; atom.  The ")" reader is looked up with a NIL readtable argument.
    (let ((close-paren-reader (get-macro-character #\) nil)))
      (set-macro-character #\} close-paren-reader)))
  (values)) ; Meaning "Did it"

(defun curly-infix-read (&optional (stream *standard-input*))
  ; Read and return one datum from STREAM (default *standard-input*) with
  ; full curly-infix reading enabled for just this call.
  ; Both specials are rebound so that the changes made while enabling are
  ; undone on return.
  (let ((*readable-active* *readable-active*)
        (*readtable* *readtable*))
    (enable-full-curly-infix-real)
    (read stream)))

(defun neoteric-read (&optional (stream *standard-input*))
  ; Read and return one datum from STREAM (default *standard-input*) with
  ; neoteric-expression reading enabled for just this call.
  ; Both specials are rebound so that the changes made while enabling are
  ; undone on return.
  (let ((*readable-active* *readable-active*)
        (*readtable* *readtable*))
    (enable-neoteric-real)
    (read stream)))


;   (defun neoteric-filter ()
;     (handler-case
;       (do ((result (neoteric-read) (neoteric-read)))
;         (nil nil)
;         (write result)
;         (terpri))
;       (end-of-file ())))
;
;   (defun neoteric-load (filename)
;    (handler-case
;     (with-open-file (s (make-pathname :name filename) :direction :input)
;       (do ((result (neoteric-read s) (neoteric-read s)))
;         (nil nil)
;         (eval result)))
;     (end-of-file () )))


; TODO: Add writers, e.g., neoteric-write.
Readable Lisp S-expressions Git Repository

Readable Lisp/S-expressions with infix, functions, and indentation

Branches

Tags

[660311]: / neoteric.lisp Maximize Restore History

477 lines (417 with data), 21.9 kB