diff options
| author | 2025-04-25 13:08:20 -0400 | |
|---|---|---|
| committer | 2025-04-25 13:08:20 -0400 | |
| commit | 88c84c4decd258f9b199d58a6b5f93a034dee0bf (patch) | |
| tree | 159a95a65456de5a5d31327fdd37f8aa7a57e597 | |
| parent | fix pattern list creation (diff) | |
rewrite expander
Expander now uses dynamically scoped variables. The old lexical scope
variable implementation was getting to four nested lambdas, which was
unreadable.
The dynamic variables are clearly labeled and segregated from the rest
of the code. The actual compiler interacts with the dynamic variables
through an API.
The previous compiler did not handle empty ellipses match properly.
This has some more work to fix that.
| -rw-r--r-- | multisyntax/patterns.scm | 389 | ||||
| -rw-r--r-- | multisyntax/patterns.sld | 5 | ||||
| -rw-r--r-- | test/patterns.scm | 27 |
3 files changed, 276 insertions, 145 deletions
diff --git a/multisyntax/patterns.scm b/multisyntax/patterns.scm index c31f9b7..189cbac 100644 --- a/multisyntax/patterns.scm +++ b/multisyntax/patterns.scm @@ -15,60 +15,227 @@ | Syntax pattern matcher for syntax-rules and syntax-case. | The matcher is a backtracking recursive tracker. | - | The "compiler" turns the pattern into lambdas. The lambdas are - | `map * syntax -> (or map #f)`. + | The "compiler" turns the pattern into lambdas. | - | The map will map identifiers to matched values. These are either + | The map will return either `#f` (failure) or a map from identifiers to + | either | | 1. Syntax objects, or - | 2. Reversed lists containined matched values. + | 2. Reversed lists containined repeated matched values (for ellipsis + | patterns). + | + | The pattern matcher compiler outputs the nesting level of each + | identifier, which allows a user to differentiate between lists and + | ellipsis pattern values. + | + | The matcher is implemented as a recursive procedure that accumulates + | the result map as it continues. When it encounters an ellipsis pattern, + | the pattern P that the ellipsis pattern repeats is called with an empty + | map. If the pattern P returns, the returned identifiers inside of that + | pattern are inserted into the result map as elements of a listm and then + | P is called again. This way, multiple values from an ellipsis can be + | collected. |# -(define empty-map - (hashmap bound-identifier-comparator)) +;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Parameter objects for the parser +;;; +;;; The state of the pattern matcher is kept in parameter objects. This +;;; is to make the code simpler to read. The previous implementation was +;;; getting to four nested levels of procedures because it kept everything +;;; in lexical scope. -(define (compile-single-list match-patcar) - (lambda (names stx) - (let ((stx (unwrap-syntax stx))) - (and (pair? stx) - (null? (unwrap-syntax (cdr stx))) - (match-patcar names (car stx)))))) +(define nesting-level + ;; Current ellipsis nesting level. + (make-parameter 0)) + +(define actual-ellipsis-container + ;; Parameter object that contains the ellipsis identifier. + (make-parameter #f)) + +(define literals-parameter + ;; Parameter object that contains the literals. + (make-parameter #f)) -(define-record-type <matched-ellipsis> - (make-matched-ellipsis reversed-list) - matched-ellipsis? - (reversed-list matched-ellipsis-reversed-list)) +(define bindings + ;; Parameter object that contains a box that contains a mapping from + ;; identifiers to their nesting level. + (make-parameter #f)) -(define (push-to-matched-ellipsis key matched) - (make-matched-ellipsis (cons key - (matched-ellipsis-reversed-list matched)))) +(define bound-here + ;; Parameter object that contains a box that contains a map of + ;; identifiers at the current nesting level to the empty list. + ;; When this parameter is false, then the compiler is not in a ellipsis + ;; nesting level. + ;; + ;; The map is the default match bindings for those identifiers (i.e. zero + ;; matches). + (make-parameter #f)) + +(define (call/nesting-level procedure . args) + ;; Invoke (procedure args ...) with a higher nesting level and an empty + ;; `bound-here` set. + ;; + ;; Returns (values return map): + ;; + ;; 1. `return` is the returned value from `(procedure args ...)` + ;; 2. `map` is the map of identifiers at this nesting level to their + ;; default values (the empty list). + (let ((old-bound-here-box (bound-here))) + (parameterize ((nesting-level (+ (nesting-level) 1)) + (bound-here (box (empty-map)))) + (let ((returned (apply procedure args))) + (values returned (unbox (bound-here))))))) + +(define (actual-ellipsis? identifier) + ;; Returns `#t` if `id` is an ellipsis, and `#f` otherwise. + (cond + ((actual-ellipsis-container) => (cut <> identifier)) + (else #f))) + +(define (add-name! identifier) + ;; Add `identifier` to the name map with the current ellipsis nesting + ;; level. If the identifier is added inside of an ellipses nesting level, + ;; then it is also added to the `bound-here` map. + (let* ((the-box (bindings)) + (old (unbox the-box))) + (when (hashmap-contains? old identifier) + (error "identifier bound twice" identifier)) + (when (bound-here) + (let ((the-set (unbox (bound-here)))) + (set-box! (bound-here) (hashmap-set! the-set + identifier + '())))) + (set-box! the-box (hashmap-set! old + identifier + (nesting-level))))) + +(define (compile-pattern ellipsis literals stx) + ;; Compile `stx` into a pattern matcher with `ellipsis` as the ellipsis + ;; identifier and the set of `literals`. + ;; + ;; Returns two values, the binding map and the matcher, which is a + ;; procedure `binding-map * syntax -> (or binding-map #f)`. + (parameterize ((nesting-level 0) + (actual-ellipsis-container + (if (set-contains? literals ellipsis) + #f + (lambda (stx) + (and (identifier? stx) + (bound-identifier=? stx ellipsis))))) + (literals-parameter literals) + (bindings (box (empty-map)))) + (let ((match (compile stx))) + (values match (unbox (bindings)))))) + +;;; ;;;;;;;;;;;;;;;;;;;; +;;; Helper functions +;;; ;;;;;;;;;;;;;;;;;;;; + +(define (empty-map) + (hashmap bound-identifier-comparator)) + +(define (literal? identifier) + (set-contains? (literals-parameter) identifier)) (define (merge-names oldnames newnames) ;; newnames is the patterns matched in an ellipsis expression. Append - ;; each to the lists in `oldnames`, and return that map. + ;; each to the lists in `oldnames` associated with each name and return + ;; the merged list. (define (proc key val names) - (hashmap-update/default names - key - (cut push-to-matched-ellipsis val <>) - (make-matched-ellipsis '()))) + (hashmap-update names key (cut cons val <>))) (hashmap-fold proc oldnames newnames)) -(define (compile-ellipsis match-patcar match-patcddr) - (define (match* names stx) - (let ((stx (unwrap-syntax stx))) +;;; ;;;;;;;;;;;;;; +;;; Compile functions +;;; ;;;;;;;;;;;;;; + +(define (compile pattern) + (let ((pattern (unwrap-syntax pattern))) + (cond + ((self-syntax? pattern) + (lambda (names stx) + (let ((stx (unwrap-syntax stx))) + (and (self-syntax? stx) + (equal? stx pattern) + names)))) + ((pair? pattern) + (compile-pair (unwrap-syntax (car pattern)) + (unwrap-syntax (cdr pattern)))) + ((vector? pattern) + (compile-vector pattern)) + ((not (identifier? pattern)) + (error "not syntax" pattern)) + ((literal? pattern) + (lambda (names stx) + (and (bound-identifier=? stx pattern) + names))) + ((actual-ellipsis? pattern) + (error "invalid ellipsis location" pattern)) + ((bound-identifier=? pattern (empty-wrap '_)) + (lambda (names stx) names)) + (else + (add-name! pattern) + (lambda (names stx) + (hashmap-set names pattern stx)))))) + +;;; ;;;;;;;;;;;;;;; +;;; Compile pairs +;;; ;;;;;;;;;;;;;;; + +(define (compile-pair patcar patcdr) + ;; Compile a general pair. A pair pattern can either be + ;; + ;; (x <ellipsis> . y) + ;; or + ;; (x) + ;; or + ;; (x <not-an-ellipsis> . y) + (if (null? patcdr) + (compile-actual-pair patcar '()) + (let-values (((has-ellipsis? pat-next) + (is-ellipsis-list patcdr))) + (if has-ellipsis? + (compile-ellipsis patcar pat-next) + (compile-actual-pair patcar pat-next))))) + +(define (is-ellipsis-list patcdr) + ;; Returns (values has-ellipsis? next). `has-ellipsis?` is true if the + ;; pair is an ellipsis pattern, and false otherwise. `next` is the next + ;; pattern that will be matched. + (if (null? patcdr) + (values #f patcdr) + (let ((patcadr (unwrap-syntax (car patcdr)))) + (if (actual-ellipsis? patcadr) + (values #t (cdr patcdr)) + (values #f patcdr))))) + +(define (compile-ellipsis patcar patcddr) + ;; Compile an ellipsis pattern that matches `patcar` zero or more times + ;; and then must match `patcddr`. + (define-values (match-patcar default-names) + (call/nesting-level compile patcar)) + (define match-patcddr (compile patcddr)) + (lambda (names stx) + (let match* ((names (hashmap-union names default-names)) + (stx (unwrap-syntax stx))) (cond ((null? stx) names) ((not (pair? stx)) #f) - ((match-patcar empty-map (car stx)) + ((match-patcar (empty-map) (car stx)) => (lambda (newnames) (cond - ((match* (merge-names names newnames) (cdr stx)) + ((match* (merge-names names newnames) + (cdr stx)) => values) (else (match-patcddr names stx))))) - (else (match-patcddr names stx))))) - match*) + (else (match-patcddr names stx)))))) -(define (compile-actual-pair match-patcar match-patcdr) +(define (compile-actual-pair patcar patcdr) + ;; Compile a pair that is not an ellipsis pattern. I.e. match `patcar` + ;; then match `patcdr. + (define match-patcar (compile patcar)) + (define match-patcdr (compile patcdr)) (lambda (names stx) (let ((stx (unwrap-syntax stx))) (cond @@ -77,116 +244,68 @@ => (cute match-patcdr <> (cdr stx))) (else #f))))) +;;; ;;;;;;;;;;;;;;;;;;; +;;; Compile vectors +;;; ;;;;;;;;;;;;;;;;;;; -(define (compile-pattern-in-vector match k) - (lambda (names vec i) - (let ((stx (vector-ref vec i))) - (cond - ((match names stx) => (cute k <> vec (+ i 1))) - (else #f))))) - -(define (compile-ellipsis-in-vector match k) - (define (match* names vec i) - (cond - ((= i (vector-length vec)) (k names vec i)) - ((match empty-map (vector-ref vec i)) - => (lambda (new-names) - (cond - ((match* (merge-names names - new-names) - (+ i 1)) - => values) - (else (k names vec i))))) - (else (k names vec i)))) - match*) - -(define (compile-pattern ellipsis literals pattern) - (define names (set bound-identifier-comparator)) - ;; - ;; - (define actual-ellipsis? - (if (set-contains? literals ellipsis) - (lambda (x) #f) - (lambda (stx) - (and (identifier? stx) - (bound-identifier=? stx ellipsis))))) - ;; - ;; - (define (is-ellipsis-list patcdr) - (if (null? patcdr) - (values #f (compile patcdr)) - (let ((patcadr (unwrap-syntax (car patcdr)))) - (if (actual-ellipsis? patcadr) - (values #t (compile (cdr patcdr))) - (values #f (compile patcdr)))))) - ;; - ;; - (define (compile-pair patcar patcdr) - (let ((match-patcar (compile patcar))) - (if (null? patcdr) - (compile-single-list match-patcar) - (let-values (((has-ellipsis? match-next) - (is-ellipsis-list patcdr))) - (if has-ellipsis? - (compile-ellipsis match-patcar match-next) - (compile-actual-pair match-patcar match-next)))))) +(define (compile-vector vec) + ;; Vector patterns are traversed in reverse order, which means that ;; + ;; 1. The compiler is tail-recursive (not that it will matter much). + ;; 2. Ellipses detection requires no lookahead. ;; - (define (compile-vector vec) - (define final-continuation - (lambda (names vec i) - (if (= i (vector-length vec)) - names - #f))) - (define (on-ellipsis i k) - (let ((cur (vector-ref vec (- i 1)))) - (when (actual-ellipsis? cur) - (error "... ... is not allowed" cur)) - (compile-index (- i 2) - (compile-ellipsis-in-vector - (compile (vector-ref vec (- i 1))) - k)))) - (define (compile-index i k) + ;; The internal vector procedures take an extra argument, `i`, which + ;; is the current index into the matched vector. + (define entry + (let compile-index ((i 0) + (k match-end-of-vector)) (cond ((zero? i) k) (else (let ((cur (vector-ref vec i))) (if (actual-ellipsis? cur) - (on-ellipsis i k) + (compile-index (- i 2) + (compile-vector-ellipsis vec (- i 1) k)) (compile-index (- i 1) - (compile-pattern-in-vector (compile cur) - k))))))) - (compile-index 0 final-continuation)) - ;; - ;; - (define (compile pattern) - (let ((pattern (unwrap-syntax pattern))) + (compile-pattern-in-vector cur k)))))))) + (lambda (names stx) + (entry names stx 0))) + +(define (match-end-of-vector names vec i) + ;; Compiled procedure to match the end of a vector. This is constant for + ;; any vector. + (if (= i (vector-length vec)) + names + #f)) + +(define (compile-vector-ellipsis vec i match-rest) + ;; Compile the pattern in `vec` at `i` as an ellipsis pattern. + (when (= i 0) + (error "... is not allowed at the start of a vector" vec)) + (let ((cur (vector-ref vec i))) + (when (actual-ellipsis? cur) + (error "... ... is not allowed" cur)) + (let-values (((match default-names) (call/nesting-level compile cur))) + (lambda (names vec i) + (let match* ((names (hashmap-union names default-names)) + (i i)) + (cond + ((= i (vector-length vec)) (match-rest names vec i)) + ((match (empty-map) (vector-ref vec i)) + => (lambda (new-names) + (cond + ((match* (merge-names names new-names) + (+ i 1)) + => values) + (else (match-rest names vec i))))) + (else (match-rest names vec i)))))))) + +(define (compile-pattern-in-vector pattern match-rest) + ;; Compile `pattern` to be matched in a vector. + (define match (compile pattern)) + (lambda (names vec i) + (let ((stx (vector-ref vec i))) (cond - ((self-syntax? pattern) - (lambda (names stx) - (let ((stx (unwrap-syntax stx))) - (and (self-syntax? stx) - (equal? stx pattern) - names)))) - ((pair? pattern) - (compile-pair (unwrap-syntax (car pattern)) - (unwrap-syntax (cdr pattern)))) - ((vector? pattern) - (compile-vector pattern)) - ((not (identifier? pattern)) - (error "not syntax" pattern)) - ((set-contains? literals pattern) - (lambda (names stx) - (and (bound-identifier=? stx pattern) - names))) - ((actual-ellipsis? pattern) - (error "invalid ellipsis location" pattern)) - ((bound-identifier=? pattern (empty-wrap '_)) - (lambda (names stx) names)) - (else - (when (set-contains? names pattern) - (error "duplicated name" pattern)) - (set! names (set-adjoin! names pattern)) - (lambda (names stx) - (hashmap-set names pattern stx)))))) - (compile pattern)) + ((match names stx) => (cute match-rest <> vec (+ i 1))) + (else #f))))) + diff --git a/multisyntax/patterns.sld b/multisyntax/patterns.sld index 889d45f..2f3450f 100644 --- a/multisyntax/patterns.sld +++ b/multisyntax/patterns.sld @@ -15,8 +15,7 @@ (define-library (multisyntax patterns) (import (scheme base) (scheme write) - (srfi 26) (srfi 113) (srfi 146 hash) + (srfi 26) (srfi 111) (srfi 113) (srfi 146 hash) (multisyntax syntax-object)) - (export compile-pattern matched-ellipsis? - matched-ellipsis-reversed-list) + (export compile-pattern) (include "patterns.scm"))
\ No newline at end of file diff --git a/test/patterns.scm b/test/patterns.scm index 0e9df36..754b818 100644 --- a/test/patterns.scm +++ b/test/patterns.scm @@ -18,10 +18,13 @@ (define empty-set (set bound-identifier-comparator)) (define (test-single-match) - (define matcher + (define-values (matcher names) (compile-pattern ellipsis empty-set (empty-wrap 'x))) + (test-equal "nesting level of identifier" + 0 + (hashmap-ref names (empty-wrap 'x))) (let ((returned (matcher empty-map (empty-wrap 'y)))) (test-assert "identifier" (bound-identifier=? (hashmap-ref returned @@ -36,10 +39,13 @@ (empty-wrap 'y))))) (define (test-match-in-list) - (define matcher + (define-values (matcher names) (compile-pattern ellipsis empty-set (list (empty-wrap 'x)))) + (test-equal "nesting level of identifier" + 0 + (hashmap-ref names (empty-wrap 'x))) (let ((returned (matcher empty-map (empty-wrap 'y)))) (test-assert "does not match identifier" (not returned))) @@ -49,28 +55,35 @@ (empty-wrap 'y))))) (define (test-multiple-matches-in-list) - (define matcher + (define-values (matcher names) (compile-pattern ellipsis empty-set (list (empty-wrap 'x) (empty-wrap 'y)))) + (test-equal "nesting level of x" + 0 + (hashmap-ref names (empty-wrap 'x))) + (test-equal "nesting level of y" + 0 + (hashmap-ref names (empty-wrap 'y))) (let ((returned (matcher empty-map (list 1 2)))) (test-equal "first" 1 (hashmap-ref returned (empty-wrap 'x))) (test-equal "second" 2 (hashmap-ref returned (empty-wrap 'y))))) (define (test-simple-ellipsis) - (define matcher + (define-values (matcher names) (compile-pattern ellipsis empty-set (list (empty-wrap 'x) ellipsis))) + (test-equal "nesting level of x" + 1 + (hashmap-ref names (empty-wrap 'x))) (let* ((list '(1 2 3 4 5 6 7 8)) (returned (matcher empty-map list)) (x-value (hashmap-ref returned (empty-wrap 'x)))) - (test-assert "returned is matched-ellipsis" - (matched-ellipsis? x-value)) (test-equal "(x ...)" (reverse list) - (matched-ellipsis-reversed-list x-value))) + x-value)) #;(let* ((returned (matcher empty-map '())) (x-value (hashmap-ref returned (empty-wrap 'x)))))) |
