diff options
| author | 2025-04-26 14:19:59 -0400 | |
|---|---|---|
| committer | 2025-04-26 14:19:59 -0400 | |
| commit | 9ae19d70f5cdce1a465b81ec822c82dce66e0b3c (patch) | |
| tree | 32ef8c92e7ea782ca2cb9285d081d3983830928a | |
| parent | reorganize pattern module (diff) | |
document pattern matcher, add ellipsis groups
Add precise definitions, with examples, for concepts like the ellipsis
nesting level. This should clarify what the matcher is doing. They
should apply to any implementation of the Macrological Fascicle's
description of patterns.
This also adds ellipsis grouping. This is used to determine which
identifiers are allowed to be repeated with each other in output.
TODO: The concept of group needs to encompass nested identifiers.
For instance
(let-values (((name ...) value) ...) body ...)
does not allow
((name ...) body ...)
but the current system does not handle this.
| -rw-r--r-- | multisyntax/pattern/matcher.scm | 208 | ||||
| -rw-r--r-- | multisyntax/pattern/matcher.sld | 9 | ||||
| -rw-r--r-- | test/pattern/matcher.scm | 85 |
3 files changed, 191 insertions, 111 deletions
diff --git a/multisyntax/pattern/matcher.scm b/multisyntax/pattern/matcher.scm index 451ae78..8870b54 100644 --- a/multisyntax/pattern/matcher.scm +++ b/multisyntax/pattern/matcher.scm @@ -12,39 +12,79 @@ | See the License for the specific language governing permissions and | limitations under the License. |------------------------------------------------------------------------ - | Syntax pattern matcher for syntax-rules and syntax-case. - | The matcher is a backtracking recursive tracker. + | Backtracking recursive syntax pattern matcher for syntax-rules and + | syntax-case. | - | The "compiler" turns the pattern into lambdas. + | The "compiler" returns: | - | The map will return either `#f` (failure) or a map from identifiers to - | either + | 1. a procedure (the matcher) that will ake an input pattern and return + | either `#f` or a hashmap from identifiers to "matched values". + | 2. a hashmap from identifiers to `(cons x y)`, where + | `x` is the "ellipsis nesting level" and `y` is the "ellipsis group". + | The hashmap checks identifier according to `bound-identifier=?`. + | + | The matcher is implemented as a recursive procedure that accumulates + | the result map as it continues. When it encounters a template of the + | form `template ...` in a vector or a list, `template` is matched with + | an empty identifier hashmap. map. If `template` successfully matches, + | the returned identifiers inside of that pattern are inserted into the + | result map as elements of a list and then P is called again. This way, + | multiple values from an ellipsis can be collected. + | + |------------------------------------------------------------------------ + | A "matched value" is defined as: | - | 1. Syntax objects, or - | 2. Reversed lists containined repeated matched values (for ellipsis - | patterns). + | 1. A syntax object, or + | 2. Reversed lists containing matched values from a repeated match. + | This occurs when an identifier has a non-zero ellipsis nesting level. | - | The pattern matcher compiler outputs the nesting level of each - | identifier, which allows a user to differentiate between lists and - | ellipsis pattern values. + | A syntax object can also match a list. To disambiguate this, refer to + | the ellipsis nesting level of an identifier. | - | The matcher is implemented as a recursive procedure that accumulates - | the result map as it continues. When it encounters an ellipsis pattern, - | the pattern P that the ellipsis pattern repeats is called with an empty - | map. If the pattern P returns, the returned identifiers inside of that - | pattern are inserted into the result map as elements of a listm and then - | P is called again. This way, multiple values from an ellipsis can be - | collected. + |------------------------------------------------------------------------ + | The "ellipsis nesting level" of a syntax object is defined as: + | + | * The ellipsis nesting level of a syntax form passed to `compile-pattern` + | is 0. + | * Whenever `template ...` is found in a list or a vector with + | ellipsis nesting level N, then `template` has nesting level `N + 1`. + | + | For example, + | + | 1. The ellipsis nesting level of `(x y z)` is 0. + | 2. The ellipsis nesting level of `(x ...)` is 0. Inside the form, the + | nesting level of `x` is 1. + | 3. In `(let-values (((names ...) value) ...) body ...)`, the nesting + | level of `names` is 2, of `value` 1, and of `body` 1. + | + |------------------------------------------------------------------------ + | The "ellipsis group" is an integer with the property that whenever + | `template ...` is found in a pattern, all identifiers in `template` at + | the same nesting level have the same ellipsis group, and no other + | identifiers have the same ellipsis group. + | + |------------------------------------------------------------------------ + | For an identifier with nesting level N, the result map at the end of + | matching will have N levels of lists. For example: + | + | (let-values (((name1 name2) value1) ((name3 name4) value2)) body) + | + | Will map + | + | names -> ((name4 name3) (name2 name1)) + | value -> (value2 value1) + | body -> (body) |# -#;(define (display-hashmap hashmap) - (display - (list - "hashmap:" - (map (lambda (pair) - (cons (syntax->datum (car pair)) - (cdr pair))) - (hashmap->alist hashmap))))) +;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Auxillary parameters +;;; +;;; These use the unportable `empty-wrap` procedure to create auxillary +;;; syntax keywords (identifiers without timestamps and without an +;;; environment). + +(define ... (empty-wrap '...)) +(define _ (empty-wrap '_)) ;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; Parameter objects for the parser @@ -58,8 +98,9 @@ ;; Current ellipsis nesting level. (make-parameter 0)) -(define actual-ellipsis-container - ;; Parameter object that contains the ellipsis identifier. +(define actual-ellipsis-procedure + ;; Parameter object that contains a procedure to determine if an + ;; identifier acts as the ellipsis. (make-parameter #f)) (define literals-parameter @@ -81,9 +122,13 @@ ;; matches). (make-parameter #f)) +(define ellipsis-group + ;; Current ellipsis group. + (make-parameter #f)) + (define (call/nesting-level procedure . args) ;; Invoke (procedure args ...) with a higher nesting level and an empty - ;; `bound-here` set. + ;; `bound-here` map. Also creates a new ellipsis group. ;; ;; Returns (values return map): ;; @@ -92,15 +137,14 @@ ;; default values (the empty list). (let ((old-bound-here-box (bound-here))) (parameterize ((nesting-level (+ (nesting-level) 1)) - (bound-here (box (empty-map)))) + (bound-here (box (empty-map))) + (ellipsis-group (generate-unique-integer))) (let ((returned (apply procedure args))) (values returned (unbox (bound-here))))))) (define (actual-ellipsis? identifier) ;; Returns `#t` if `id` is an ellipsis, and `#f` otherwise. - (cond - ((actual-ellipsis-container) => (cut <> identifier)) - (else #f))) + ((actual-ellipsis-procedure) identifier)) (define (add-name! identifier) ;; Add `identifier` to the name map with the current ellipsis nesting @@ -111,43 +155,87 @@ (when (hashmap-contains? old identifier) (error "identifier bound twice" identifier)) (when (bound-here) - (let ((the-set (unbox (bound-here)))) - (set-box! (bound-here) (hashmap-set! the-set - identifier - '())))) + (set-box! (bound-here) (hashmap-set! (unbox (bound-here)) + identifier + '()))) (set-box! the-box (hashmap-set! old identifier - (nesting-level))))) + (cons (nesting-level) + (ellipsis-group)))))) -(define (compile-pattern ellipsis literals stx) +(define (contains-as-free-identifier set key) + ;; Returns an identifier if `key` is `free-identifier=?` to any + ;; identifier in `set`. Otherwise return `#f`. + (cond + ((member key (set->list set) free-identifier=?) => car) + (else #f))) + +(define (generate-ellipsis-procedure literals ellipsis) + ;; Generates a procedure of one argument that returns `#t` if the + ;; argument is an ellipsis for the purposes of the current matcher. + ;; + ;; If `ellipsis` is `#f`, then the `ellipsis` is the auxillary global + ;; `...`, and matching is done with `free-identifier=?`. + ;; + ;; If `ellipsis` is some identifier, then matching is done against it + ;; with `bound-identifier=?`. + ;; + ;; If either ellipsis is in the literals, then there is no repeating + ;; patterns and the returned procedure returns `#f`. + (define always-false-case + (lambda (identifier) #f)) + (define matches-free-identifier + (lambda (identifier) + (and (identifier? identifier) + (free-identifier=? identifier ...)))) + (define matches-passed-ellipsis + (lambda (identifier) + (and (identifier? identifier) + (bound-identifier=? identifier ...)))) + (cond + ((and (not ellipsis) + (contains-as-free-identifier literals ...)) + always-false-case) + ((not ellipsis) matches-free-identifier) + ((set-contains? literals ellipsis) always-false-case) + (else matches-passed-ellipsis))) + +(define compile-pattern ;; Compile `stx` into a pattern matcher with `ellipsis` as the ellipsis ;; identifier and the set of `literals`. ;; ;; Returns two values, the binding map and the matcher, which is a ;; procedure `binding-map * syntax -> (or binding-map #f)`. - (set! literals - (cond - ((set? literals) literals) - ((null? literals) (set bound-identifier-comparator)) - ((pair? literals) - (list->set bound-identifier-comparator literals)) - (else (error "invalid literals" literals)))) - (parameterize ((nesting-level 0) - (actual-ellipsis-container - (if (set-contains? literals ellipsis) - #f - (lambda (stx) - (and (identifier? stx) - (bound-identifier=? stx ellipsis))))) - (literals-parameter literals) - (bindings (box (empty-map)))) - (let ((match (compile stx))) - (values match (unbox (bindings)))))) + (case-lambda + ((literals stx) (compile-pattern literals stx #f)) + ((literals stx ellipsis) + (let ((literals + (cond + ((set? literals) literals) + ((null? literals) (set bound-identifier-comparator)) + ((pair? literals) + (list->set bound-identifier-comparator literals)) + (else (error "invalid literals" literals))))) + (parameterize ((nesting-level 0) + (actual-ellipsis-procedure + (generate-ellipsis-procedure literals ellipsis)) + (literals-parameter literals) + (bindings (box (empty-map)))) + (let ((match (compile stx))) + (values (lambda (stx) + (match (empty-map) stx)) + (unbox (bindings))))))))) ;;; ;;;;;;;;;;;;;;;;;;;; ;;; Helper functions ;;; ;;;;;;;;;;;;;;;;;;;; +(define generate-unique-integer + (let ((i 0)) + (lambda () + (set! i (+ i 1)) + i))) + (define (empty-map) (hashmap bound-identifier-comparator)) @@ -185,11 +273,11 @@ ((literal? pattern) (lambda (names stx) (and (identifier? stx) - (bound-identifier=? stx pattern) + (free-identifier=? stx pattern) names))) ((actual-ellipsis? pattern) (error "invalid ellipsis location" pattern)) - ((bound-identifier=? pattern (empty-wrap '_)) + ((free-identifier=? pattern _) (lambda (names stx) names)) (else (add-name! pattern) @@ -300,7 +388,6 @@ #f)) (define (compile-vector-ellipsis vec i match-rest) - ;; Compile the pattern in `vec` at `i` as an ellipsis pattern. (when (< i 0) (error "... is not allowed at the start of a vector" vec)) (let ((cur (vector-ref vec i))) @@ -322,7 +409,6 @@ (else (match-rest names vec i)))))))) (define (compile-pattern-in-vector pattern match-rest) - ;; Compile `pattern` to be matched in a vector. (define match (compile pattern)) (lambda (names vec i) (cond diff --git a/multisyntax/pattern/matcher.sld b/multisyntax/pattern/matcher.sld index f62e78a..c842577 100644 --- a/multisyntax/pattern/matcher.sld +++ b/multisyntax/pattern/matcher.sld @@ -14,8 +14,13 @@ |# (define-library (multisyntax pattern matcher) - (import (scheme base) (scheme write) + (import (scheme base) (scheme write) (scheme case-lambda) (srfi 26) (srfi 111) (srfi 113) (srfi 146 hash) - (multisyntax syntax-object)) + (only (multisyntax syntax-object) + identifier? bound-identifier=? free-identifier=? + unwrap-syntax + ;; unportable extensions + self-syntax? + empty-wrap bound-identifier-comparator)) (export compile-pattern) (include "matcher.scm"))
\ No newline at end of file diff --git a/test/pattern/matcher.scm b/test/pattern/matcher.scm index 317454c..acdea63 100644 --- a/test/pattern/matcher.scm +++ b/test/pattern/matcher.scm @@ -22,17 +22,17 @@ (define (test-single-match) (define-values (matcher names) - (compile-pattern ellipsis '() (empty-wrap 'x))) - (test-equal "nesting level of identifier" - 0 + (compile-pattern '() (empty-wrap 'x))) + (test-equal "nesting info of identifier" + (cons 0 #f) (hashmap-ref names (empty-wrap 'x))) - (let ((returned (matcher empty-map (empty-wrap 'y)))) + (let ((returned (matcher (empty-wrap 'y)))) (test-assert "identifier" (bound-identifier=? (hashmap-ref returned (empty-wrap 'x)) (empty-wrap 'y)))) - (let* ((returned (matcher empty-map (list - (empty-wrap 'y)))) + (let* ((returned (matcher (list + (empty-wrap 'y)))) (res (hashmap-ref returned (empty-wrap 'x)))) (test-assert "match on list returns list" (list? res)) (test-assert "is the same list" @@ -41,54 +41,52 @@ (define (test-match-in-list) (define-values (matcher names) - (compile-pattern ellipsis '() (list (empty-wrap 'x)))) - (test-equal "nesting level of identifier" - 0 + (compile-pattern '() (list (empty-wrap 'x)))) + (test-equal "nesting info of identifier" + (cons 0 #f) (hashmap-ref names (empty-wrap 'x))) - (let ((returned (matcher empty-map (empty-wrap 'y)))) + (let ((returned (matcher (empty-wrap 'y)))) (test-assert "does not match identifier" (not returned))) - (let ((returned (matcher empty-map (list (empty-wrap 'y))))) + (let ((returned (matcher (list (empty-wrap 'y))))) (test-assert "matches inside of list" (bound-identifier=? (hashmap-ref returned (empty-wrap 'x)) (empty-wrap 'y))))) (define (test-multiple-matches-in-list) (define-values (matcher names) - (compile-pattern ellipsis '() (list (empty-wrap 'x) - (empty-wrap 'y)))) - (test-equal "nesting level of x" - 0 + (compile-pattern '() (list (empty-wrap 'x) + (empty-wrap 'y)))) + (test-equal "nesting info of x" + (cons 0 #f) (hashmap-ref names (empty-wrap 'x))) - (test-equal "nesting level of y" - 0 + (test-equal "nesting info of y" + (cons 0 #f) (hashmap-ref names (empty-wrap 'y))) - (let ((returned (matcher empty-map (list 1 2)))) + (let ((returned (matcher (list 1 2)))) (test-equal "first" 1 (hashmap-ref returned (empty-wrap 'x))) (test-equal "second" 2 (hashmap-ref returned (empty-wrap 'y))))) (define (test-simple-ellipsis) (define-values (matcher names) - (compile-pattern ellipsis - '() - (list (empty-wrap 'x) ellipsis))) + (compile-pattern '() (list (empty-wrap 'x) ellipsis))) (test-equal "nesting level of x" 1 - (hashmap-ref names (empty-wrap 'x))) + (car (hashmap-ref names (empty-wrap 'x)))) (let* ((list '(1 2 3 4 5 6 7 8)) - (returned (matcher empty-map list)) + (returned (matcher list)) (x-value (hashmap-ref returned (empty-wrap 'x)))) (test-equal "(1 2 3 ...)" (reverse list) x-value)) - (let ((returned (matcher empty-map '()))) + (let ((returned (matcher '()))) (test-equal "()" '() (hashmap-ref returned (empty-wrap 'x)))) (let* ((list (list (empty-wrap 'x) 1 (empty-wrap 'y))) - (returned (matcher empty-map list)) + (returned (matcher list)) (values (hashmap-ref returned (empty-wrap 'x)))) (test-group "(x 1 y)" (test-assert "y" @@ -103,12 +101,11 @@ (define (test-multiple-ellipsis) (define-values (matcher names) - (compile-pattern ellipsis - '() + (compile-pattern '() (list (list (empty-wrap 'x) ellipsis) (list (empty-wrap 'y) ellipsis)))) (define (test-for list x y) - (let ((returned (matcher empty-map list))) + (let ((returned (matcher list))) (test-equal "x" x (hashmap-ref returned (empty-wrap 'x))) @@ -126,12 +123,11 @@ (define (test-compound-ellipsis) (define-values (matcher names) - (compile-pattern ellipsis - '() + (compile-pattern '() (list (list (empty-wrap 'name) (empty-wrap 'value)) ellipsis))) (define (test-for list x y) - (let ((returned (matcher empty-map list))) + (let ((returned (matcher list))) (test-equal "x" x (hashmap-ref returned (empty-wrap 'x))) @@ -147,13 +143,12 @@ (define (test-nested-ellipsis) (define-values (matcher names) - (compile-pattern ellipsis - '() + (compile-pattern '() (list (list (list (empty-wrap 'name) ellipsis) (empty-wrap 'value)) ellipsis))) (define (test-of form names values) - (let ((returned (matcher empty-map form))) + (let ((returned (matcher form))) (test-equal "names" names (hashmap-ref returned (empty-wrap 'name))) @@ -167,55 +162,49 @@ ("name3" "name2" "name1")) '("value2" "value1"))) (test-assert "non list fails" - (not (matcher empty-map - '(("name1 value1") ("name2" "value2"))))) + (not (matcher '(("name1 value1") ("name2" "value2"))))) (test-assert "partial non list fails" (not (matcher - empty-map '((("name1" "name2") "value1") ("name3" "value3")))))) (define (test-single-literal) (define literal-list (list (empty-wrap 'literal))) (define-values (matcher names) - (compile-pattern ellipsis - literal-list + (compile-pattern literal-list (list (empty-wrap 'literal) (empty-wrap 'x)))) (test-assert "without literal fails" (not - (matcher empty-map '("literal" "value")))) + (matcher '("literal" "value")))) (test-group "with literal succeeds" - (let ((returned (matcher empty-map `(,(empty-wrap 'literal) "value")))) + (let ((returned (matcher `(,(empty-wrap 'literal) "value")))) (test-equal "x" "value" (hashmap-ref returned (empty-wrap 'x)))))) (define (test-ignored-pattern) (define-values (matcher names) - (compile-pattern ellipsis - '() + (compile-pattern '() (list (empty-wrap '_) (empty-wrap 'x)))) (test-equal "names is length 1" 1 (hashmap-size names)) (test-assert "names contains x" (hashmap-contains? names (empty-wrap 'x))) - (let ((returned (matcher empty-map '(1 2)))) + (let ((returned (matcher '(1 2)))) (test-equal "x" 2 (hashmap-ref returned (empty-wrap 'x))))) (define (test-matching-a-vector) (define-values (matcher names) - (compile-pattern ellipsis - (list (empty-wrap 'then)) + (compile-pattern (list (empty-wrap 'then)) (vector (empty-wrap 'x) ellipsis (empty-wrap 'then) (empty-wrap 'y)))) - (let ((returned (matcher empty-map - (vector 1 2 3 4 5 + (let ((returned (matcher (vector 1 2 3 4 5 (empty-wrap 'then) 6)))) (test-assert "matched" returned) |
