diff options
| author | 2025-06-16 16:40:40 -0400 | |
|---|---|---|
| committer | 2025-06-16 16:40:40 -0400 | |
| commit | 46fba398781837ca638e2e40195a5f1600a934e6 (patch) | |
| tree | f89f4ea2c9d7872ee3b7e72e2fb51c811983ff82 | |
| parent | move common procedures for matcher and producer to internal (diff) | |
producer
| -rw-r--r-- | multisyntax/pattern/internal.scm | 33 | ||||
| -rw-r--r-- | multisyntax/pattern/internal.sld | 4 | ||||
| -rw-r--r-- | multisyntax/pattern/matcher.scm | 9 | ||||
| -rw-r--r-- | multisyntax/pattern/producer.scm | 308 | ||||
| -rw-r--r-- | multisyntax/syntax-object.scm | 7 |
5 files changed, 341 insertions, 20 deletions
diff --git a/multisyntax/pattern/internal.scm b/multisyntax/pattern/internal.scm index 2753459..dd6b5b4 100644 --- a/multisyntax/pattern/internal.scm +++ b/multisyntax/pattern/internal.scm @@ -81,20 +81,21 @@ ;; The values of the parameter is a vector #(ellipsis-procedure literals), ;; where `ellipsis-procedure` is a procedure of one argument that returns ;; true if the passed argument is a real ellipsis and 0 otherwise. The - ;; value `literals` is a set of literals, disambiguated by `bound-identifier=?`. - (make-parameter #f - (lambda (inputs) - (if inputs - (let ((literals-set - (convert-to-literals-set - (vector-ref inputs 1)))) - (vector (generate-ellipsis-procedure - (cond - ((vector-ref inputs 0) => values) - (else ...)) - literals-set) - literals-set)) - #f)))) + ;; value `literals` is a set of literals, disambiguated by + ;; `bound-identifier=?`. + (let () + (define (transformer inputs) + (and inputs + (let ((literals-set + (convert-to-literals-set + (vector-ref inputs 1)))) + (vector (generate-ellipsis-procedure + (cond + ((vector-ref inputs 0) => values) + (else ...)) + literals-set) + literals-set)))) + (make-parameter #f transformer))) (define (ellipsis-procedure) (vector-ref (matcher-input) 0)) @@ -118,3 +119,7 @@ (define (literal? identifier) (set-contains? (literals) identifier)) + +(define (empty-map) + (hashmap bound-identifier-comparator)) + diff --git a/multisyntax/pattern/internal.sld b/multisyntax/pattern/internal.sld index 0199ced..0345f01 100644 --- a/multisyntax/pattern/internal.sld +++ b/multisyntax/pattern/internal.sld @@ -17,6 +17,6 @@ (define-library (multisyntax pattern internal) (import (scheme base) (srfi 113) (multisyntax syntax-object)) - (export matcher-input - is-ellipsis-list actual-ellipsis? literal?) + (export matcher-input is-ellipsis-list actual-ellipsis? literal? + empty-map) (include "internal.scm")) diff --git a/multisyntax/pattern/matcher.scm b/multisyntax/pattern/matcher.scm index 6c8d92c..52e58b8 100644 --- a/multisyntax/pattern/matcher.scm +++ b/multisyntax/pattern/matcher.scm @@ -67,7 +67,11 @@ | |------------------------------------------------------------------------ | For an identifier with nesting level N, the result map at the end of - | matching will have N levels of lists. For example: + | matching will have N levels of lists. For example, the pattern + | + | (let-values (((name ...) value) ...) body ...) + | + | with input | | (let-values (((name1 name2) value1) ((name3 name4) value2)) body) | @@ -197,9 +201,6 @@ ;;; Helper functions ;;; ;;;;;;;;;;;;;;;;;;;; -(define (empty-map) - (hashmap bound-identifier-comparator)) - (define (merge-names oldnames newnames) ;; newnames is the patterns matched in an ellipsis expression. Append ;; each to the lists in `oldnames` associated with each name and return diff --git a/multisyntax/pattern/producer.scm b/multisyntax/pattern/producer.scm new file mode 100644 index 0000000..4889c7f --- /dev/null +++ b/multisyntax/pattern/producer.scm @@ -0,0 +1,308 @@ +#| Copyright (c) Peter McGoron 2025 + | + | Licensed under the Apache License, Version 2.0 (the "License"); + | you may not use this file except in compliance with the License. + | You may obtain a copy of the License at + | + | http://www.apache.org/licenses/LICENSE-2.0 + | + | Unless required by applicable law or agreed to in writing, software + | distributed under the License is distributed on an "AS IS" BASIS, + | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + | See the License for the specific language governing permissions and + | limitations under the License. + |------------------------------------------------------------------------ + | Pattern "producer" for syntax-rules and syntax-case. + | Takes the output of a pattern match and the ellipses nesting data for + | that pattern match and return a syntax object with the output of the + | pattern match. + | + | This does not timestamp, because that is the expander's job. + |------------------------------------------------------------------------ + | The following uses terms defined in "matcher.scm". + | + | The "real nesting level" of an identifier (`RNL(I)`) is the nesting level + | from the pattern matcher. + | + | The "producer nesting level" of a pattern `P` (`PNL(P)`) relative to an + | input pattern `P'` + | is defined similarly to the ellipsis nesting level, except that it is + | defined on a producer pattern, and each occurence of an identifier may + | have a different producer nesting level. When the input pattern is not + | specified, it is the entire input pattern. + | + | Let `P` be a pattern. Then a subpattern `P'` is closed relative to `P` + | if + | + | * `P'` is an identifier and `PNL(P') >= RNL(P')` (relative to `P`), or + | * `P'` is literal, or + | * `P'` is a list, vector, or ellipsis pattern where all elements of `P'` + | are closed relative to `P'`. + | + | The output length of a subpattern `P'` of a pattern `P` is the length + | of the `PNL(P')` (relative to `P`) nesting level of the matched form. + | + | A pattern producer `P` is well formed if + | + | 1. Each subpattern of `P` is closed relative to `P`, and + | 2. For each ellipsis pattern `A ...` of `P`, + | 1. At least one pattern in `A` is open relative to `A`. + | 2. Each pattern `P'` open relative to `A` with `PNL(P') = 0` relative + | to `A` has the same output length (relative to `A`). + |------------------------------------------------------------------------ + | A stricter pattern producer would only allow open identifiers inside of + | an ellipsis pattern that are a part of the same ellipsis class (i.e. + | they were contained in the same outermost ellipsis in the matcher). + | This is relaxed to allow for `generate-temporaries` forms bound + | with `with-syntax` to be useful. See the example in the R6RS Standard + | Libraries, ยง12.7. + |-------------------------------------------------------------------- + | Here's a smoke test for your syntax-rules pattern matcher: + + (define-syntax test + (syntax-rules () + ((_ (x ...)) + (quote ((x (x ...)) ...))))) + (test (1 2)) + + | This outputs on Chez + + ((1 (1 2)) (2 (1 2))) + + | What does this do? + + (define-syntax test2 + (syntax-rules () + ((_ (x ...) ...) + (quote (((x (x ...)) ...) ...))))) + + (test2 (1 2) (3 4)) + + | On Chez it outputs + + (((1 (1 2)) (2 (3 4))) ((3 (1 2)) (4 (3 4)))) + + | Surprising! To make sense of it, consider the rewrite + + (define-syntax test3 + (lambda (x) + (syntax-case x () + ((_ (x ...) ...) + (with-syntax ((((y ...) ...) #'((x ...) ...))) + #'(quote (((x (y ...)) ...) ...))))))) + + | The outermost `...` is more ellipses than `y` was introduced with, so + | it will be repeated. The `...` after that will select one of the + | nestings of `y`, and the second `...` next to `y` will select one value + | in one nesting of `y`. + | + | The easiest way to make sense of such patterns is to rewrite the + | patterns so that each identifier in the production is used at most once. + | This is done automatically. + |# + +(define (rewrite/temporaries pattern bindings) + ;; Rewrite `pattern` such that all pattern-bound identifiers occur in + ;; `pattern` at most once. + (define appearances (set bound-identifier-comparator)) + (define (add-appearance! id) + (set! appearances (set-adjoin appearances id))) + (define (appears? id) + (set-member? appearances id)) + (define (add-temporary! id) + (let ((new (generate-identifier (syntax->datum id)))) + (set! bindings (mapping-adjoin bindings + new + (mapping-ref bindings id))) + new)) + (define (rewrite pattern) + (let ((pattern (unwrap-syntax pattern))) + (cond + ((pair? pattern) (cons (rewrite (car pattern)) + (rewrite (cdr pattern)))) + ((vector? pattern) (vector-map rewrite pattern)) + ((literal? pattern) pattern) + ((actual-ellipsis? pattern) pattern) + ((and (identifier? pattern) (appears? pattern)) + (add-temporary! pattern)) + ((identifier? pattern) + (add-appearance! pattern) + pattern) + (else pattern)))) + (let ((pattern (rewrite pattern))) + (values pattern bindings))) + +(define PNL + ;; The current producer nesting level relative to the whole producer. + ;; Must be an integer. + (make-parameter #f)) + +(define bindings + ;; Mapping bound identifiers to their real nesting level. + (make-parameter #f)) + +(define all-bindings (make-parameter #f)) + +(define (compile-producer literals pattern %bindings ellipsis) + ;; Enty point into the producer compiler. + ;; + ;; This will rewrite the producer so that each bound identifier is used + ;; at most once. + (parameterize ((matcher-input (vector ellipsis literals))) + (let-values (((pattern %bindings) + (rewrite/temporaries pattern %bindings))) + (parameterize ((PNL 0) + (bindings %bindings)) + (let-values (((producer open-bindings) + (compile pattern))) + (if (not (hashmap-empty? open-bindings)) + (error "pattern not closed" pattern) + (lambda (bindings) + (parameterize ((all-bindings bindings)) + (producer bindings))))))))) + +(define (compile pattern) + ;; Returns a procedure that will produce `pattern` given the bindings. + (let ((pattern (unwrap-syntax pattern))) + (cond + ((self-syntax? pattern) + (values (lambda (bindings) pattern) + (empty-map))) + ((pair? pattern) (compile-pair (unwrap-syntax (car pattern)) + (unwrap-syntax (cdr pattern)))) + ;; TODO: Vectors + #;((vector? pattern) (compile-vector pattern)) + ((actual-ellipsis? pattern) + (error "ellipsis in location where it is not allowed" pattern)) + ((literal? pattern) + (values (lambda (bindings) pattern) + (empty-map))) + ((identifier? pattern) + (values (lambda (bindings) (hashtable-ref bindings pattern)) + (hashmap bound-element-comparator pattern (PNL)))) + (else (error "not syntax" pattern))))) + +;;; ;;;;;;;;; +;;; Lists +;;; ;;;;;;;;; + +(define (list-of-ellipses patcdr) + ;; Returns `(values i pat)`, where `i` is the number of found ellipses + ;; and `pat` is the first non-ellipsis. + (let loop ((i 0) + (patcdr patcdr)) + (cond + ((null? patcdr) (values i patcdr)) + ((not (pair? patcdr)) (values i patcdr)) + (else + (let ((patcar (unwrap-syntax (car patcdr)))) + (if (actual-ellipsis? patcar) + (loop (+ i 1) (unwrap-syntax (cdr patcdr))) + (values i patcdr))))))) + +(define (compile-pair patcar patcdr) + (let*-values (((number-of-ellipses next) (list-of-ellipses patcdr)) + ((produce-next open-identifiers-next) (compile next))) + (if (zero? number-of-ellipses) + (let-values (((produce-car open-identifiers) (compile next))) + (values + (lambda (bindings) + (cons (produce-car bindings) (produce-next bindings))) + (mapping-union open-identifiers open-identifiers-next))) + (let-values (((produce-in-ellipsis open-identifiers-of) + (compile-in-list-ellipsis patcar))) + (if (hashmap-empty? open-identifiers-of) + (values + ;; If the ellipses binding is completely closed, then pass + ;; the pattern producer all the bindings (which is stored as + ;; a parameter object). + (lambda (bindings) + (append (produce-in-ellipsis (all-bindings)) + (produce-next bindings))) + open-identifiers-next) + (values + (lambda (bindings) + (append (produce-in-ellipsis bindings) + (produce-next bindings))) + (mapping-union open-identifiers + open-identifiers-next))))))) + +;;; The following prodcures are related to "iterator" maps, which are the +;;; maps used to output ellipses productions. + +(define (length+reverse list) + (let loop ((i 0) + (list list) + (acc 0)) + (if (null? list) + (values i acc) + (loop (+ i 1) (cdr list) (cons (car list) acc))))) + +(define (open-bindings open-identifiers bindings) + ;; Return a map of open identifiers to a correctly ordered list of their + ;; captured values. Will raise an error if there is their lengths are not + ;; the same. + (define length-of-each #f) + (hashmap-map + (lambda (open-identifier _) + (let*-values (((rev-bound) (mapping-ref bindings open-identifier)) + ((length bound) (length+reverse rev-bound))) + (cond + ((not length-of-each) + (set! length-of-each length) + (values open-identifier bound)) + ((not (= length-of-each length)) + (error "length mismatch for open identifiers" + bindings + open-identifiers + bound)) + (else (values open-identifier bound))))) + bound-identifier-comparator + open-identifiers)) + +(define (next-binding open-bindings) + ;; Returns a map of open identifiers that points to the next bound values + ;; to output. + (hashmap-map (lambda (identifier values) + (values identifier (cdr values))) + bound-identifier-comparator + open-bindings)) + +(define (union/current-bindings bindings open-identifiers-map) + ;; Return a map of bindings, where the open identifiers are assigned the + ;; current bound value in iteration. + (hashmap-union + (hashmap-map (lambda (open-identifier values) + (values open-identifier (car values))) + bound-identifier-comparator + open-identifiers-map) + bindings)) + +(define (bindings-finished? bindings) + ;; Returns true if there are no more bound values to iterate over. + (hashmap-any? (lambda (_ values) (null? values)) bindings)) + +(define (will-be-closed? identifier PNL) + (>= PNL (hashmap-ref (bindings) identifier))) + +(define (produce-ellipsis-list number-of-ellipses patcar) + ;; Prepare a procedure that matches an ellipsis pattern in a list. + (let*-values (((produce-part open-identifiers) + ;; Get all open identifiers under the ellipses. + (parameterize ((PNL (+ (PNL) number-of-ellipses))) + (compile patcar))) + ((open-identifiers-to-return) + ;; Remove identifiers which that will become closed after + ;; exiting the ellipses. + (hashmap-delete will-be-closed? open-identifiers))) + (when (null? all-open-identifiers) + (error "ellipsis production does not have open identifiers" patcar)) + (values (lambda (bindings) + (do ((iterated (open-bindings open-identifiers bindings) + (next-binding iterated)) + (patterns (list-accumulator))) + ((bindings-finished? iterated) (patterns (eof-object))) + (let ((subbindings (union/current-bindings bindings + iterated))) + (patterns (produce-part subbindings))))) + open-identifiers-to-return))) diff --git a/multisyntax/syntax-object.scm b/multisyntax/syntax-object.scm index d3058b5..1324182 100644 --- a/multisyntax/syntax-object.scm +++ b/multisyntax/syntax-object.scm @@ -108,6 +108,13 @@ ;;; to do the actual macro expansion. This still results in linear ;;; time complexity, but with a larger constant. ;;; +;;; IDEA: Currently the entire algorithm is persistent. One could give up +;;; persistence and do eager wrap propagation using mutation. This means that +;;; the input no longer has to be scanned. This would make the additional +;;; growth linear in the size of introduced identifiers that have not been +;;; reached yet. This would help in cases where introduced identifiers are +;;; very far down the tree. +;;; ;;; The direct-implementation has the property that no eager wrap ;;; propagation must be done. However, it requires linear behavior for the ;;; set of marks and substitutions. This would make the expander very slow |
