aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGravatar Peter McGoron 2025-06-16 16:40:40 -0400
committerGravatar Peter McGoron 2025-06-16 16:40:40 -0400
commit46fba398781837ca638e2e40195a5f1600a934e6 (patch)
treef89f4ea2c9d7872ee3b7e72e2fb51c811983ff82
parentmove common procedures for matcher and producer to internal (diff)
producer
-rw-r--r--multisyntax/pattern/internal.scm33
-rw-r--r--multisyntax/pattern/internal.sld4
-rw-r--r--multisyntax/pattern/matcher.scm9
-rw-r--r--multisyntax/pattern/producer.scm308
-rw-r--r--multisyntax/syntax-object.scm7
5 files changed, 341 insertions, 20 deletions
diff --git a/multisyntax/pattern/internal.scm b/multisyntax/pattern/internal.scm
index 2753459..dd6b5b4 100644
--- a/multisyntax/pattern/internal.scm
+++ b/multisyntax/pattern/internal.scm
@@ -81,20 +81,21 @@
;; The values of the parameter is a vector #(ellipsis-procedure literals),
;; where `ellipsis-procedure` is a procedure of one argument that returns
;; true if the passed argument is a real ellipsis and 0 otherwise. The
- ;; value `literals` is a set of literals, disambiguated by `bound-identifier=?`.
- (make-parameter #f
- (lambda (inputs)
- (if inputs
- (let ((literals-set
- (convert-to-literals-set
- (vector-ref inputs 1))))
- (vector (generate-ellipsis-procedure
- (cond
- ((vector-ref inputs 0) => values)
- (else ...))
- literals-set)
- literals-set))
- #f))))
+ ;; value `literals` is a set of literals, disambiguated by
+ ;; `bound-identifier=?`.
+ (let ()
+ (define (transformer inputs)
+ (and inputs
+ (let ((literals-set
+ (convert-to-literals-set
+ (vector-ref inputs 1))))
+ (vector (generate-ellipsis-procedure
+ (cond
+ ((vector-ref inputs 0) => values)
+ (else ...))
+ literals-set)
+ literals-set))))
+ (make-parameter #f transformer)))
(define (ellipsis-procedure)
(vector-ref (matcher-input) 0))
@@ -118,3 +119,7 @@
(define (literal? identifier)
(set-contains? (literals) identifier))
+
+(define (empty-map)
+ (hashmap bound-identifier-comparator))
+
diff --git a/multisyntax/pattern/internal.sld b/multisyntax/pattern/internal.sld
index 0199ced..0345f01 100644
--- a/multisyntax/pattern/internal.sld
+++ b/multisyntax/pattern/internal.sld
@@ -17,6 +17,6 @@
(define-library (multisyntax pattern internal)
(import (scheme base) (srfi 113) (multisyntax syntax-object))
- (export matcher-input
- is-ellipsis-list actual-ellipsis? literal?)
+ (export matcher-input is-ellipsis-list actual-ellipsis? literal?
+ empty-map)
(include "internal.scm"))
diff --git a/multisyntax/pattern/matcher.scm b/multisyntax/pattern/matcher.scm
index 6c8d92c..52e58b8 100644
--- a/multisyntax/pattern/matcher.scm
+++ b/multisyntax/pattern/matcher.scm
@@ -67,7 +67,11 @@
|
|------------------------------------------------------------------------
| For an identifier with nesting level N, the result map at the end of
- | matching will have N levels of lists. For example:
+ | matching will have N levels of lists. For example, the pattern
+ |
+ | (let-values (((name ...) value) ...) body ...)
+ |
+ | with input
|
| (let-values (((name1 name2) value1) ((name3 name4) value2)) body)
|
@@ -197,9 +201,6 @@
;;; Helper functions
;;; ;;;;;;;;;;;;;;;;;;;;
-(define (empty-map)
- (hashmap bound-identifier-comparator))
-
(define (merge-names oldnames newnames)
;; newnames is the patterns matched in an ellipsis expression. Append
;; each to the lists in `oldnames` associated with each name and return
diff --git a/multisyntax/pattern/producer.scm b/multisyntax/pattern/producer.scm
new file mode 100644
index 0000000..4889c7f
--- /dev/null
+++ b/multisyntax/pattern/producer.scm
@@ -0,0 +1,308 @@
+#| Copyright (c) Peter McGoron 2025
+ |
+ | Licensed under the Apache License, Version 2.0 (the "License");
+ | you may not use this file except in compliance with the License.
+ | You may obtain a copy of the License at
+ |
+ | http://www.apache.org/licenses/LICENSE-2.0
+ |
+ | Unless required by applicable law or agreed to in writing, software
+ | distributed under the License is distributed on an "AS IS" BASIS,
+ | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ | See the License for the specific language governing permissions and
+ | limitations under the License.
+ |------------------------------------------------------------------------
+ | Pattern "producer" for syntax-rules and syntax-case.
+ | Takes the output of a pattern match and the ellipses nesting data for
+ | that pattern match and return a syntax object with the output of the
+ | pattern match.
+ |
+ | This does not timestamp, because that is the expander's job.
+ |------------------------------------------------------------------------
+ | The following uses terms defined in "matcher.scm".
+ |
+ | The "real nesting level" of an identifier (`RNL(I)`) is the nesting level
+ | from the pattern matcher.
+ |
+ | The "producer nesting level" of a pattern `P` (`PNL(P)`) relative to an
+ | input pattern `P'`
+ | is defined similarly to the ellipsis nesting level, except that it is
+ | defined on a producer pattern, and each occurence of an identifier may
+ | have a different producer nesting level. When the input pattern is not
+ | specified, it is the entire input pattern.
+ |
+ | Let `P` be a pattern. Then a subpattern `P'` is closed relative to `P`
+ | if
+ |
+ | * `P'` is an identifier and `PNL(P') >= RNL(P')` (relative to `P`), or
+ | * `P'` is literal, or
+ | * `P'` is a list, vector, or ellipsis pattern where all elements of `P'`
+ | are closed relative to `P'`.
+ |
+ | The output length of a subpattern `P'` of a pattern `P` is the length
+ | of the `PNL(P')` (relative to `P`) nesting level of the matched form.
+ |
+ | A pattern producer `P` is well formed if
+ |
+ | 1. Each subpattern of `P` is closed relative to `P`, and
+ | 2. For each ellipsis pattern `A ...` of `P`,
+ | 1. At least one pattern in `A` is open relative to `A`.
+ | 2. Each pattern `P'` open relative to `A` with `PNL(P') = 0` relative
+ | to `A` has the same output length (relative to `A`).
+ |------------------------------------------------------------------------
+ | A stricter pattern producer would only allow open identifiers inside of
+ | an ellipsis pattern that are a part of the same ellipsis class (i.e.
+ | they were contained in the same outermost ellipsis in the matcher).
+ | This is relaxed to allow for `generate-temporaries` forms bound
+ | with `with-syntax` to be useful. See the example in the R6RS Standard
+ | Libraries, ยง12.7.
+ |--------------------------------------------------------------------
+ | Here's a smoke test for your syntax-rules pattern matcher:
+
+ (define-syntax test
+ (syntax-rules ()
+ ((_ (x ...))
+ (quote ((x (x ...)) ...)))))
+ (test (1 2))
+
+ | This outputs on Chez
+
+ ((1 (1 2)) (2 (1 2)))
+
+ | What does this do?
+
+ (define-syntax test2
+ (syntax-rules ()
+ ((_ (x ...) ...)
+ (quote (((x (x ...)) ...) ...)))))
+
+ (test2 (1 2) (3 4))
+
+ | On Chez it outputs
+
+ (((1 (1 2)) (2 (3 4))) ((3 (1 2)) (4 (3 4))))
+
+ | Surprising! To make sense of it, consider the rewrite
+
+ (define-syntax test3
+ (lambda (x)
+ (syntax-case x ()
+ ((_ (x ...) ...)
+ (with-syntax ((((y ...) ...) #'((x ...) ...)))
+ #'(quote (((x (y ...)) ...) ...)))))))
+
+ | The outermost `...` is more ellipses than `y` was introduced with, so
+ | it will be repeated. The `...` after that will select one of the
+ | nestings of `y`, and the second `...` next to `y` will select one value
+ | in one nesting of `y`.
+ |
+ | The easiest way to make sense of such patterns is to rewrite the
+ | patterns so that each identifier in the production is used at most once.
+ | This is done automatically.
+ |#
+
+(define (rewrite/temporaries pattern bindings)
+ ;; Rewrite `pattern` such that all pattern-bound identifiers occur in
+ ;; `pattern` at most once.
+ (define appearances (set bound-identifier-comparator))
+ (define (add-appearance! id)
+ (set! appearances (set-adjoin appearances id)))
+ (define (appears? id)
+ (set-member? appearances id))
+ (define (add-temporary! id)
+ (let ((new (generate-identifier (syntax->datum id))))
+ (set! bindings (mapping-adjoin bindings
+ new
+ (mapping-ref bindings id)))
+ new))
+ (define (rewrite pattern)
+ (let ((pattern (unwrap-syntax pattern)))
+ (cond
+ ((pair? pattern) (cons (rewrite (car pattern))
+ (rewrite (cdr pattern))))
+ ((vector? pattern) (vector-map rewrite pattern))
+ ((literal? pattern) pattern)
+ ((actual-ellipsis? pattern) pattern)
+ ((and (identifier? pattern) (appears? pattern))
+ (add-temporary! pattern))
+ ((identifier? pattern)
+ (add-appearance! pattern)
+ pattern)
+ (else pattern))))
+ (let ((pattern (rewrite pattern)))
+ (values pattern bindings)))
+
+(define PNL
+ ;; The current producer nesting level relative to the whole producer.
+ ;; Must be an integer.
+ (make-parameter #f))
+
+(define bindings
+ ;; Mapping bound identifiers to their real nesting level.
+ (make-parameter #f))
+
+(define all-bindings (make-parameter #f))
+
+(define (compile-producer literals pattern %bindings ellipsis)
+ ;; Enty point into the producer compiler.
+ ;;
+ ;; This will rewrite the producer so that each bound identifier is used
+ ;; at most once.
+ (parameterize ((matcher-input (vector ellipsis literals)))
+ (let-values (((pattern %bindings)
+ (rewrite/temporaries pattern %bindings)))
+ (parameterize ((PNL 0)
+ (bindings %bindings))
+ (let-values (((producer open-bindings)
+ (compile pattern)))
+ (if (not (hashmap-empty? open-bindings))
+ (error "pattern not closed" pattern)
+ (lambda (bindings)
+ (parameterize ((all-bindings bindings))
+ (producer bindings)))))))))
+
+(define (compile pattern)
+ ;; Returns a procedure that will produce `pattern` given the bindings.
+ (let ((pattern (unwrap-syntax pattern)))
+ (cond
+ ((self-syntax? pattern)
+ (values (lambda (bindings) pattern)
+ (empty-map)))
+ ((pair? pattern) (compile-pair (unwrap-syntax (car pattern))
+ (unwrap-syntax (cdr pattern))))
+ ;; TODO: Vectors
+ #;((vector? pattern) (compile-vector pattern))
+ ((actual-ellipsis? pattern)
+ (error "ellipsis in location where it is not allowed" pattern))
+ ((literal? pattern)
+ (values (lambda (bindings) pattern)
+ (empty-map)))
+ ((identifier? pattern)
+ (values (lambda (bindings) (hashtable-ref bindings pattern))
+ (hashmap bound-element-comparator pattern (PNL))))
+ (else (error "not syntax" pattern)))))
+
+;;; ;;;;;;;;;
+;;; Lists
+;;; ;;;;;;;;;
+
+(define (list-of-ellipses patcdr)
+ ;; Returns `(values i pat)`, where `i` is the number of found ellipses
+ ;; and `pat` is the first non-ellipsis.
+ (let loop ((i 0)
+ (patcdr patcdr))
+ (cond
+ ((null? patcdr) (values i patcdr))
+ ((not (pair? patcdr)) (values i patcdr))
+ (else
+ (let ((patcar (unwrap-syntax (car patcdr))))
+ (if (actual-ellipsis? patcar)
+ (loop (+ i 1) (unwrap-syntax (cdr patcdr)))
+ (values i patcdr)))))))
+
+(define (compile-pair patcar patcdr)
+ (let*-values (((number-of-ellipses next) (list-of-ellipses patcdr))
+ ((produce-next open-identifiers-next) (compile next)))
+ (if (zero? number-of-ellipses)
+ (let-values (((produce-car open-identifiers) (compile next)))
+ (values
+ (lambda (bindings)
+ (cons (produce-car bindings) (produce-next bindings)))
+ (mapping-union open-identifiers open-identifiers-next)))
+ (let-values (((produce-in-ellipsis open-identifiers-of)
+ (compile-in-list-ellipsis patcar)))
+ (if (hashmap-empty? open-identifiers-of)
+ (values
+ ;; If the ellipses binding is completely closed, then pass
+ ;; the pattern producer all the bindings (which is stored as
+ ;; a parameter object).
+ (lambda (bindings)
+ (append (produce-in-ellipsis (all-bindings))
+ (produce-next bindings)))
+ open-identifiers-next)
+ (values
+ (lambda (bindings)
+ (append (produce-in-ellipsis bindings)
+ (produce-next bindings)))
+ (mapping-union open-identifiers
+ open-identifiers-next)))))))
+
+;;; The following prodcures are related to "iterator" maps, which are the
+;;; maps used to output ellipses productions.
+
+(define (length+reverse list)
+ (let loop ((i 0)
+ (list list)
+ (acc 0))
+ (if (null? list)
+ (values i acc)
+ (loop (+ i 1) (cdr list) (cons (car list) acc)))))
+
+(define (open-bindings open-identifiers bindings)
+ ;; Return a map of open identifiers to a correctly ordered list of their
+ ;; captured values. Will raise an error if there is their lengths are not
+ ;; the same.
+ (define length-of-each #f)
+ (hashmap-map
+ (lambda (open-identifier _)
+ (let*-values (((rev-bound) (mapping-ref bindings open-identifier))
+ ((length bound) (length+reverse rev-bound)))
+ (cond
+ ((not length-of-each)
+ (set! length-of-each length)
+ (values open-identifier bound))
+ ((not (= length-of-each length))
+ (error "length mismatch for open identifiers"
+ bindings
+ open-identifiers
+ bound))
+ (else (values open-identifier bound)))))
+ bound-identifier-comparator
+ open-identifiers))
+
+(define (next-binding open-bindings)
+ ;; Returns a map of open identifiers that points to the next bound values
+ ;; to output.
+ (hashmap-map (lambda (identifier values)
+ (values identifier (cdr values)))
+ bound-identifier-comparator
+ open-bindings))
+
+(define (union/current-bindings bindings open-identifiers-map)
+ ;; Return a map of bindings, where the open identifiers are assigned the
+ ;; current bound value in iteration.
+ (hashmap-union
+ (hashmap-map (lambda (open-identifier values)
+ (values open-identifier (car values)))
+ bound-identifier-comparator
+ open-identifiers-map)
+ bindings))
+
+(define (bindings-finished? bindings)
+ ;; Returns true if there are no more bound values to iterate over.
+ (hashmap-any? (lambda (_ values) (null? values)) bindings))
+
+(define (will-be-closed? identifier PNL)
+ (>= PNL (hashmap-ref (bindings) identifier)))
+
+(define (produce-ellipsis-list number-of-ellipses patcar)
+ ;; Prepare a procedure that matches an ellipsis pattern in a list.
+ (let*-values (((produce-part open-identifiers)
+ ;; Get all open identifiers under the ellipses.
+ (parameterize ((PNL (+ (PNL) number-of-ellipses)))
+ (compile patcar)))
+ ((open-identifiers-to-return)
+ ;; Remove identifiers which that will become closed after
+ ;; exiting the ellipses.
+ (hashmap-delete will-be-closed? open-identifiers)))
+ (when (null? all-open-identifiers)
+ (error "ellipsis production does not have open identifiers" patcar))
+ (values (lambda (bindings)
+ (do ((iterated (open-bindings open-identifiers bindings)
+ (next-binding iterated))
+ (patterns (list-accumulator)))
+ ((bindings-finished? iterated) (patterns (eof-object)))
+ (let ((subbindings (union/current-bindings bindings
+ iterated)))
+ (patterns (produce-part subbindings)))))
+ open-identifiers-to-return)))
diff --git a/multisyntax/syntax-object.scm b/multisyntax/syntax-object.scm
index d3058b5..1324182 100644
--- a/multisyntax/syntax-object.scm
+++ b/multisyntax/syntax-object.scm
@@ -108,6 +108,13 @@
;;; to do the actual macro expansion. This still results in linear
;;; time complexity, but with a larger constant.
;;;
+;;; IDEA: Currently the entire algorithm is persistent. One could give up
+;;; persistence and do eager wrap propagation using mutation. This means that
+;;; the input no longer has to be scanned. This would make the additional
+;;; growth linear in the size of introduced identifiers that have not been
+;;; reached yet. This would help in cases where introduced identifiers are
+;;; very far down the tree.
+;;;
;;; The direct-implementation has the property that no eager wrap
;;; propagation must be done. However, it requires linear behavior for the
;;; set of marks and substitutions. This would make the expander very slow