aboutsummaryrefslogtreecommitdiffstats
path: root/mcgoron.ris.parse.scm
blob: 2bd1c422534beccc13db0aa9092e4c11e0f081fd (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
#| Copyright 2024 Peter McGoron
 |
 | Licensed under the Apache License, Version 2.0 (the "License");
 |
 | you may not use this file except in compliance with the License.
 | You may obtain a copy of the License at
 |
 |     http://www.apache.org/licenses/LICENSE-2.0
 |
 | Unless required by applicable law or agreed to in writing, software
 | distributed under the License is distributed on an "AS IS" BASIS,
 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | See the License for the specific language governing permissions and
 | limitations under the License.
 |#

;;; Stream destructors.

(define (stream-pair=> stream)
  ;; Destructure a stream into its car and cdr.
  (cond-values
    (when-ct (stream-pair? stream)
      (values (stream-car stream) (stream-cdr stream)))))

(define (stream-predicate=> stream predicate?)
  ;; Destructure a stream into its car and cdr if the car satisfies
  ;; PREDICATE?
  (cond-values
    (after ((let (stream-pair=> stream) => (head stream))
            (when (predicate? head)))
      (values head stream))))

(define-syntax with-stream-predicates
  ;; Sequence stream predicates together.
  ;; The final cdr is bound to STREAM. The resultant CAR of each test
  ;; is bound to NAME.
  (syntax-rules ()
    ((_ stream ((predicate? name) ...) body ...)
     (after ((let (stream-predicate=> stream predicate?) => (name stream))
             ...)
       body ...))))

;;; Mapping utils.

(define (add-to-mapping mapping name rest)
  ;; Add REST to the flexvector mapped to NAME in MAPPING. If NAME is
  ;; not mapped, allocate a new flexvector with REST as its first element.
  (let ((fv (mapping-ref/default mapping name #f)))
    (cond
      (fv (flexvector-add-back! fv rest)
          mapping)
      (else (mapping-set mapping name (flexvector rest))))))

;;; Flexvector utils.

(define (flexvector-update-last! fv update)
  ;; Update the last value in FV with the result of applying UPDATE
  ;; to the last value in FV.
  (let ((len (flexvector-length fv)))
    (flexvector-set! fv
                     (- len 1)
                     (update (flexvector-ref fv (- len 1))))))

(define (append-new-line! fv rest)
  ;; Append to the last value in FV the string REST.
  (flexvector-update-last! fv
                           (lambda (x)
                             (string-append x "\n" rest))))

;;; Parsing each line.

(define (field-char? ch)
  ;; Check if CH is a valid RIS field char.
  (or (char-upper-case? ch) (char-numeric? ch)))

(define (end=> stream)
  ;; Check if STREAM is at the end of a line.
  (cond-values
    ;; EOF is end of line
    (when-ct (stream-null? stream)
      stream)
    ;; Handle CRLF
    (with-stream-predicates stream (((cut eqv? #\return <>) _)
                                    ((cut eqv? #\newline <>) _))
      stream)
    ;; Handle LF
    (with-stream-predicates stream (((cut eqv? #\newline <>) _))
      stream)))

(define (read-rest stream)
  ;; Read the rest of the stream up to the end of a line, and return
  ;; it as a string.
  (let ((fv (flexvector)))
    (let read-rest ((stream stream))
      (cond-thunk
        (after ((let (end=> stream) => (stream)))
          (values (flexvector->string fv) stream))
        (after ((let (stream-predicate=> stream char?)
                  => (x stream)))
          (flexvector-add-back! fv x)
          (read-rest stream))
        (else (raise (stream-error stream)))))))

(define (field=> stream)
  ;; At the start of a line, read a field specifier, or return no
  ;; values.
  ;; 
  ;; TODO: This needs to read /[A-Z0-9]{2,}  - /
  (let ((ws? (lambda (x)
               (char=? x #\space))))
    (cond-values
      (with-stream-predicates stream ((field-char? char1)
                                      (field-char? char2)
                                      (ws? _)
                                      (ws? _)
                                      ((cut char=? #\- <>) _)
                                      (ws? _))
        (let-values (((rest stream) (read-rest stream)))
          (values (string char1 char2) rest stream))))))

;;; Parsing of references.

(define (add/special-fields mapping name rest)
  (let ((mapping (add-to-mapping mapping name rest)))
    ;; If NAME is an author field, append the name to the special field
    ;; AUTHOR.
    (if (author-field? name)
        (add-to-mapping mapping "AUTHOR" rest)
        mapping)))

(define (read-reference stream mapping last-name)
  ;; Read an entire reference up to the ER field. This function starts
  ;; at the beginning of a line.
  ;; 
  ;; All new fields are added to MAPPING. Repeated fields are appended
  ;; to a flexvector that the field name maps to.
  ;; 
  ;; If the beginning of the line is not a field, then it is a
  ;; continuation of the last field, specified by LAST-NAME.
  ;; 
  ;; Returns (VALUES STREAM MAPPING) after ER.
  (cond-thunk
    (after ((let (field=> stream) => (name rest stream)))
      (if (equal? name "ER")
          (values stream mapping)
          (let ((mapping (add/special-fields mapping name rest)))
            (read-reference stream mapping name))))
    (when-ct (stream-null? stream)
      (values stream mapping))
    (else
     (let-values
         (((fv) (mapping-ref mapping last-name))
          ((rest stream) (read-rest stream)))
       (append-new-line! rest)
       (read-reference stream mapping last-name)))))

(define (new-mapping name rest)
  (add-to-mapping (mapping (make-default-comparator)) name rest))

(define (read-new-reference stream references-fv)
  ;; Read the start of a new reference.
  ;; 
  ;; If the file is at EOF, return MAPPING.
  ;; Otherwise, read the TY field of the new reference. If there is no
  ;; TY field, raise MALFORMED-REFERENCE-ERROR.
  (cond-thunk
    (when-ct (stream-null? stream)
      references-fv)
    (after ((let (field=> stream) => (name rest stream))
            (when (string=? name "TY")))
      (let-values (((stream ref-mapping)
                    (read-reference stream
                                    (new-mapping name rest)
                                    name)))
        (flexvector-add-back! references-fv ref-mapping)
        (read-new-reference stream references-fv)))
    (else (raise (malformed-reference-error stream)))))

(define (ris->mapping port)
  (read-new-reference (port->stream port)
                      (flexvector)))