aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Makefile4
-rw-r--r--mcgoron.ris.data.scm166
-rw-r--r--mcgoron.ris.data.sld21
-rw-r--r--mcgoron.ris.parse.scm115
-rw-r--r--mcgoron.ris.parse.sld2
-rw-r--r--sris.egg9
-rw-r--r--tests/run.scm3
7 files changed, 280 insertions, 40 deletions
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..dcaa6e7
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,4 @@
+.POSIX:
+
+clean:
+ rm *.build.sh *.install.sh *.import.scm *.so *.link *.o
diff --git a/mcgoron.ris.data.scm b/mcgoron.ris.data.scm
new file mode 100644
index 0000000..83af524
--- /dev/null
+++ b/mcgoron.ris.data.scm
@@ -0,0 +1,166 @@
+#| Copyright 2024 Peter McGoron
+ |
+ | Licensed under the Apache License, Version 2.0 (the "License");
+ |
+ | you may not use this file except in compliance with the License.
+ | You may obtain a copy of the License at
+ |
+ | http://www.apache.org/licenses/LICENSE-2.0
+ |
+ | Unless required by applicable law or agreed to in writing, software
+ | distributed under the License is distributed on an "AS IS" BASIS,
+ | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ | See the License for the specific language governing permissions and
+ | limitations under the License.
+ |------------------------------------------------------------------------|
+ | This file implements mapping from RIS codes to English descriptions.
+ | The sets and maps are implemented as parameters so they can be changed
+ | for any non-standard RIS files.
+ |#
+
+;;; Utility functions.
+
+(define (make-mapping-parameter default)
+ (make-parameter
+ default
+ (lambda (x)
+ (cond
+ ((mapping? x) x)
+ ((pair? x) (alist->mapping (make-default-comparator) x))
+ (else (error "parameter must be a map" x))))))
+
+(define (make-set-parameter default)
+ (make-parameter
+ default
+ (lambda (x)
+ (cond
+ ((set? x) x)
+ ((pair? x) (list->set (make-default-comparator) x))
+ (else (error "parameter must be a set" x))))))
+
+(define (parameter-lookup param)
+ (lambda (x)
+ (mapping-ref/default (param) x #f)))
+
+;;; Standard author fields.
+;;; There are 4 author fields, but some information about the authors
+;;; can be interleaved in with the author fields (for instance, "AD",
+;;; author address).
+;;;
+;;; To work around this bad design choice, all authors are put into a
+;;; meta-field "AUTHORS" which deliberately does not conform to the RIS
+;;; format.
+;;;
+;;; This should have been a nested format like sexprs or JSON. At least
+;;; it's not XML.
+
+(define author-fields
+ (make-set-parameter
+ '("AU" "A2" "A3" "A4")))
+
+(define (author-field? x)
+ (set-contains? (author-fields) x))
+
+;;; Maps from standard type and field codes to their descriptions.
+
+(define type-codes
+ (make-mapping-parameter
+ '(("GEN" . "Generic")
+ ("ART" . "Artwork")
+ ("ABST" . "Abstract")
+ ("AGGR" . "Aggregated Database")
+ ("ANCIENT" . "Ancient Text")
+ ("ADVS" . "Audiovisual Material")
+ ("BILL" . "Bill")
+ ("BLOG" . "Blog")
+ ("BOOK" . "Book")
+ ("CHAP" . "Book Section")
+ ("CASE" . "Case")
+ ("CTLG" . "Catalog")
+ ("CHART" . "Chart")
+ ("CLSWK" . "Classical Work")
+ ("COMP" . "Computer Program")
+ ("CPAPER" . "Conference Paper")
+ ("CONF" . "Conference Proceeding")
+ ("DATA" . "Dataset")
+ ("DICT" . "Dictionary")
+ ("EDBOOK" . "Edited Book")
+ ("EBOOK" . "Electronic Book")
+ ("ECHAP" . "Electronic Book Section")
+ ("EJOUR" . "Electronic Article")
+ ("ENCYC" . "Encyclopedia")
+ ("EQUA" . "Equation")
+ ("FIGURE" . "Figure")
+ ("MPCT" . "Film or Broadcast")
+ ("HEAR" . "Hearing")
+ ("INPR" . "In Press Article")
+ ("ICOMM" . "Internet Communication")
+ ("JOUR" . "Journal Article")
+ ("LEGAL" . "Legal Rule")
+ ("MGZN" . "Magazine Article")
+ ("MANSCPT" . "Manuscript")
+ ("MAP" . "Map")
+ ("MUSIC" . "Music")
+ ("NEWS" . "Newspaper Article")
+ ("DBASE" . "Online Database")
+ ("MULTI" . "Online Multimedia")
+ ("PAMP" . "Pamphlet")
+ ("PAT" . "Patent")
+ ("PCOMM" . "Personal Communication")
+ ("RPRT" . "Report")
+ ("SER" . "Serial")
+ ("SLIDE" . "Slide")
+ ("SOUND" . "Sound Recording")
+ ("STAND" . "Standard")
+ ("STAT" . "Statute")
+ ("THES" . "Thesis")
+ ("UNBILL" . "Unenacted Bill")
+ ("UNPD" . "Unpublished Work")
+ ("VIDEO" . "Video Recording")
+ ("ELEC" . "Web Page"))))
+
+(define ris-type-name
+ (parameter-lookup type-codes))
+
+(define field-codes
+ (make-mapping-parameter
+ '(("TY" . "GEN")
+ ("A2" . "Secondary Author")
+ ("A3" . "Tertiary Author")
+ ("A4" . "Subsidiary Author")
+ ("AB" . "Abstract")
+ ("AD" . "Author Address")
+ ("AN" . "Accession Number")
+ ("AU" . "Author")
+ ("C1" . "Custom 1")
+ ("C2" . "Custom 2")
+ ("C3" . "Custom 3")
+ ("C4" . "Custom 4")
+ ("C5" . "Custom 5")
+ ("C6" . "Custom 6")
+ ("C7" . "Custom 7")
+ ("C8" . "Custom 8")
+ ("CA" . "Call Number")
+ ("CN" . "Caption")
+ ("CY" . "Place Published")
+ ("DA" . "Date")
+ ("DB" . "Name of Database")
+ ("DO" . "DOI")
+ ("DP" . "Database Provider")
+ ("ET" . "Edition")
+ ("J2" . "Alternate Title")
+ ("KW" . "Keywords")
+ ("L1" . "File Attachments")
+ ("L4" . "Language")
+ ("LA" . "Figure")
+ ("LB" . "Label")
+ ("IS" . "Number")
+ ("M3" . "Type of Work")
+ ("N1" . "Notes")
+ ("NV" . "Number of Volumes")
+ ("OP" . "Original Publication")
+ ("PB" . "Publisher")
+ ("PY" . "Year"))))
+
+(define ris-field-name
+ (parameter-lookup field-codes))
diff --git a/mcgoron.ris.data.sld b/mcgoron.ris.data.sld
new file mode 100644
index 0000000..4b32d34
--- /dev/null
+++ b/mcgoron.ris.data.sld
@@ -0,0 +1,21 @@
+#| Copyright 2024 Peter McGoron
+ |
+ | Licensed under the Apache License, Version 2.0 (the "License");
+ |
+ | you may not use this file except in compliance with the License.
+ | You may obtain a copy of the License at
+ |
+ | http://www.apache.org/licenses/LICENSE-2.0
+ |
+ | Unless required by applicable law or agreed to in writing, software
+ | distributed under the License is distributed on an "AS IS" BASIS,
+ | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ | See the License for the specific language governing permissions and
+ | limitations under the License.
+ |#
+
+(define-library (mcgoron ris data)
+ (import (scheme base) (srfi 146) (srfi 113) (srfi 128))
+ (export author-fields type-codes field-codes
+ author-field? ris-type-name ris-field-name)
+ (include "mcgoron.ris.data.scm")) \ No newline at end of file
diff --git a/mcgoron.ris.parse.scm b/mcgoron.ris.parse.scm
index 3711a96..2bd1c42 100644
--- a/mcgoron.ris.parse.scm
+++ b/mcgoron.ris.parse.scm
@@ -17,55 +17,79 @@
;;; Stream destructors.
(define (stream-pair=> stream)
+ ;; Destructure a stream into its car and cdr.
(cond-values
(when-ct (stream-pair? stream)
(values (stream-car stream) (stream-cdr stream)))))
(define (stream-predicate=> stream predicate?)
+ ;; Destructure a stream into its car and cdr if the car satisfies
+ ;; PREDICATE?
(cond-values
(after ((let (stream-pair=> stream) => (head stream))
(when (predicate? head)))
(values head stream))))
(define-syntax with-stream-predicates
+ ;; Sequence stream predicates together.
+ ;; The final cdr is bound to STREAM. The resultant CAR of each test
+ ;; is bound to NAME.
(syntax-rules ()
((_ stream ((predicate? name) ...) body ...)
(after ((let (stream-predicate=> stream predicate?) => (name stream))
...)
body ...))))
-;;; Mapping destructors.
+;;; Mapping utils.
-(define (mapping-ref=> mapping key)
- (mapping-ref mapping key (lambda () (values))))
-
-(define (mapping-new-field mapping key val)
- (mapping-set mapping key (flexvector val)))
+(define (add-to-mapping mapping name rest)
+ ;; Add REST to the flexvector mapped to NAME in MAPPING. If NAME is
+ ;; not mapped, allocate a new flexvector with REST as its first element.
+ (let ((fv (mapping-ref/default mapping name #f)))
+ (cond
+ (fv (flexvector-add-back! fv rest)
+ mapping)
+ (else (mapping-set mapping name (flexvector rest))))))
;;; Flexvector utils.
(define (flexvector-update-last! fv update)
+ ;; Update the last value in FV with the result of applying UPDATE
+ ;; to the last value in FV.
(let ((len (flexvector-length fv)))
(flexvector-set! fv
(- len 1)
(update (flexvector-ref fv (- len 1))))))
+(define (append-new-line! fv rest)
+ ;; Append to the last value in FV the string REST.
+ (flexvector-update-last! fv
+ (lambda (x)
+ (string-append x "\n" rest))))
+
;;; Parsing each line.
-(define (prefix-char? ch)
+(define (field-char? ch)
+ ;; Check if CH is a valid RIS field char.
(or (char-upper-case? ch) (char-numeric? ch)))
(define (end=> stream)
+ ;; Check if STREAM is at the end of a line.
(cond-values
+ ;; EOF is end of line
(when-ct (stream-null? stream)
stream)
+ ;; Handle CRLF
(with-stream-predicates stream (((cut eqv? #\return <>) _)
((cut eqv? #\newline <>) _))
stream)
+ ;; Handle LF
(with-stream-predicates stream (((cut eqv? #\newline <>) _))
stream)))
(define (read-rest stream)
+ ;; Read the rest of the stream up to the end of a line, and return
+ ;; it as a string.
(let ((fv (flexvector)))
(let read-rest ((stream stream))
(cond-thunk
@@ -77,12 +101,16 @@
(read-rest stream))
(else (raise (stream-error stream)))))))
-(define (prefix=> stream)
+(define (field=> stream)
+ ;; At the start of a line, read a field specifier, or return no
+ ;; values.
+ ;;
+ ;; TODO: This needs to read /[A-Z0-9]{2,} - /
(let ((ws? (lambda (x)
(char=? x #\space))))
(cond-values
- (with-stream-predicates stream ((prefix-char? char1)
- (prefix-char? char2)
+ (with-stream-predicates stream ((field-char? char1)
+ (field-char? char2)
(ws? _)
(ws? _)
((cut char=? #\- <>) _)
@@ -92,48 +120,63 @@
;;; Parsing of references.
+(define (add/special-fields mapping name rest)
+ (let ((mapping (add-to-mapping mapping name rest)))
+ ;; If NAME is an author field, append the name to the special field
+ ;; AUTHOR.
+ (if (author-field? name)
+ (add-to-mapping mapping "AUTHOR" rest)
+ mapping)))
+
(define (read-reference stream mapping last-name)
+ ;; Read an entire reference up to the ER field. This function starts
+ ;; at the beginning of a line.
+ ;;
+ ;; All new fields are added to MAPPING. Repeated fields are appended
+ ;; to a flexvector that the field name maps to.
+ ;;
+ ;; If the beginning of the line is not a field, then it is a
+ ;; continuation of the last field, specified by LAST-NAME.
+ ;;
+ ;; Returns (VALUES STREAM MAPPING) after ER.
(cond-thunk
- (after ((let (prefix=> stream) => (name rest stream)))
- (cond-thunk
- (when-ct (equal? name "ER")
- (values stream mapping))
- (after ((let (mapping-ref=> mapping name) => (fv)))
- (flexvector-add-back! fv rest)
- (read-reference stream mapping name))
- (else
- (read-reference stream
- (mapping-new-field mapping
- name
- rest)
- name))))
+ (after ((let (field=> stream) => (name rest stream)))
+ (if (equal? name "ER")
+ (values stream mapping)
+ (let ((mapping (add/special-fields mapping name rest)))
+ (read-reference stream mapping name))))
(when-ct (stream-null? stream)
(values stream mapping))
(else
(let-values
- (((fv) (mapping-ref mapping
- last-name
- (lambda () (error "internal error" stream))))
+ (((fv) (mapping-ref mapping last-name))
((rest stream) (read-rest stream)))
- (flexvector-update-last! fv
- (lambda (last)
- (string-append last "\n" rest)))
+ (append-new-line! rest)
(read-reference stream mapping last-name)))))
-(define (read-new-reference stream mapping)
+(define (new-mapping name rest)
+ (add-to-mapping (mapping (make-default-comparator)) name rest))
+
+(define (read-new-reference stream references-fv)
+ ;; Read the start of a new reference.
+ ;;
+ ;; If the file is at EOF, return MAPPING.
+ ;; Otherwise, read the TY field of the new reference. If there is no
+ ;; TY field, raise MALFORMED-REFERENCE-ERROR.
(cond-thunk
(when-ct (stream-null? stream)
- mapping)
- (after ((let (prefix=> stream) => (name rest stream))
+ references-fv)
+ (after ((let (field=> stream) => (name rest stream))
(when (string=? name "TY")))
- (let-values (((stream mapping)
+ (let-values (((stream ref-mapping)
(read-reference stream
- (mapping-new-field mapping name rest)
+ (new-mapping name rest)
name)))
- (read-new-reference stream mapping)))
+ (flexvector-add-back! references-fv ref-mapping)
+ (read-new-reference stream references-fv)))
(else (raise (malformed-reference-error stream)))))
(define (ris->mapping port)
(read-new-reference (port->stream port)
- (mapping (make-default-comparator))))
+ (flexvector)))
diff --git a/mcgoron.ris.parse.sld b/mcgoron.ris.parse.sld
index aa02ac0..ddc2f85 100644
--- a/mcgoron.ris.parse.sld
+++ b/mcgoron.ris.parse.sld
@@ -18,6 +18,6 @@
(import (scheme base) (scheme char)
(srfi 214) (srfi 128) (srfi 146) (srfi 26) (srfi 41)
(mcgoron cond-thunk) (mcgoron cond-thunk values)
- (mcgoron ris exceptions))
+ (mcgoron ris exceptions) (mcgoron ris data))
(export ris->mapping)
(include "mcgoron.ris.parse.scm")) \ No newline at end of file
diff --git a/sris.egg b/sris.egg
index bfb8566..a3a21e5 100644
--- a/sris.egg
+++ b/sris.egg
@@ -3,12 +3,17 @@
(synopsis "Parse RIS files")
(category "parsing")
(license "Apache-2.0")
- (dependencies "r7rs")
+ (dependencies "r7rs" "srfi-146" "srfi-113" "srfi-214" "srfi-128"
+ "srfi-146" "srfi-41" "srfi-128")
(test-dependencies "test")
(components (extension mcgoron.ris.exceptions
(source "mcgoron.ris.exceptions.sld")
(csc-options "-R" "r7rs" "-X" "r7rs"))
+ (extension mcgoron.ris.data
+ (source "mcgoron.ris.data.sld")
+ (csc-options "-R" "r7rs" "-X" "r7rs"))
(extension mcgoron.ris.parse
(source "mcgoron.ris.parse.sld")
(csc-options "-R" "r7rs" "-X" "r7rs")
- (component-dependencies mcgoron.ris.exceptions))))
+ (component-dependencies mcgoron.ris.exceptions
+ mcgoron.ris.data))))
diff --git a/tests/run.scm b/tests/run.scm
index 1d80564..f5c95ae 100644
--- a/tests/run.scm
+++ b/tests/run.scm
@@ -17,7 +17,8 @@
(import r7rs (mcgoron ris parse) test (srfi 146) (srfi 214))
(define file
- (call-with-input-file "10.1038_s41467-022-34369-4-citation.ris" ris->mapping))
+ (flexvector-front
+ (call-with-input-file "10.1038_s41467-022-34369-4-citation.ris" ris->mapping)))
(test-begin "10.1038_s41467-022-34369-4-citation.ris")
(define-syntax test-ref