commit e690d99f574cafac755704e02de3c41d0224561f
parent b5f09c0d39e85624273202cf13427f0c7de49f3d
Author: Jay McCarthy <jay@racket-lang.org>
Date: Mon, 28 May 2012 11:52:23 -0600
Fixing PR12779
original commit: 8b035f3c736d92c5e8ae896b3044eb2e32f44c8a
Diffstat:
4 files changed, 194 insertions(+), 41 deletions(-)
diff --git a/collects/scriblib/bibtex.rkt b/collects/scriblib/bibtex.rkt
@@ -12,23 +12,23 @@
(define (perror ip sym fmt . args)
(define loc (call-with-values (λ () (port-next-location ip)) list))
(apply error sym (string-append fmt " @ line ~a column ~a byte ~a") (append args loc)))
-
+
(define (read-while pred ip)
(list->string
(let loop ()
(match (peek-char ip)
- [(? pred)
+ [(and (? char?) (? pred))
(cons (read-char ip)
(loop))]
[_
empty]))))
-
+
(define (read-until pred ip)
(read-while (negate pred) ip))
-
+
(define (slurp-whitespace ip)
(read-while (λ (c) (and (char? c) (char-whitespace? c))) ip))
-
+
(define (read-entries ip)
(slurp-whitespace ip)
(match (read-char ip)
@@ -41,10 +41,13 @@
[(? eof-object?)
(void)]
[c
- (perror ip 'read-entries "Expected % or @, got ~v" c)]))
-
+ ;; All other characters are comments.
+ (read-entries ip)]))
+
(define (read-entry ip)
- (match (read-until (λ (c) (char=? c #\{)) ip)
+ (match (read-until (λ (c) (or (char=? c #\{)
+ (char=? c #\()))
+ ip)
[(app string-downcase "string")
(slurp-whitespace ip)
(match (read-char ip)
@@ -84,56 +87,91 @@
(let loop ()
(slurp-whitespace ip)
(define atag (read-tag ip))
- (slurp-whitespace ip)
- (match (read-char ip)
- [#\=
+ (cond
+ [(string=? "" atag)
+ (read-char ip)
+ (hash)]
+ [else
(slurp-whitespace ip)
- (define aval (read-value ip))
(match (read-char ip)
- [#\,
- (hash-set (loop) atag aval)]
- [#\}
- (hash atag aval)]
+ [#\=
+ (slurp-whitespace ip)
+ (define aval (read-value ip))
+ (slurp-whitespace ip)
+ (match (read-char ip)
+ [#\,
+ (hash-set (loop) atag aval)]
+ [#\}
+ (hash atag aval)]
+ [c
+ (perror ip 'read-entry "Parsing entry, expected , or }, got ~v; label is ~v; atag is ~v; aval is ~v" c label atag aval)])]
[c
- (perror ip 'read-entry "Parsing entry, expected , or }, got ~v; label is ~v; atag is ~v; aval is ~v" c label atag aval)])]
- [c
- (perror ip 'read-entry "Parsing entry tag, expected =, got ~v; label is ~v; atag is ~v" c label atag)])))
+ (perror ip 'read-entry "Parsing entry tag, expected =, got ~v; label is ~v; atag is ~v" c label atag)])])))
(hash-set! ENTRY-DB label
(hash-set alist 'type typ))]))
-
+
(define (read-tag ip)
(slurp-whitespace ip)
- (string-downcase
- (read-until
+ (string-downcase
+ (read-until
(λ (c) (or (char-whitespace? c)
- (char=? c #\=)))
+ (char=? c #\=)
+ (char=? c #\{)
+ (char=? c #\})))
ip)))
-
+
+ (define (read-braced-value ip)
+ (read-char ip)
+ (let loop ()
+ (define first-part (read-until (λ (c) (or (char=? c #\{) (char=? c #\})))
+ ip))
+ (match (peek-char ip)
+ [#\{
+ (string-append first-part (read-value ip) (loop))]
+ [#\}
+ (read-char ip)
+ first-part])))
+
(define (read-value ip)
(slurp-whitespace ip)
+ (define first-part (read-value-single ip))
+ (slurp-whitespace ip)
+ (match (peek-char ip)
+ [#\#
+ (read-char ip)
+ (string-append first-part (read-value ip))]
+ [_
+ first-part]))
+
+ (define (read-value-single ip)
+ (slurp-whitespace ip)
(match (peek-char ip)
[#\{
+ (read-braced-value ip)]
+ [#\"
(read-char ip)
(let loop ()
- (define first-part (read-until (λ (c) (or (char=? c #\{) (char=? c #\})))
+ (define first-part (read-until (λ (c) (or (char=? c #\{) (char=? c #\")))
ip))
(match (peek-char ip)
[#\{
- (string-append first-part (read-value ip) (loop))]
- [#\}
+ (string-append first-part (read-braced-value ip) (loop))]
+ [#\"
(read-char ip)
first-part]))]
[(? char-numeric?)
(read-while char-numeric? ip)]
[(? char-alphabetic?)
- (define string-tag (read-until (λ (c) (char=? c #\,)) ip))
+ (define string-tag (read-until (λ (c) (or (char-whitespace? c)
+ (char=? c #\,)))
+ ip))
(hash-ref STRING-DB string-tag
(λ () string-tag))]
[c
(perror ip 'read-value "Parsing value, expected {, got ~v" c)]))
-
+
(read-entries ip)
-
+
(bibdb ENTRY-DB (make-hash)))
(define (path->bibdb pth)
@@ -155,11 +193,11 @@
(define bibtex-db (path->bibdb bib-pth))
(define-cite autobib-cite autobib-citet generate-bibliography-id)
(define ((make-citer citer) f . r)
- (apply citer
- (filter-map
+ (apply citer
+ (filter-map
(λ (key)
- (and (not (string=? "\n" key))
- (generate-bib bibtex-db key)))
+ (and (not (string=? "\n" key))
+ (generate-bib bibtex-db key)))
(append-map (curry regexp-split #rx" +")
(cons f r)))))
(define ~cite-id (make-citer autobib-cite))
@@ -189,7 +227,7 @@
(define (raw-attr a [def #f])
(hash-ref the-raw a def))
(define (raw-attr* a)
- (hash-ref the-raw a
+ (hash-ref the-raw a
(λ () (error 'bibtex "Key ~a is missing attribute ~a, has ~a"
key a the-raw))))
(match (raw-attr 'type)
@@ -228,14 +266,17 @@
(make-bib #:title (raw-attr "title")
#:author (parse-author (raw-attr "author"))
#:date (raw-attr "year")
- #:location
+ #:location
(match* ((raw-attr "institution") (raw-attr "number"))
- [(#f #f) @elem{}]
- [(l #f) @elem{@|l|}]
- [(#f n) @elem{@|n|}]
- [(l n) @elem{@|l|, @|n|}])
+ [(#f #f) @elem{}]
+ [(l #f) @elem{@|l|}]
+ [(#f n) @elem{@|n|}]
+ [(l n) @elem{@|l|, @|n|}])
#:url (raw-attr "url"))]
[_
(make-bib #:title (format "~v" the-raw))]))))
-(provide define-bibtex-cite)
+(provide (struct-out bibdb)
+ path->bibdb
+ bibtex-parse
+ define-bibtex-cite)
diff --git a/collects/scriblib/scribblings/bibtex.scrbl b/collects/scriblib/scribblings/bibtex.scrbl
@@ -18,3 +18,18 @@ Uses @racket[define-cite] from @racketmodname[scriblib/autobib], but augments th
Each string is broken along spaces into citation keys that are looked up in the BibTeX database and turned into @racket[bib?] structures.
}
+
+@defstruct*[bibdb ([raw (hash/c string? (hash/c string? string?))]
+ [bibs (hash/c string? bib?)])]{
+ Represents a BibTeX database. The @racket[_raw] hash table maps each label in the file to a hash table of that entry's attributes and their values. The @racket[_bibs] hash table maps the same labels to Scribble data structures representing the same information.
+ }
+
+@defproc[(path->bibdb [path path-string?])
+ bibdb?]{
+ Parses the BibTeX file at @racket[path] into a BibTeX database.
+ }
+
+@defproc[(bibtex-parse [ip input-port?])
+ bibdb?]{
+ Parses the BibTeX source read from @racket[ip] into a BibTeX database.
+ }
diff --git a/collects/tests/scriblib/bibtex.rkt b/collects/tests/scriblib/bibtex.rkt
@@ -7,6 +7,15 @@
(test
(let ()
+ (define example (path->bibdb example.bib))
+ (define raw (bibdb-raw example))
+
+ (test
+ (hash-ref (hash-ref raw "sweig42") "month") => "march"
+ (hash-ref (hash-ref raw "sweig42a") "month") => "1~mar"
+ (hash-ref (hash-ref raw "sweig42b") "month") => "1~march"
+ (hash-ref (hash-ref raw "sweig42c") "month") => "1~marcha"))
+ (let ()
(define-bibtex-cite example.bib
~cite-id citet-id generate-bibliography-id)
diff --git a/collects/tests/scriblib/example.bib b/collects/tests/scriblib/example.bib
@@ -309,3 +309,91 @@
Year = {1987},
Bdsk-Url-1 = {http://doi.acm.org/10.1145/41625.41654}}
+@article{hochreiter_long_1997,
+ author = {Hochreiter, S. and Schmidhuber, J.},
+ title = {Long {Short-Term} Memory},
+ volume = {9},
+ number = {8},
+ journal = {Neural Computation},
+ year = {1997},
+ pages = {1735--1780}
+}
+
+@Article{py03,
+ author = {Xavier D\'ecoret},
+ title = "PyBiTex",
+ year = 2003
+}
+
+@Article{key03,
+ title = "A {bunch {of} braces {in}} title"
+}
+
+@Article{key01,
+ author = "Simon {"}the {saint"} Templar",
+}
+
+@Article{key01,
+ title = "The history of @ sign"
+}
+
+Some {{comments} with unbalanced braces
+....and a "commented" entry...
+
+Book{landru21,
+ author = {Landru, Henri D\'esir\'e},
+ title = {A hundred recipes for you wife},
+ publisher = {Culinary Expert Series},
+ year = 1921
+}
+
+..some other comments..before a valid entry...
+
+@Book{steward03,
+ author = { Martha Steward },
+ title = {Cooking behind bars},
+ publisher = {Culinary Expert Series},
+ year = 2003
+}
+
+...and finally an entry commented by the use of the special @Comment entry type.
+
+@Comment{steward03,
+ author = {Martha Steward},
+ title = {Cooking behind bars},
+ publisher = {Culinary Expert Series},
+ year = 2003
+}
+
+@Comment{
+ @Book{steward03,
+ author = {Martha Steward},
+ title = {Cooking behind bars},
+ publisher = {Culinary Expert Series},
+ year = 2003
+ }
+}
+
+@String{mar = "march"}
+
+@Book{sweig42,
+ Author = { Stefan Sweig },
+ title = { The impossible book },
+ publisher = { Dead Poet Society},
+ year = 1942,
+ month = mar
+}
+
+@Book{sweig42a,
+ month = "1~mar"
+}
+
+@Book{sweig42b,
+ month = "1~" # mar
+}
+
+@Book{sweig42c,
+ month = "1~" # mar # "a"
+}
+
+@preamble{"This bibliography was generated on \today"}