decode.rkt (14138B)
1 #lang scheme/base 2 (require "core.rkt" 3 "private/provide-structs.rkt" 4 "decode-struct.rkt" 5 racket/contract/base 6 racket/contract/combinator 7 scheme/list) 8 9 (define (pre-content? i) 10 (or (string? i) 11 (content? i) 12 (and (splice? i) 13 (andmap pre-content? (splice-run i))) 14 (and (list? i) 15 (andmap pre-content? i)) 16 (void? i))) 17 18 (define (pre-flow? i) 19 (or (string? i) 20 (content? i) 21 (block? i) 22 (and (splice? i) 23 (andmap pre-flow? (splice-run i))) 24 (and (list? i) 25 (andmap pre-flow? i)) 26 (void? i))) 27 28 (define (pre-part? v) 29 (or (pre-flow? v) 30 (title-decl? v) 31 (part-start? v) 32 (part-index-decl? v) 33 (part-collect-decl? v) 34 (part-tag-decl? v) 35 (part? v) 36 (and (splice? v) 37 (andmap pre-part? (splice-run v))) 38 (and (list? v) 39 (andmap pre-part? v)))) 40 41 (provide-structs 42 [title-decl ([tag-prefix (or/c false/c string?)] 43 [tags (listof tag?)] 44 [version (or/c string? false/c)] 45 [style style?] 46 [content content?])] 47 [part-start ([depth integer?] 48 [tag-prefix (or/c false/c string?)] 49 [tags (listof tag?)] 50 [style style?] 51 [title content?])] 52 [splice ([run list?])] 53 [part-index-decl ([plain-seq (listof string?)] 54 [entry-seq list?])] 55 [part-collect-decl ([element (or/c element? part-relative-element?)])] 56 [part-tag-decl ([tag tag?])]) 57 58 (provide whitespace? 59 pre-content? 60 pre-flow? 61 pre-part?) 62 63 (provide/contract 64 [decode (-> (listof pre-part?) 65 part?)] 66 [decode-part (-> (listof pre-part?) 67 (listof string?) 68 (or/c #f content?) 69 exact-nonnegative-integer? 70 part?)] 71 [decode-flow (-> (listof pre-flow?) 72 (listof block?))] 73 [decode-paragraph (-> (listof pre-content?) 74 paragraph?)] 75 [decode-compound-paragraph (-> (listof pre-flow?) 76 block?)] 77 [decode-content (-> (listof pre-content?) 78 content?)] 79 [rename decode-content decode-elements 80 (-> (listof pre-content?) 81 content?)] 82 [decode-string (-> string? content?)] 83 [clean-up-index-string (-> string? string?)]) 84 85 (define (spliceof c) 86 (define name `(spliceof ,(contract-name c))) 87 (define p (flat-contract-predicate c)) 88 (make-flat-contract #:name name 89 #:first-order (lambda (x) 90 (and (splice? x) 91 (andmap p (splice-run x)))))) 92 (provide/contract 93 [spliceof (flat-contract? . -> . flat-contract?)]) 94 95 (define the-part-index-desc (make-part-index-desc)) 96 97 (define (clean-up-index-string s) 98 ;; Collapse whitespace, and remove leading or trailing spaces, which 99 ;; might appear there due to images or something else that gets 100 ;; dropped in string form. 101 (let* ([s (regexp-replace* #px"\\s+" s " ")] 102 [s (regexp-replace* #rx"^ " s "")] 103 [s (regexp-replace* #rx" $" s "")]) 104 (datum-intern-literal s))) 105 106 107 (define (decode-string s) 108 (define pattern #rx"(---|--|``|''|'|`)") 109 (let loop ([start 0]) 110 (cond 111 [(regexp-match-positions pattern s start) 112 => (lambda (m) 113 (define the-match (substring s (caar m) (cdar m))) 114 (list* (datum-intern-literal (substring s start (caar m))) 115 (cond 116 [(string=? the-match "---") 'mdash] 117 [(string=? the-match "--") 'ndash] 118 [(string=? the-match "``") 'ldquo] 119 [(string=? the-match "''") 'rdquo] 120 [(string=? the-match "'") 'rsquo] 121 [(string=? the-match "`") 'lsquo]) 122 (loop (cdar m))))] 123 ;; Common case: nothing to decode, so don't copy strings. 124 ;; Assume that the input is already interned. 125 [(= start 0) 126 (list s)] 127 [else 128 (list (datum-intern-literal (substring s start)))]))) 129 130 131 (define (line-break? v) 132 (equal? v "\n")) 133 134 (define (whitespace? v) 135 (and (string? v) (regexp-match? #px"^[\\s]*$" v))) 136 137 (define (decode-accum-para accum) 138 (if (andmap whitespace? accum) 139 null 140 (list (decode-compound-paragraph (reverse (skip-whitespace accum)))))) 141 142 (define (decode-flow* l keys colls tag-prefix tags vers style title part-depth) 143 (let loop ([l l] [next? #f] [keys keys] [colls colls] [accum null] 144 [title title] [tag-prefix tag-prefix] [tags tags] [vers vers] 145 [style style]) 146 (cond 147 [(null? l) 148 (let ([k-tags (map (lambda (k) `(idx ,(make-generated-tag))) keys)] 149 [tags (if (null? tags) 150 (list `(part ,(make-generated-tag))) 151 tags)]) 152 (make-part 153 tag-prefix 154 (append tags k-tags) 155 title 156 (if vers 157 (make-style (style-name style) 158 (cons (make-document-version vers) 159 (style-properties style))) 160 style) 161 (let ([l (append 162 (map (lambda (k tag) 163 (make-index-element #f null tag 164 (part-index-decl-plain-seq k) 165 (part-index-decl-entry-seq k) 166 #f)) 167 keys k-tags) 168 colls)]) 169 (if (and title 170 (not (memq 'hidden (style-properties style))) 171 (not (memq 'no-index (style-properties style)))) 172 (cons (make-index-element 173 #f null (car tags) 174 (list (clean-up-index-string 175 (regexp-replace #px"^\\s+(?:(?:A|An|The)\\s)?" 176 (content->string title) ""))) 177 (list (make-element #f title)) 178 the-part-index-desc) 179 l) 180 l)) 181 (decode-accum-para accum) 182 null))] 183 [(void? (car l)) 184 (loop (cdr l) next? keys colls accum title tag-prefix tags vers style)] 185 [(title-decl? (car l)) 186 (cond [(not part-depth) (error 'decode "misplaced title: ~e" (car l))] 187 [title (error 'decode "found extra title: ~v" (car l))] 188 [else (loop (cdr l) next? keys colls accum 189 (title-decl-content (car l)) 190 (title-decl-tag-prefix (car l)) 191 (title-decl-tags (car l)) 192 (title-decl-version (car l)) 193 (title-decl-style (car l)))])] 194 #; 195 ;; Blocks are now handled by decode-accum-para 196 [(block? (car l)) 197 (let ([para (decode-accum-para accum)] 198 [part (decode-flow* (cdr l) keys colls tag-prefix tags vers style 199 title part-depth)]) 200 (make-part 201 (part-tag-prefix part) 202 (part-tags part) 203 (part-title-content part) 204 (part-style part) 205 (part-to-collect part) 206 (append para (list (car l)) (part-flow part)) 207 (part-parts part)))] 208 [(part? (car l)) 209 (let ([para (decode-accum-para accum)] 210 [part (decode-flow* (cdr l) keys colls tag-prefix tags vers style 211 title part-depth)]) 212 (make-part 213 (part-tag-prefix part) 214 (part-tags part) 215 (part-title-content part) 216 (part-style part) 217 (part-to-collect part) 218 (append para (part-blocks part)) 219 (cons (car l) (part-parts part))))] 220 [(part-start? (car l)) 221 (unless part-depth 222 (error 'decode "misplaced part; title: ~s" (part-start-title (car l)))) 223 (unless ((part-start-depth (car l)) . <= . part-depth) 224 (error 'decode 225 "misplaced part (the part is more than one layer deeper than its container); title: ~s" 226 (part-start-title (car l)))) 227 (let ([s (car l)]) 228 (let loop ([l (cdr l)] [s-accum null]) 229 (if (or (null? l) 230 (and (part-start? (car l)) 231 ((part-start-depth (car l)) . <= . part-depth)) 232 (part? (car l))) 233 (let ([para (decode-accum-para accum)] 234 [s (decode-styled-part (reverse s-accum) 235 (part-start-tag-prefix s) 236 (part-start-tags s) 237 (part-start-style s) 238 (part-start-title s) 239 (add1 part-depth))] 240 [part (decode-flow* l keys colls tag-prefix tags vers style 241 title part-depth)]) 242 (make-part (part-tag-prefix part) 243 (part-tags part) 244 (part-title-content part) 245 (part-style part) 246 (part-to-collect part) 247 para 248 (cons s (part-parts part)))) 249 (cond 250 [(splice? (car l)) 251 (loop (append (splice-run (car l)) (cdr l)) s-accum)] 252 [(list? (car l)) 253 (loop (append (car l) (cdr l)) s-accum)] 254 [else 255 (loop (cdr l) (cons (car l) s-accum))]))))] 256 [(splice? (car l)) 257 (loop (append (splice-run (car l)) (cdr l)) 258 next? keys colls accum title tag-prefix tags vers style)] 259 [(list? (car l)) 260 (loop (append (car l) (cdr l)) 261 next? keys colls accum title tag-prefix tags vers style)] 262 [(part-index-decl? (car l)) 263 (loop (cdr l) next? (cons (car l) keys) colls accum title tag-prefix 264 tags vers style)] 265 [(part-collect-decl? (car l)) 266 (loop (cdr l) next? keys 267 (cons (part-collect-decl-element (car l)) colls) 268 accum title tag-prefix tags vers style)] 269 [(part-tag-decl? (car l)) 270 (loop (cdr l) next? keys colls accum title tag-prefix 271 (append tags (list (part-tag-decl-tag (car l)))) 272 vers style)] 273 [(null? (cdr l)) 274 (loop null #f keys colls (cons (car l) accum) title tag-prefix tags 275 vers style)] 276 [(and (pair? (cdr l)) 277 (or (splice? (cadr l)) 278 (list? (cadr l)))) 279 (loop (cons (car l) (append ((if (splice? (cadr l)) splice-run values) (cadr l)) (cddr l))) 280 next? keys colls accum title tag-prefix tags vers style)] 281 [(line-break? (car l)) 282 (if next? 283 (loop (cdr l) #t keys colls accum title tag-prefix tags vers style) 284 (let ([m (match-newline-whitespace (cdr l))]) 285 (if m 286 (let ([part (loop m #t keys colls null title tag-prefix tags vers 287 style)]) 288 (make-part 289 (part-tag-prefix part) 290 (part-tags part) 291 (part-title-content part) 292 (part-style part) 293 (part-to-collect part) 294 (append (decode-accum-para accum) 295 (part-blocks part)) 296 (part-parts part))) 297 (loop (cdr l) #f keys colls (cons (car l) accum) title tag-prefix 298 tags vers style))))] 299 [else (loop (cdr l) #f keys colls (cons (car l) accum) title tag-prefix 300 tags vers style)]))) 301 302 (define (decode-part l tags title depth) 303 (decode-flow* l null null #f tags #f plain title depth)) 304 305 (define (decode-styled-part l tag-prefix tags style title depth) 306 (decode-flow* l null null tag-prefix tags #f style title depth)) 307 308 (define (decode-flow l) 309 (part-blocks (decode-flow* l null null #f null #f plain #f #f))) 310 311 (define (match-newline-whitespace l) 312 (cond [(null? l) #f] 313 [(void? (car l)) (match-newline-whitespace (cdr l))] 314 [(line-break? (car l)) (skip-whitespace l)] 315 [(splice? (car l)) 316 (match-newline-whitespace (append (splice-run (car l)) (cdr l)))] 317 [(list? (car l)) 318 (match-newline-whitespace (append (car l) (cdr l)))] 319 [(whitespace? (car l)) (match-newline-whitespace (cdr l))] 320 [else #f])) 321 322 (define (skip-whitespace l) 323 (if (or (null? l) 324 (not (or (whitespace? (car l)) 325 (void? (car l))))) 326 l 327 (skip-whitespace (cdr l)))) 328 329 (define (decode l) 330 (decode-part l null #f 0)) 331 332 (define (decode-paragraph l) 333 (make-paragraph plain (decode-content l))) 334 335 (define (decode-content l) 336 (append-map (lambda (s) (cond 337 [(string? s) (decode-string s)] 338 [(void? s) null] 339 [(splice? s) (decode-content (splice-run s))] 340 [(list? s) (decode-content s)] 341 [else (list s)])) 342 (skip-whitespace l))) 343 344 (define (decode-compound-paragraph l) 345 (define (finish-accum para-accum) 346 (if (null? para-accum) 347 null 348 (list (make-paragraph plain (skip-whitespace (apply append (reverse para-accum))))))) 349 (let ([r (let loop ([l (skip-whitespace l)] 350 [para-accum null]) 351 (cond 352 [(null? l) 353 (finish-accum para-accum)] 354 [else 355 (let ([s (car l)]) 356 (cond 357 [(block? s) (append 358 (finish-accum para-accum) 359 (cons s (loop (skip-whitespace (cdr l)) null)))] 360 [(string? s) (loop (cdr l) 361 (cons (decode-string s) para-accum))] 362 [else (loop (cdr l) 363 (cons (list (car l)) para-accum))]))]))]) 364 (cond 365 [(null? r) 366 (make-paragraph plain null)] 367 [(null? (cdr r)) 368 (car r)] 369 [(make-compound-paragraph plain r)])))