Mercurial > urweb
comparison xml/parse.sml @ 1592:1c9f8f06c1d6
Support the full set of XHTML character entities
author | Adam Chlipala <adam@chlipala.net> |
---|---|
date | Sat, 05 Nov 2011 15:05:13 -0400 |
parents | |
children | 7e2655b25ea1 |
comparison
equal
deleted
inserted
replaced
1591:20f898c29525 | 1592:1c9f8f06c1d6 |
---|---|
1 (* Copyright (c) 2011, Adam Chlipala | |
2 * All rights reserved. | |
3 * | |
4 * Redistribution and use in source and binary forms, with or without | |
5 * modification, are permitted provided that the following conditions are met: | |
6 * | |
7 * - Redistributions of source code must retain the above copyright notice, | |
8 * this list of conditions and the following disclaimer. | |
9 * - Redistributions in binary form must reproduce the above copyright notice, | |
10 * this list of conditions and the following disclaimer in the documentation | |
11 * and/or other materials provided with the distribution. | |
12 * - The names of contributors may not be used to endorse or promote products | |
13 * derived from this software without specific prior written permission. | |
14 * | |
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |
16 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |
19 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
20 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
21 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
22 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
24 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
25 * POSSIBILITY OF SUCH DAMAGE. | |
26 *) | |
27 | |
28 (* Building SML code from XML entity tables *) | |
29 | |
(* Generator entry point.
 *
 * Reads the three XHTML entity-definition files named below and prints, on
 * standard output, the text of an SML structure "Entities" whose value "all"
 * is a list of (entity name, decimal character code) pairs.
 *
 * Raises Fail when an "<!ENTITY " line does not have the expected shape or
 * when its expansion is not a decimal character reference.  Exceptions from
 * TextIO (for example a missing input file) propagate unchanged. *)
fun main () =
    let
        (* True iff s is a non-empty run of decimal digits.  The emptiness
         * check matters: CharVector.all holds vacuously for "", which would
         * otherwise let an empty reference through and emit invalid SML. *)
        fun isCode s = size s > 0 andalso CharVector.all Char.isDigit s

        (* Turn a quoted expansion token into its decimal code text.
         * Two shapes are accepted:
         *   "&#NNN;"      plain character reference
         *   "&#38;#NNN;"  reference whose ampersand is itself escaped
         * Anything else raises Fail. *)
        fun codeOf exp =
            if String.isPrefix "\"&#" exp andalso String.isSuffix ";\"" exp then
                let
                    (* Drop the 3-character prefix and the 2-character suffix. *)
                    val middle = String.substring (exp, 3, size exp - 5)
                    val code = if String.isPrefix "38;#" middle then
                                   String.extract (middle, 4, NONE)
                               else
                                   middle
                in
                    if isCode code then
                        code
                    else
                        raise Fail "Bad entity expression [1]"
                end
            else
                raise Fail "Bad entity expansion [2]"

        (* Print one list entry for every "<!ENTITY " line of file fname;
         * all other lines are ignored. *)
        fun doFile fname =
            let
                val inf = TextIO.openIn fname

                (* Split on whitespace and '>' so that the name and the quoted
                 * expansion are the second and third tokens of the line. *)
                fun emit line =
                    case String.tokens (fn ch => Char.isSpace ch orelse ch = #">") line of
                        "<!ENTITY" :: ent :: exp :: _ =>
                        print ("\t\t(\"" ^ ent ^ "\", " ^ codeOf exp ^ "),\n")
                      | _ => raise Fail "Bad ENTITY line"

                fun loop () =
                    case TextIO.inputLine inf of
                        NONE => ()
                      | SOME line =>
                        (if String.isPrefix "<!ENTITY " line then
                             emit line
                         else
                             ();
                         loop ())
            in
                (* Close the stream on the failure path too, then re-raise. *)
                (loop () handle e => (TextIO.closeIn inf; raise e));
                TextIO.closeIn inf
            end
    in
        print "structure Entities = struct\n";
        print "\tval all = [\n";
        doFile "xml/xhtml-lat1.ent";
        doFile "xml/xhtml-special.ent";
        doFile "xml/xhtml-symbol.ent";
        (* Every generated entry ends with a comma, so a final dummy entry
         * closes the list. *)
        print "\t(\"\", 0)]\n";
        print "end\n"
    end

(* Run the generator when this file is loaded. *)
val () = main ()