comparison xml/parse.sml @ 1592:1c9f8f06c1d6

Support the full set of XHTML character entities
author Adam Chlipala <adam@chlipala.net>
date Sat, 05 Nov 2011 15:05:13 -0400
parents
children 7e2655b25ea1
comparison
equal deleted inserted replaced
1591:20f898c29525 1592:1c9f8f06c1d6
1 (* Copyright (c) 2011, Adam Chlipala
2 * All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * - Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * - Redistributions in binary form must reproduce the above copyright notice,
10 * this list of conditions and the following disclaimer in the documentation
11 * and/or other materials provided with the distribution.
12 * - The names of contributors may not be used to endorse or promote products
13 * derived from this software without specific prior written permission.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
19 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 * POSSIBILITY OF SUCH DAMAGE.
26 *)
27
28 (* Building SML code from XML entity tables *)
29
(* Write, to standard output, the SML source of a structure [Entities] whose
 * value [all] is a list of (entity name, code point) pairs.  The pairs are
 * taken from the "<!ENTITY ...>" declarations found in the three XHTML entity
 * files named at the bottom of this function.
 *
 * Raises Fail when a declaration cannot be understood, and lets any IO
 * exception from TextIO propagate.  Output already printed before a failure
 * is not retracted. *)
fun main () =
    let
        (* Convert the quoted replacement text of one declaration into the
         * decimal code point it denotes, returned as a string of digits.
         * Two shapes are accepted:
         *   "&#NNN;"       a plain decimal character reference
         *   "&#38;#NNN;"   the same, with the ampersand itself written as
         *                  the character reference &#38;
         * Anything else raises Fail. *)
        fun codePoint exp =
            if String.isPrefix "\"&#" exp andalso String.isSuffix ";\"" exp then
                let
                    (* Drop the 3 leading characters ["&#] and the 2 trailing
                     * characters [;"]. *)
                    val middle = String.substring (exp, 3, size exp - 5)

                    val digits =
                        if String.isPrefix "38;#" middle then
                            String.extract (middle, 4, NONE)
                        else
                            middle
                in
                    (* Check both shapes: the result is spliced verbatim into
                     * generated code as an integer literal, so it must be a
                     * non-empty run of decimal digits. *)
                    if size digits > 0 andalso CharVector.all Char.isDigit digits then
                        digits
                    else
                        raise Fail "Bad entity expression [1]"
                end
            else
                raise Fail "Bad entity expansion [2]"

        (* Declarations are split on whitespace and on the closing '>'. *)
        fun isDelim ch = Char.isSpace ch orelse ch = #">"

        (* Print one list element for every line of [fname] that starts with
         * "<!ENTITY ".  All other lines are skipped. *)
        fun doFile fname =
            let
                val inf = TextIO.openIn fname

                fun loop () =
                    case TextIO.inputLine inf of
                        NONE => ()
                      | SOME line =>
                        (if String.isPrefix "<!ENTITY " line then
                             case String.tokens isDelim line of
                                 "<!ENTITY" :: ent :: exp :: _ =>
                                 print ("\t\t(\"" ^ ent ^ "\", " ^ codePoint exp ^ "),\n")
                               | _ => raise Fail "Bad ENTITY line"
                         else
                             ();
                         loop ())
            in
                (* Close the stream on the failure path too, then re-raise. *)
                (loop () handle ex => (TextIO.closeIn inf; raise ex));
                TextIO.closeIn inf
            end
    in
        print "structure Entities = struct\n";
        print "\tval all = [\n";
        List.app doFile ["xml/xhtml-lat1.ent",
                         "xml/xhtml-special.ent",
                         "xml/xhtml-symbol.ent"];
        (* Every generated element ends with a comma, so a dummy final
         * element closes the list. *)
        print "\t(\"\", 0)]\n";
        print "end\n"
    end
76
(* Run the generator as soon as this file is evaluated. *)
val _ = main ()