Mercurial > meta
comparison html.ur @ 9:8eaaca74a64c
Import HTML parser
author | Adam Chlipala <adam@chlipala.net> |
---|---|
date | Wed, 15 Dec 2010 09:27:46 -0500 |
parents | |
children | 1a915f89f23a |
comparison
equal
deleted
inserted
replaced
8:ec4de8f848f8 | 9:8eaaca74a64c |
---|---|
1 open Parse | |
2 | |
3 con attribute = fn t => {Nam : string, | |
4 Parse : string -> option t} | |
5 | |
6 con tag = fn ts => {Nam : string, | |
7 Attributes : $(map attribute ts), | |
8 Folder : folder ts, | |
9 Construct : ctx ::: {Unit} -> [[Body] ~ ctx] => $ts | |
10 -> xml ([Body] ++ ctx) [] [] -> xml ([Body] ++ ctx) [] []} | |
11 | |
12 fun tag [use] [ignore] [use ~ ignore] (fl : folder use) (name : string) (attrs : $(map attribute use)) | |
13 (construct : ctx ::: {Unit} -> [[Body] ~ ctx] => Basis.tag (use ++ ignore) ([Body] ++ ctx) ([Body] ++ ctx) [] []) = | |
14 {Nam = name, | |
15 Attributes = attrs, | |
16 Folder = fl, | |
17 Construct = fn [ctx] [[Body] ~ ctx] (ats : $use) (inner : xml ([Body] ++ ctx) [] []) => | |
18 Basis.tag None ats construct inner} | |
19 | |
20 fun simpleTag [ignore] name (bt : bodyTag ignore) : tag [] = | |
21 @@tag [[]] [ignore] ! _ name {} (fn [ctx] [[Body] ~ ctx] => bt ()) | |
22 | |
23 fun simpleTag' [use] [ignore] [use ~ ignore] (fl : folder use) | |
24 name (bt : bodyTag (use ++ ignore)) (ats : $(map attribute use)) : tag use = | |
25 @@tag [use] [ignore] ! fl name ats (fn [ctx] [[Body] ~ ctx] => bt ()) | |
26 | |
27 fun url name = {Nam = name, | |
28 Parse = checkUrl} | |
29 | |
30 datatype error a = | |
31 Good of a | |
32 | Bad of string | |
33 | |
34 fun format [tags] (fl : folder tags) (tags : $(map tag tags)) [ctx] [[Body] ~ ctx] s = | |
35 let | |
36 fun loop s : error (xml ([Body] ++ ctx) [] [] * string) = | |
37 case String.msplit {Haystack = s, Needle = "&<"} of | |
38 None => Good (cdata s, "") | |
39 | Some (pre, ch, post) => | |
40 case ch of | |
41 #"&" => | |
42 (case String.split post #";" of | |
43 None => Bad "No ';' after '&'" | |
44 | Some (code, post) => | |
45 let | |
46 val xml = | |
47 case code of | |
48 "lt" => <xml><</xml> | |
49 | "gt" => <xml>></xml> | |
50 | "amp" => <xml>&</xml> | |
51 | _ => <xml/> | |
52 in | |
53 case loop post of | |
54 Good (after, post) => Good (<xml>{[pre]}{xml}{after}</xml>, post) | |
55 | x => x | |
56 end) | |
57 | _ => | |
58 if String.length post > 0 && String.sub post 0 = #"/" then | |
59 case String.split post #"\x3E" of | |
60 None => Bad "No '>' after '</'" | |
61 | Some (_, post) => Good (<xml>{[pre]}</xml>, post) | |
62 else | |
63 case String.msplit {Haystack = post, Needle = " >"} of | |
64 None => Bad "No '>' after '<'" | |
65 | Some (tname, ch, post) => | |
66 @foldR [tag] [fn _ => unit -> error (xml ([Body] ++ ctx) [] [] * string)] | |
67 (fn [nm :: Name] [ts :: {Type}] [r :: {{Type}}] [[nm] ~ r] (meta : tag ts) acc () => | |
68 if meta.Nam = tname then | |
69 let | |
70 fun doAttrs (ch, post, ats : $(map option ts)) = | |
71 if String.length post > 0 && Char.isSpace (String.sub post 0) then | |
72 doAttrs (ch, String.substring post {Start = 1, | |
73 Len = String.length post - 1}, | |
74 ats) | |
75 else | |
76 case ch of | |
77 #"\x3E" => Good (ats, post) | |
78 | _ => | |
79 case String.split post #"=" of | |
80 None => | |
81 (case String.split post #"\x3E" of | |
82 None => Bad "No tag ender '\x3E'" | |
83 | Some (_, post) => Good (ats, post)) | |
84 | Some (aname, post) => | |
85 if String.length post >= 1 && String.sub post 0 = #"\"" then | |
86 case String.split (String.substring post | |
87 {Start = 1, | |
88 Len = String.length post | |
89 - 1}) | |
90 #"\"" of | |
91 None => Bad "No '\"' to end attribute value" | |
92 | Some (aval, post) => | |
93 let | |
94 val ats = | |
95 @map2 [attribute] [option] [option] | |
96 (fn [t] meta v => | |
97 if aname = meta.Nam then | |
98 meta.Parse aval | |
99 else | |
100 v) | |
101 meta.Folder meta.Attributes ats | |
102 in | |
103 doAttrs (#" ", post, ats) | |
104 end | |
105 else | |
106 Bad "Attribute value doesn't begin with quote" | |
107 in | |
108 case doAttrs (ch, post, @map0 [option] (fn [t :: Type] => None) | |
109 meta.Folder) of | |
110 Good (ats, post) => | |
111 let | |
112 val ats = | |
113 @map2 [attribute] [option] [id] | |
114 (fn [t] meta v => | |
115 case v of | |
116 None => error <xml>Missing attribute {[meta.Nam]} | |
117 for {[tname]}</xml> | |
118 | Some v => v) | |
119 meta.Folder meta.Attributes ats | |
120 in | |
121 case loop post of | |
122 Good (inner, post) => | |
123 (case loop post of | |
124 Good (after, post) => | |
125 Good (<xml>{[pre]}{meta.Construct [ctx] ! | |
126 ats inner}{after}</xml>, post) | |
127 | x => x) | |
128 | x => x | |
129 end | |
130 | Bad s => Bad s | |
131 end | |
132 else | |
133 acc ()) | |
134 (fn () => Bad ("Unknown HTML tag " ^ tname)) fl tags () | |
135 in | |
136 case loop s of | |
137 Bad msg => Failure msg | |
138 | Good (xml, _) => Success xml | |
139 end | |
140 | |
141 val b = simpleTag "b" @@b | |
142 val i = simpleTag "i" @@i | |
143 val a = simpleTag' "a" @@a {Href = url "href"} | |
144 |