comparison src/urweb.lex @ 1592:1c9f8f06c1d6

Support the full set of XHTML character entities
author Adam Chlipala <adam@chlipala.net>
date Sat, 05 Nov 2011 15:05:13 -0400
parents 60d438cdb3a5
children e44be6ece475
comparison
equal deleted inserted replaced
1591:20f898c29525 1592:1c9f8f06c1d6
111 111
112 fun initialize () = (xmlTag := []; 112 fun initialize () = (xmlTag := [];
113 xmlString := false) 113 xmlString := false)
114 114
115 115
116 structure StringMap = BinaryMapFn(struct
117 type ord_key = string
118 val compare = String.compare
119 end)
120
121 val entities = foldl (fn ((key, value), entities) => StringMap.insert (entities, key, value))
122 StringMap.empty Entities.all
123
116 fun unescape loc s = 124 fun unescape loc s =
117 let 125 let
118 fun process (s, acc) = 126 fun process (s, acc) =
119 let 127 let
120 val (befor, after) = Substring.splitl (fn ch => ch <> #"&") s 128 val (befor, after) = Substring.splitl (fn ch => ch <> #"&") s
139 if Substring.size code > 0 andalso Substring.sub (code, 0) = #"#" 147 if Substring.size code > 0 andalso Substring.sub (code, 0) = #"#"
140 andalso CharVectorSlice.all Char.isDigit (Substring.slice (code, 1, NONE)) then 148 andalso CharVectorSlice.all Char.isDigit (Substring.slice (code, 1, NONE)) then
141 let 149 let
142 val code = Substring.string (Substring.slice (code, 1, NONE)) 150 val code = Substring.string (Substring.slice (code, 1, NONE))
143 in 151 in
144 Option.map chr (Int.fromString code) 152 Option.map Utf8.encode (Int.fromString code)
145 end 153 end
146 else case Substring.string code of 154 else
147 "amp" => SOME #"&" 155 Option.map Utf8.encode (StringMap.find (entities, Substring.string code))
148 | "lt" => SOME #"<"
149 | "gt" => SOME #">"
150 | "quot" => SOME #"\""
151 | _ => NONE
152 in 156 in
153 case special of 157 case special of
154 NONE => (ErrorMsg.errorAt' loc ("Unsupported XML character entity " 158 NONE => (ErrorMsg.errorAt' loc ("Unsupported XML character entity "
155 ^ Substring.string code); 159 ^ Substring.string code);
156 "") 160 "")
157 | SOME ch => process (s, Substring.full (String.str ch) :: pre :: acc) 161 | SOME sp => process (s, Substring.full sp :: pre :: acc)
158 end 162 end
159 end 163 end
160 end 164 end
161 in 165 in
162 process (Substring.full s, []) 166 process (Substring.full s, [])