Mercurial > urweb
comparison src/urweb.lex @ 1592:1c9f8f06c1d6
Support the full set of XHTML character entities
author | Adam Chlipala <adam@chlipala.net> |
---|---|
date | Sat, 05 Nov 2011 15:05:13 -0400 |
parents | 60d438cdb3a5 |
children | e44be6ece475 |
comparison
equal
deleted
inserted
replaced
1591:20f898c29525 | 1592:1c9f8f06c1d6 |
---|---|
111 | 111 |
112 fun initialize () = (xmlTag := []; | 112 fun initialize () = (xmlTag := []; |
113 xmlString := false) | 113 xmlString := false) |
114 | 114 |
115 | 115 |
116 structure StringMap = BinaryMapFn(struct | |
117 type ord_key = string | |
118 val compare = String.compare | |
119 end) | |
120 | |
121 val entities = foldl (fn ((key, value), entities) => StringMap.insert (entities, key, value)) | |
122 StringMap.empty Entities.all | |
123 | |
116 fun unescape loc s = | 124 fun unescape loc s = |
117 let | 125 let |
118 fun process (s, acc) = | 126 fun process (s, acc) = |
119 let | 127 let |
120 val (befor, after) = Substring.splitl (fn ch => ch <> #"&") s | 128 val (befor, after) = Substring.splitl (fn ch => ch <> #"&") s |
139 if Substring.size code > 0 andalso Substring.sub (code, 0) = #"#" | 147 if Substring.size code > 0 andalso Substring.sub (code, 0) = #"#" |
140 andalso CharVectorSlice.all Char.isDigit (Substring.slice (code, 1, NONE)) then | 148 andalso CharVectorSlice.all Char.isDigit (Substring.slice (code, 1, NONE)) then |
141 let | 149 let |
142 val code = Substring.string (Substring.slice (code, 1, NONE)) | 150 val code = Substring.string (Substring.slice (code, 1, NONE)) |
143 in | 151 in |
144 Option.map chr (Int.fromString code) | 152 Option.map Utf8.encode (Int.fromString code) |
145 end | 153 end |
146 else case Substring.string code of | 154 else |
147 "amp" => SOME #"&" | 155 Option.map Utf8.encode (StringMap.find (entities, Substring.string code)) |
148 | "lt" => SOME #"<" | |
149 | "gt" => SOME #">" | |
150 | "quot" => SOME #"\"" | |
151 | _ => NONE | |
152 in | 156 in |
153 case special of | 157 case special of |
154 NONE => (ErrorMsg.errorAt' loc ("Unsupported XML character entity " | 158 NONE => (ErrorMsg.errorAt' loc ("Unsupported XML character entity " |
155 ^ Substring.string code); | 159 ^ Substring.string code); |
156 "") | 160 "") |
157 | SOME ch => process (s, Substring.full (String.str ch) :: pre :: acc) | 161 | SOME sp => process (s, Substring.full sp :: pre :: acc) |
158 end | 162 end |
159 end | 163 end |
160 end | 164 end |
161 in | 165 in |
162 process (Substring.full s, []) | 166 process (Substring.full s, []) |