comparison src/urweb.lex @ 763:af41ec2f302a

Lexing character entities
author Adam Chlipala <adamc@hcoop.net>
date Thu, 30 Apr 2009 16:25:27 -0400
parents 8688e01ae469
children 395a5d450cc0
comparison
equal deleted inserted replaced
762:9021d44ba6b2 763:af41ec2f302a
1 (* Copyright (c) 2008, Adam Chlipala 1 (* -*- mode: sml-lex -*- *)
2
3 (* Copyright (c) 2008-2009, Adam Chlipala
2 * All rights reserved. 4 * All rights reserved.
3 * 5 *
4 * Redistribution and use in source and binary forms, with or without 6 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met: 7 * modification, are permitted provided that the following conditions are met:
6 * 8 *
103 | _ => () 105 | _ => ()
104 106
105 fun initialize () = (xmlTag := []; 107 fun initialize () = (xmlTag := [];
106 xmlString := false) 108 xmlString := false)
107 109
110
111 fun unescape loc s =
112 let
113 fun process (s, acc) =
114 let
115 val (befor, after) = Substring.splitl (fn ch => ch <> #"&") s
116 in
117 if Substring.size after = 0 then
118 Substring.concat (rev (s :: acc))
119 else
120 let
121 val after = Substring.slice (after, 1, NONE)
122 val (befor', after') = Substring.splitl (fn ch => ch <> #";") after
123 in
124 if Substring.size after' = 0 then
125 (ErrorMsg.errorAt' loc "Missing ';' after '&'";
126 "")
127 else
128 let
129 val pre = befor
130 val code = befor'
131 val s = Substring.slice (after', 1, NONE)
132
133 val special =
134 if Substring.size code > 0 andalso Substring.sub (code, 0) = #"#"
135 andalso CharVectorSlice.all Char.isDigit (Substring.slice (code, 1, NONE)) then
136 let
137 val code = Substring.string (Substring.slice (code, 1, NONE))
138 in
139 Option.map chr (Int.fromString code)
140 end
141 else case Substring.string code of
142 "amp" => SOME #"&"
143 | "lt" => SOME #"<"
144 | "gt" => SOME #">"
145 | "quot" => SOME #"\""
146 | _ => NONE
147 in
148 case special of
149 NONE => (ErrorMsg.errorAt' loc ("Unsupported XML character entity "
150 ^ Substring.string code);
151 "")
152 | SOME ch => process (s, Substring.full (String.str ch) :: pre :: acc)
153 end
154 end
155 end
156 in
157 process (Substring.full s, [])
158 end
108 159
109 %% 160 %%
110 %header (functor UrwebLexFn(structure Tokens : Urweb_TOKENS)); 161 %header (functor UrwebLexFn(structure Tokens : Urweb_TOKENS));
111 %full 162 %full
112 %s COMMENT STRING XML XMLTAG; 163 %s COMMENT STRING XML XMLTAG;
229 280
230 <XML> "{" => (YYBEGIN INITIAL; 281 <XML> "{" => (YYBEGIN INITIAL;
231 pushLevel (fn () => YYBEGIN XML); 282 pushLevel (fn () => YYBEGIN XML);
232 Tokens.LBRACE (yypos, yypos + 1)); 283 Tokens.LBRACE (yypos, yypos + 1));
233 284
234 <XML> {notags} => (Tokens.NOTAGS (yytext, yypos, yypos + size yytext)); 285 <XML> {notags} => (Tokens.NOTAGS (unescape (yypos, yypos + size yytext) yytext, yypos, yypos + size yytext));
235 286
236 <XML> . => (ErrorMsg.errorAt' (yypos, yypos) 287 <XML> . => (ErrorMsg.errorAt' (yypos, yypos)
237 ("illegal XML character: \"" ^ yytext ^ "\""); 288 ("illegal XML character: \"" ^ yytext ^ "\"");
238 continue ()); 289 continue ());
239 290