# HG changeset patch # User Adam Chlipala # Date 1241123127 14400 # Node ID af41ec2f302aa26631e38ff56cf4b7eb61a93259 # Parent 9021d44ba6b2e85fc68068877425da8323756031 Lexing character entities diff -r 9021d44ba6b2 -r af41ec2f302a src/urweb.lex --- a/src/urweb.lex Thu Apr 30 15:10:13 2009 -0400 +++ b/src/urweb.lex Thu Apr 30 16:25:27 2009 -0400 @@ -1,4 +1,6 @@ -(* Copyright (c) 2008, Adam Chlipala +(* -*- mode: sml-lex -*- *) + +(* Copyright (c) 2008-2009, Adam Chlipala * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -106,6 +108,55 @@ xmlString := false) +fun unescape loc s = + let + fun process (s, acc) = + let + val (befor, after) = Substring.splitl (fn ch => ch <> #"&") s + in + if Substring.size after = 0 then + Substring.concat (rev (s :: acc)) + else + let + val after = Substring.slice (after, 1, NONE) + val (befor', after') = Substring.splitl (fn ch => ch <> #";") after + in + if Substring.size after' = 0 then + (ErrorMsg.errorAt' loc "Missing ';' after '&'"; + "") + else + let + val pre = befor + val code = befor' + val s = Substring.slice (after', 1, NONE) + + val special = + if Substring.size code > 0 andalso Substring.sub (code, 0) = #"#" + andalso CharVectorSlice.all Char.isDigit (Substring.slice (code, 1, NONE)) then + let + val code = Substring.string (Substring.slice (code, 1, NONE)) + in + Option.map chr (Int.fromString code) + end + else case Substring.string code of + "amp" => SOME #"&" + | "lt" => SOME #"<" + | "gt" => SOME #">" + | "quot" => SOME #"\"" + | _ => NONE + in + case special of + NONE => (ErrorMsg.errorAt' loc ("Unsupported XML character entity " + ^ Substring.string code); + "") + | SOME ch => process (s, Substring.full (String.str ch) :: pre :: acc) + end + end + end + in + process (Substring.full s, []) + end + %% %header (functor UrwebLexFn(structure Tokens : Urweb_TOKENS)); %full @@ -231,7 +282,7 @@ pushLevel (fn () => YYBEGIN XML); Tokens.LBRACE (yypos, yypos + 1)); - {notags} => (Tokens.NOTAGS (yytext, yypos, yypos + size yytext)); + {notags} => (Tokens.NOTAGS (unescape (yypos, yypos + size yytext) yytext, yypos, yypos + size yytext)); . => (ErrorMsg.errorAt' (yypos, yypos) ("illegal XML character: \"" ^ yytext ^ "\""); diff -r 9021d44ba6b2 -r af41ec2f302a tests/ent.ur --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/ent.ur Thu Apr 30 16:25:27 2009 -0400 @@ -0,0 +1,3 @@ +fun main () = return + <Whoa-hoa!> ABCD! + diff -r 9021d44ba6b2 -r af41ec2f302a tests/ent.urp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/ent.urp Thu Apr 30 16:25:27 2009 -0400 @@ -0,0 +1,3 @@ +debug + +ent diff -r 9021d44ba6b2 -r af41ec2f302a tests/ent.urs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/ent.urs Thu Apr 30 16:25:27 2009 -0400 @@ -0,0 +1,1 @@ +val main : unit -> transaction page