Mercurial > urweb
diff src/mono_opt.sml @ 1053:4eb1c4a1b057
Escaping UTF-8 in MonoOpt
author | Adam Chlipala <adamc@hcoop.net> |
---|---|
date | Thu, 03 Dec 2009 11:20:13 -0500 |
parents | 93415bcf54c0 |
children | b06a2a65e670 |
line wrap: on
line diff
--- a/src/mono_opt.sml Wed Nov 25 09:48:23 2009 -0500 +++ b/src/mono_opt.sml Thu Dec 03 11:20:13 2009 -0500 @@ -61,14 +61,37 @@ val htmlifyInt = attrifyInt val htmlifyFloat = attrifyFloat -val htmlifyString = String.translate (fn ch => case ch of - #"<" => "&lt;" - | #"&" => "&amp;" - | _ => - if Char.isPrint ch orelse Char.isSpace ch then - str ch - else - "&#" ^ Int.toString (ord ch) ^ ";") + +fun htmlifyString s = + let + fun hs (pos, acc) = + if pos >= size s then + String.concat (rev acc) + else + case String.sub (s, pos) of + #"<" => hs (pos+1, "&lt;" :: acc) + | #"&" => hs (pos+1, "&amp;" :: acc) + | ch => + let + val n = ord ch + fun isCont k = pos + k < size s + andalso ord (String.sub (s, pos + k)) div 64 = 2 + fun unicode k = hs (pos+k+1, String.substring (s, pos, k+1) :: acc) + in + if Char.isPrint ch orelse Char.isSpace ch then + hs (pos+1, str ch :: acc) + else if n div 32 = 6 andalso isCont 1 then + unicode 1 + else if n div 16 = 14 andalso isCont 1 andalso isCont 2 then + unicode 2 + else if n div 8 = 30 andalso isCont 1 andalso isCont 2 andalso isCont 3 then + unicode 3 + else + hs (pos+1, "&#" ^ Int.toString (ord ch) ^ ";" :: acc) + end + in + hs (0, []) + end fun hexIt ch = let