changeset 1059:03a81e26e5fe

Move to simpler HTML escaping
author Adam Chlipala <adamc@hcoop.net>
date Tue, 08 Dec 2009 08:48:29 -0500
parents 86b831978b8d
children 6f4f8b9c5023
files src/c/urweb.c src/mono_opt.sml
diffstat 2 files changed, 13 insertions(+), 157 deletions(-) [+]
line wrap: on
line diff
--- a/src/c/urweb.c	Sat Dec 05 14:34:44 2009 -0500
+++ b/src/c/urweb.c	Tue Dec 08 08:48:29 2009 -0500
@@ -1431,25 +1431,8 @@
       strcpy(p, "&amp;");
       p += 5;
     }
-    else if (isprint(c))
+    else
       *p++ = c;
-    else if (c / 32 == 6 && isCont(s[1])) {
-      memcpy(p, s, 2);
-      p += 2;
-      ++s;
-    } else if (c / 16 == 14 && isCont(s[1]) && isCont(s[2])) {
-      memcpy(p, s, 3);
-      p += 3;
-      s += 2;
-    } else if (c / 8 == 30 && isCont(s[1]) && isCont(s[2]) && isCont(s[3])) {
-      memcpy(p, s, 4);
-      p += 4;
-      s += 3;
-    } else {
-      int len2;
-      sprintf(p, "&#%d;%n", c, &len2);
-      p += len2;
-    }
   }
 
   *p++ = 0;
@@ -1470,13 +1453,8 @@
     strcpy(p, "&amp;");
     p += 5;
   }
-  else if (isprint(c))
+  else
     *p++ = c;
-  else {
-    int len2;
-    sprintf(p, "&#%d;%n", c, &len2);
-    p += len2;
-  }
 
   *p++ = 0;
   ctx->heap.front = p;
@@ -1521,28 +1499,8 @@
       uw_write_unsafe(ctx, "&quot;");
     else if (c == '&')
       uw_write_unsafe(ctx, "&amp;");
-    else if (isprint(c))
+    else
       uw_writec_unsafe(ctx, c);
-    else if (c / 32 == 6 && isCont(s[1])) {
-      uw_writec_unsafe(ctx, c);
-      uw_writec_unsafe(ctx, s[1]);
-      ++s;
-    } else if (c / 16 == 14 && isCont(s[1]) && isCont(s[2])) {
-      uw_writec_unsafe(ctx, c);
-      uw_writec_unsafe(ctx, s[1]);
-      uw_writec_unsafe(ctx, s[2]);
-      s += 2;
-    } else if (c / 8 == 30 && isCont(s[1]) && isCont(s[2]) && isCont(s[3])) {
-      uw_writec_unsafe(ctx, c);
-      uw_writec_unsafe(ctx, s[1]);
-      uw_writec_unsafe(ctx, s[2]);
-      uw_writec_unsafe(ctx, s[3]);
-      s += 3;
-    } else {
-      uw_write_unsafe(ctx, "&#");
-      uw_Basis_attrifyInt_w_unsafe(ctx, c);
-      uw_writec_unsafe(ctx, ';');
-    }
   }
 
   return uw_unit_v;
@@ -1555,13 +1513,8 @@
     uw_write_unsafe(ctx, "&quot;");
   else if (c == '&')
     uw_write_unsafe(ctx, "&amp;");
-  else if (isprint(c))
+  else
     uw_writec_unsafe(ctx, c);
-  else {
-    uw_write_unsafe(ctx, "&#");
-    uw_Basis_attrifyInt_w_unsafe(ctx, c);
-    uw_writec_unsafe(ctx, ';');
-  }
 
   return uw_unit_v;
 }
@@ -1890,25 +1843,7 @@
       s2 += 5;
       break;
     default:
-      if (isprint(c) || isspace(c))
-        *s2++ = c;
-      else if (c / 32 == 6 && isCont(s[1])) {
-        memcpy(s2, s, 2);
-        s2 += 2;
-        ++s;
-      } else if (c / 16 == 14 && isCont(s[1]) && isCont(s[2])) {
-        memcpy(s2, s, 3);
-        s2 += 3;
-        s += 2;
-      } else if (c / 8 == 30 && isCont(s[1]) && isCont(s[2]) && isCont(s[3])) {
-        memcpy(s2, s, 4);
-        s2 += 4;
-        s += 3;
-      } else {
-        int len2;
-        sprintf(s2, "&#%d;%n", c, &len2);
-        s2 += len2;
-      }
+      *s2++ = c;
     }
   }
 
@@ -1931,28 +1866,7 @@
       uw_write_unsafe(ctx, "&amp;");
       break;
     default:
-      if (isprint(c) || isspace(c))
-        uw_writec_unsafe(ctx, c);
-      else if (c / 32 == 6 && isCont(s[1])) {
-        uw_writec_unsafe(ctx, c);
-        uw_writec_unsafe(ctx, s[1]);
-        ++s;
-      } else if (c / 16 == 14 && isCont(s[1]) && isCont(s[2])) {
-        uw_writec_unsafe(ctx, c);
-        uw_writec_unsafe(ctx, s[1]);
-        uw_writec_unsafe(ctx, s[2]);
-        s += 2;
-      } else if (c / 8 == 30 && isCont(s[1]) && isCont(s[2]) && isCont(s[3])) {
-        uw_writec_unsafe(ctx, c);
-        uw_writec_unsafe(ctx, s[1]);
-        uw_writec_unsafe(ctx, s[2]);
-        uw_writec_unsafe(ctx, s[3]);
-        s += 3;
-      } else {
-        uw_write_unsafe(ctx, "&#");
-        uw_Basis_attrifyInt_w_unsafe(ctx, c);
-        uw_writec_unsafe(ctx, ';');
-      }
+      uw_writec_unsafe(ctx, c);
     }
   }
 
--- a/src/mono_opt.sml	Sat Dec 05 14:34:44 2009 -0500
+++ b/src/mono_opt.sml	Tue Dec 08 08:48:29 2009 -0500
@@ -45,45 +45,14 @@
     else
         Real.toString n
 
-fun attrifyString s =
-    let
-        fun hs (pos, acc) =
-            if pos >= size s then
-                String.concat (rev acc)
-            else
-                case String.sub (s, pos) of
-                    #"\"" => hs (pos+1, "&quot;" :: acc)
-                  | #"&" => hs (pos+1, "&amp;" :: acc)
-                  | ch =>
-                    let
-                        val n = ord ch
-                        fun isCont k = pos + k < size s
-                                       andalso ord (String.sub (s, pos + k)) div 64 = 2
-                        fun unicode k = hs (pos+k+1, String.substring (s, pos, k+1) :: acc)
-                    in
-                        if Char.isPrint ch orelse Char.isSpace ch then
-                            hs (pos+1, str ch :: acc)
-                        else if n div 32 = 6 andalso isCont 1 then
-                            unicode 1
-                        else if n div 16 = 14 andalso isCont 1 andalso isCont 2 then
-                            unicode 2
-                        else if n div 8 = 30 andalso isCont 1 andalso isCont 2 andalso isCont 3 then
-                            unicode 3
-                        else
-                            hs (pos+1, "&#" ^ Int.toString (ord ch) ^ ";" :: acc)
-                    end
-    in
-        hs (0, [])
-    end
-
 fun attrifyChar ch =
     case ch of
         #"\"" => "&quot;"
       | #"&" => "&amp;"
-      | ch => if Char.isPrint ch then
-                  str ch
-              else
-                  "&#" ^ Int.toString (ord ch) ^ ";"
+      | ch => str ch
+
+val attrifyString = String.translate attrifyChar
+
 
 val urlifyInt = attrifyInt
 val urlifyFloat = attrifyFloat
@@ -91,36 +60,9 @@
 val htmlifyInt = attrifyInt
 val htmlifyFloat = attrifyFloat
 
-fun htmlifyString s =
-    let
-        fun hs (pos, acc) =
-            if pos >= size s then
-                String.concat (rev acc)
-            else
-                case String.sub (s, pos) of
-                    #"<" => hs (pos+1, "&lt;" :: acc)
-                  | #"&" => hs (pos+1, "&amp;" :: acc)
-                  | ch =>
-                    let
-                        val n = ord ch
-                        fun isCont k = pos + k < size s
-                                       andalso ord (String.sub (s, pos + k)) div 64 = 2
-                        fun unicode k = hs (pos+k+1, String.substring (s, pos, k+1) :: acc)
-                    in
-                        if Char.isPrint ch then
-                            hs (pos+1, str ch :: acc)
-                        else if n div 32 = 6 andalso isCont 1 then
-                            unicode 1
-                        else if n div 16 = 14 andalso isCont 1 andalso isCont 2 then
-                            unicode 2
-                        else if n div 8 = 30 andalso isCont 1 andalso isCont 2 andalso isCont 3 then
-                            unicode 3
-                        else
-                            hs (pos+1, "&#" ^ Int.toString (ord ch) ^ ";" :: acc)
-                    end
-    in
-        hs (0, [])
-    end
+val htmlifyString = String.translate (fn #"<" => "&lt;"
+                                       | #"&" => "&amp;"
+                                       | ch => str ch)
 
 fun hexIt ch =
     let