Mercurial > urweb
diff src/c/urweb.c @ 1054:b06a2a65e670
UTF-8 in dynamic escaping
author | Adam Chlipala <adamc@hcoop.net> |
---|---|
date | Thu, 03 Dec 2009 11:50:51 -0500 |
parents | a8a825861397 |
children | 118a5a08a881 |
line wrap: on
line diff
--- a/src/c/urweb.c Thu Dec 03 11:20:13 2009 -0500 +++ b/src/c/urweb.c Thu Dec 03 11:50:51 2009 -0500 @@ -1410,6 +1410,10 @@ return result; } +static int isCont(unsigned char ch) { + return ch / 64 == 2; +} + char *uw_Basis_attrifyString(uw_context ctx, uw_Basis_string s) { int len = strlen(s); char *result, *p; @@ -1418,7 +1422,7 @@ result = p = ctx->heap.front; for (; *s; s++) { - char c = *s; + unsigned char c = *s; if (c == '"') { strcpy(p, "&quot;"); @@ -1429,7 +1433,19 @@ } else if (isprint(c)) *p++ = c; - else { + else if (c / 32 == 6 && isCont(s[1])) { + memcpy(p, s, 2); + p += 2; + ++s; + } else if (c / 16 == 14 && isCont(s[1]) && isCont(s[2])) { + memcpy(p, s, 3); + p += 3; + s += 2; + } else if (c / 8 == 30 && isCont(s[1]) && isCont(s[2]) && isCont(s[3])) { + memcpy(p, s, 4); + p += 4; + s += 3; + } else { int len2; sprintf(p, "&#%d;%n", c, &len2); p += len2; @@ -1499,7 +1515,7 @@ uw_check(ctx, strlen(s) * 6); for (; *s; s++) { - char c = *s; + unsigned char c = *s; if (c == '"') uw_write_unsafe(ctx, "&quot;"); @@ -1507,7 +1523,22 @@ uw_write_unsafe(ctx, "&amp;"); else if (isprint(c)) uw_writec_unsafe(ctx, c); - else { + else if (c / 32 == 6 && isCont(s[1])) { + uw_writec_unsafe(ctx, c); + uw_writec_unsafe(ctx, s[1]); + ++s; + } else if (c / 16 == 14 && isCont(s[1]) && isCont(s[2])) { + uw_writec_unsafe(ctx, c); + uw_writec_unsafe(ctx, s[1]); + uw_writec_unsafe(ctx, s[2]); + s += 2; + } else if (c / 8 == 30 && isCont(s[1]) && isCont(s[2]) && isCont(s[3])) { + uw_writec_unsafe(ctx, c); + uw_writec_unsafe(ctx, s[1]); + uw_writec_unsafe(ctx, s[2]); + uw_writec_unsafe(ctx, s[3]); + s += 3; + } else { uw_write_unsafe(ctx, "&#"); uw_Basis_attrifyInt_w_unsafe(ctx, c); uw_writec_unsafe(ctx, ';'); @@ -1847,7 +1878,7 @@ uw_check_heap(ctx, strlen(s) * 5 + 1); for (r = s2 = ctx->heap.front; *s; s++) { - char c = *s; + unsigned char c = *s; switch (c) { case '<': @@ -1859,9 +1890,21 @@ s2 += 5; break; default: - if (isprint(c)) + if (isprint(c) || isspace(c)) *s2++ = c; - else { + 
else if (c / 32 == 6 && isCont(s[1])) { + memcpy(s2, s, 2); + s2 += 2; + ++s; + } else if (c / 16 == 14 && isCont(s[1]) && isCont(s[2])) { + memcpy(s2, s, 3); + s2 += 3; + s += 2; + } else if (c / 8 == 30 && isCont(s[1]) && isCont(s[2]) && isCont(s[3])) { + memcpy(s2, s, 4); + s2 += 4; + s += 3; + } else { int len2; sprintf(s2, "&#%d;%n", c, &len2); s2 += len2; @@ -1878,7 +1921,7 @@ uw_check(ctx, strlen(s) * 6); for (; *s; s++) { - char c = *s; + unsigned char c = *s; switch (c) { case '<': @@ -1888,9 +1931,24 @@ uw_write_unsafe(ctx, "&amp;"); break; default: - if (isprint(c)) + if (isprint(c) || isspace(c)) uw_writec_unsafe(ctx, c); - else { + else if (c / 32 == 6 && isCont(s[1])) { + uw_writec_unsafe(ctx, c); + uw_writec_unsafe(ctx, s[1]); + ++s; + } else if (c / 16 == 14 && isCont(s[1]) && isCont(s[2])) { + uw_writec_unsafe(ctx, c); + uw_writec_unsafe(ctx, s[1]); + uw_writec_unsafe(ctx, s[2]); + s += 2; + } else if (c / 8 == 30 && isCont(s[1]) && isCont(s[2]) && isCont(s[3])) { + uw_writec_unsafe(ctx, c); + uw_writec_unsafe(ctx, s[1]); + uw_writec_unsafe(ctx, s[2]); + uw_writec_unsafe(ctx, s[3]); + s += 3; + } else { uw_write_unsafe(ctx, "&#"); uw_Basis_attrifyInt_w_unsafe(ctx, c); uw_writec_unsafe(ctx, ';');