comparison src/c/urweb.c @ 1054:b06a2a65e670

UTF-8 in dynamic escaping
author Adam Chlipala <adamc@hcoop.net>
date Thu, 03 Dec 2009 11:50:51 -0500
parents a8a825861397
children 118a5a08a881
comparison
equal deleted inserted replaced
1053:4eb1c4a1b057 1054:b06a2a65e670
1408 sprintf(result, "%g%n", n, &len); 1408 sprintf(result, "%g%n", n, &len);
1409 ctx->heap.front += len+1; 1409 ctx->heap.front += len+1;
1410 return result; 1410 return result;
1411 } 1411 }
1412 1412
1413 static int isCont(unsigned char ch) {
1414 return ch / 64 == 2;
1415 }
1416
1413 char *uw_Basis_attrifyString(uw_context ctx, uw_Basis_string s) { 1417 char *uw_Basis_attrifyString(uw_context ctx, uw_Basis_string s) {
1414 int len = strlen(s); 1418 int len = strlen(s);
1415 char *result, *p; 1419 char *result, *p;
1416 uw_check_heap(ctx, len * 6 + 1); 1420 uw_check_heap(ctx, len * 6 + 1);
1417 1421
1418 result = p = ctx->heap.front; 1422 result = p = ctx->heap.front;
1419 1423
1420 for (; *s; s++) { 1424 for (; *s; s++) {
1421 char c = *s; 1425 unsigned char c = *s;
1422 1426
1423 if (c == '"') { 1427 if (c == '"') {
1424 strcpy(p, "&quot;"); 1428 strcpy(p, "&quot;");
1425 p += 6; 1429 p += 6;
1426 } else if (c == '&') { 1430 } else if (c == '&') {
1427 strcpy(p, "&amp;"); 1431 strcpy(p, "&amp;");
1428 p += 5; 1432 p += 5;
1429 } 1433 }
1430 else if (isprint(c)) 1434 else if (isprint(c))
1431 *p++ = c; 1435 *p++ = c;
1432 else { 1436 else if (c / 32 == 6 && isCont(s[1])) {
1437 memcpy(p, s, 2);
1438 p += 2;
1439 ++s;
1440 } else if (c / 16 == 14 && isCont(s[1]) && isCont(s[2])) {
1441 memcpy(p, s, 3);
1442 p += 3;
1443 s += 2;
1444 } else if (c / 8 == 30 && isCont(s[1]) && isCont(s[2]) && isCont(s[3])) {
1445 memcpy(p, s, 4);
1446 p += 4;
1447 s += 3;
1448 } else {
1433 int len2; 1449 int len2;
1434 sprintf(p, "&#%d;%n", c, &len2); 1450 sprintf(p, "&#%d;%n", c, &len2);
1435 p += len2; 1451 p += len2;
1436 } 1452 }
1437 } 1453 }
1497 1513
1498 uw_unit uw_Basis_attrifyString_w(uw_context ctx, uw_Basis_string s) { 1514 uw_unit uw_Basis_attrifyString_w(uw_context ctx, uw_Basis_string s) {
1499 uw_check(ctx, strlen(s) * 6); 1515 uw_check(ctx, strlen(s) * 6);
1500 1516
1501 for (; *s; s++) { 1517 for (; *s; s++) {
1502 char c = *s; 1518 unsigned char c = *s;
1503 1519
1504 if (c == '"') 1520 if (c == '"')
1505 uw_write_unsafe(ctx, "&quot;"); 1521 uw_write_unsafe(ctx, "&quot;");
1506 else if (c == '&') 1522 else if (c == '&')
1507 uw_write_unsafe(ctx, "&amp;"); 1523 uw_write_unsafe(ctx, "&amp;");
1508 else if (isprint(c)) 1524 else if (isprint(c))
1509 uw_writec_unsafe(ctx, c); 1525 uw_writec_unsafe(ctx, c);
1510 else { 1526 else if (c / 32 == 6 && isCont(s[1])) {
1527 uw_writec_unsafe(ctx, c);
1528 uw_writec_unsafe(ctx, s[1]);
1529 ++s;
1530 } else if (c / 16 == 14 && isCont(s[1]) && isCont(s[2])) {
1531 uw_writec_unsafe(ctx, c);
1532 uw_writec_unsafe(ctx, s[1]);
1533 uw_writec_unsafe(ctx, s[2]);
1534 s += 2;
1535 } else if (c / 8 == 30 && isCont(s[1]) && isCont(s[2]) && isCont(s[3])) {
1536 uw_writec_unsafe(ctx, c);
1537 uw_writec_unsafe(ctx, s[1]);
1538 uw_writec_unsafe(ctx, s[2]);
1539 uw_writec_unsafe(ctx, s[3]);
1540 s += 3;
1541 } else {
1511 uw_write_unsafe(ctx, "&#"); 1542 uw_write_unsafe(ctx, "&#");
1512 uw_Basis_attrifyInt_w_unsafe(ctx, c); 1543 uw_Basis_attrifyInt_w_unsafe(ctx, c);
1513 uw_writec_unsafe(ctx, ';'); 1544 uw_writec_unsafe(ctx, ';');
1514 } 1545 }
1515 } 1546 }
1845 char *r, *s2; 1876 char *r, *s2;
1846 1877
1847 uw_check_heap(ctx, strlen(s) * 5 + 1); 1878 uw_check_heap(ctx, strlen(s) * 5 + 1);
1848 1879
1849 for (r = s2 = ctx->heap.front; *s; s++) { 1880 for (r = s2 = ctx->heap.front; *s; s++) {
1850 char c = *s; 1881 unsigned char c = *s;
1851 1882
1852 switch (c) { 1883 switch (c) {
1853 case '<': 1884 case '<':
1854 strcpy(s2, "&lt;"); 1885 strcpy(s2, "&lt;");
1855 s2 += 4; 1886 s2 += 4;
1857 case '&': 1888 case '&':
1858 strcpy(s2, "&amp;"); 1889 strcpy(s2, "&amp;");
1859 s2 += 5; 1890 s2 += 5;
1860 break; 1891 break;
1861 default: 1892 default:
1862 if (isprint(c)) 1893 if (isprint(c) || isspace(c))
1863 *s2++ = c; 1894 *s2++ = c;
1864 else { 1895 else if (c / 32 == 6 && isCont(s[1])) {
1896 memcpy(s2, s, 2);
1897 s2 += 2;
1898 ++s;
1899 } else if (c / 16 == 14 && isCont(s[1]) && isCont(s[2])) {
1900 memcpy(s2, s, 3);
1901 s2 += 3;
1902 s += 2;
1903 } else if (c / 8 == 30 && isCont(s[1]) && isCont(s[2]) && isCont(s[3])) {
1904 memcpy(s2, s, 4);
1905 s2 += 4;
1906 s += 3;
1907 } else {
1865 int len2; 1908 int len2;
1866 sprintf(s2, "&#%d;%n", c, &len2); 1909 sprintf(s2, "&#%d;%n", c, &len2);
1867 s2 += len2; 1910 s2 += len2;
1868 } 1911 }
1869 } 1912 }
1876 1919
1877 uw_unit uw_Basis_htmlifyString_w(uw_context ctx, uw_Basis_string s) { 1920 uw_unit uw_Basis_htmlifyString_w(uw_context ctx, uw_Basis_string s) {
1878 uw_check(ctx, strlen(s) * 6); 1921 uw_check(ctx, strlen(s) * 6);
1879 1922
1880 for (; *s; s++) { 1923 for (; *s; s++) {
1881 char c = *s; 1924 unsigned char c = *s;
1882 1925
1883 switch (c) { 1926 switch (c) {
1884 case '<': 1927 case '<':
1885 uw_write_unsafe(ctx, "&lt;"); 1928 uw_write_unsafe(ctx, "&lt;");
1886 break; 1929 break;
1887 case '&': 1930 case '&':
1888 uw_write_unsafe(ctx, "&amp;"); 1931 uw_write_unsafe(ctx, "&amp;");
1889 break; 1932 break;
1890 default: 1933 default:
1891 if (isprint(c)) 1934 if (isprint(c) || isspace(c))
1892 uw_writec_unsafe(ctx, c); 1935 uw_writec_unsafe(ctx, c);
1893 else { 1936 else if (c / 32 == 6 && isCont(s[1])) {
1937 uw_writec_unsafe(ctx, c);
1938 uw_writec_unsafe(ctx, s[1]);
1939 ++s;
1940 } else if (c / 16 == 14 && isCont(s[1]) && isCont(s[2])) {
1941 uw_writec_unsafe(ctx, c);
1942 uw_writec_unsafe(ctx, s[1]);
1943 uw_writec_unsafe(ctx, s[2]);
1944 s += 2;
1945 } else if (c / 8 == 30 && isCont(s[1]) && isCont(s[2]) && isCont(s[3])) {
1946 uw_writec_unsafe(ctx, c);
1947 uw_writec_unsafe(ctx, s[1]);
1948 uw_writec_unsafe(ctx, s[2]);
1949 uw_writec_unsafe(ctx, s[3]);
1950 s += 3;
1951 } else {
1894 uw_write_unsafe(ctx, "&#"); 1952 uw_write_unsafe(ctx, "&#");
1895 uw_Basis_attrifyInt_w_unsafe(ctx, c); 1953 uw_Basis_attrifyInt_w_unsafe(ctx, c);
1896 uw_writec_unsafe(ctx, ';'); 1954 uw_writec_unsafe(ctx, ';');
1897 } 1955 }
1898 } 1956 }