Mercurial > urweb
comparison src/c/urweb.c @ 1054:b06a2a65e670
UTF-8 in dynamic escaping
author | Adam Chlipala <adamc@hcoop.net> |
---|---|
date | Thu, 03 Dec 2009 11:50:51 -0500 |
parents | a8a825861397 |
children | 118a5a08a881 |
comparison
equal
deleted
inserted
replaced
1053:4eb1c4a1b057 | 1054:b06a2a65e670 |
---|---|
1408 sprintf(result, "%g%n", n, &len); | 1408 sprintf(result, "%g%n", n, &len); |
1409 ctx->heap.front += len+1; | 1409 ctx->heap.front += len+1; |
1410 return result; | 1410 return result; |
1411 } | 1411 } |
1412 | 1412 |
1413 static int isCont(unsigned char ch) { | |
1414 return ch / 64 == 2; | |
1415 } | |
1416 | |
1413 char *uw_Basis_attrifyString(uw_context ctx, uw_Basis_string s) { | 1417 char *uw_Basis_attrifyString(uw_context ctx, uw_Basis_string s) { |
1414 int len = strlen(s); | 1418 int len = strlen(s); |
1415 char *result, *p; | 1419 char *result, *p; |
1416 uw_check_heap(ctx, len * 6 + 1); | 1420 uw_check_heap(ctx, len * 6 + 1); |
1417 | 1421 |
1418 result = p = ctx->heap.front; | 1422 result = p = ctx->heap.front; |
1419 | 1423 |
1420 for (; *s; s++) { | 1424 for (; *s; s++) { |
1421 char c = *s; | 1425 unsigned char c = *s; |
1422 | 1426 |
1423 if (c == '"') { | 1427 if (c == '"') { |
1424 strcpy(p, "&quot;"); | 1428 strcpy(p, "&quot;"); |
1425 p += 6; | 1429 p += 6; |
1426 } else if (c == '&') { | 1430 } else if (c == '&') { |
1427 strcpy(p, "&amp;"); | 1431 strcpy(p, "&amp;"); |
1428 p += 5; | 1432 p += 5; |
1429 } | 1433 } |
1430 else if (isprint(c)) | 1434 else if (isprint(c)) |
1431 *p++ = c; | 1435 *p++ = c; |
1432 else { | 1436 else if (c / 32 == 6 && isCont(s[1])) { |
1437 memcpy(p, s, 2); | |
1438 p += 2; | |
1439 ++s; | |
1440 } else if (c / 16 == 14 && isCont(s[1]) && isCont(s[2])) { | |
1441 memcpy(p, s, 3); | |
1442 p += 3; | |
1443 s += 2; | |
1444 } else if (c / 8 == 30 && isCont(s[1]) && isCont(s[2]) && isCont(s[3])) { | |
1445 memcpy(p, s, 4); | |
1446 p += 4; | |
1447 s += 3; | |
1448 } else { | |
1433 int len2; | 1449 int len2; |
1434 sprintf(p, "&#%d;%n", c, &len2); | 1450 sprintf(p, "&#%d;%n", c, &len2); |
1435 p += len2; | 1451 p += len2; |
1436 } | 1452 } |
1437 } | 1453 } |
1497 | 1513 |
1498 uw_unit uw_Basis_attrifyString_w(uw_context ctx, uw_Basis_string s) { | 1514 uw_unit uw_Basis_attrifyString_w(uw_context ctx, uw_Basis_string s) { |
1499 uw_check(ctx, strlen(s) * 6); | 1515 uw_check(ctx, strlen(s) * 6); |
1500 | 1516 |
1501 for (; *s; s++) { | 1517 for (; *s; s++) { |
1502 char c = *s; | 1518 unsigned char c = *s; |
1503 | 1519 |
1504 if (c == '"') | 1520 if (c == '"') |
1505 uw_write_unsafe(ctx, "&quot;"); | 1521 uw_write_unsafe(ctx, "&quot;"); |
1506 else if (c == '&') | 1522 else if (c == '&') |
1507 uw_write_unsafe(ctx, "&amp;"); | 1523 uw_write_unsafe(ctx, "&amp;"); |
1508 else if (isprint(c)) | 1524 else if (isprint(c)) |
1509 uw_writec_unsafe(ctx, c); | 1525 uw_writec_unsafe(ctx, c); |
1510 else { | 1526 else if (c / 32 == 6 && isCont(s[1])) { |
1527 uw_writec_unsafe(ctx, c); | |
1528 uw_writec_unsafe(ctx, s[1]); | |
1529 ++s; | |
1530 } else if (c / 16 == 14 && isCont(s[1]) && isCont(s[2])) { | |
1531 uw_writec_unsafe(ctx, c); | |
1532 uw_writec_unsafe(ctx, s[1]); | |
1533 uw_writec_unsafe(ctx, s[2]); | |
1534 s += 2; | |
1535 } else if (c / 8 == 30 && isCont(s[1]) && isCont(s[2]) && isCont(s[3])) { | |
1536 uw_writec_unsafe(ctx, c); | |
1537 uw_writec_unsafe(ctx, s[1]); | |
1538 uw_writec_unsafe(ctx, s[2]); | |
1539 uw_writec_unsafe(ctx, s[3]); | |
1540 s += 3; | |
1541 } else { | |
1511 uw_write_unsafe(ctx, "&#"); | 1542 uw_write_unsafe(ctx, "&#"); |
1512 uw_Basis_attrifyInt_w_unsafe(ctx, c); | 1543 uw_Basis_attrifyInt_w_unsafe(ctx, c); |
1513 uw_writec_unsafe(ctx, ';'); | 1544 uw_writec_unsafe(ctx, ';'); |
1514 } | 1545 } |
1515 } | 1546 } |
1845 char *r, *s2; | 1876 char *r, *s2; |
1846 | 1877 |
1847 uw_check_heap(ctx, strlen(s) * 5 + 1); | 1878 uw_check_heap(ctx, strlen(s) * 5 + 1); |
1848 | 1879 |
1849 for (r = s2 = ctx->heap.front; *s; s++) { | 1880 for (r = s2 = ctx->heap.front; *s; s++) { |
1850 char c = *s; | 1881 unsigned char c = *s; |
1851 | 1882 |
1852 switch (c) { | 1883 switch (c) { |
1853 case '<': | 1884 case '<': |
1854 strcpy(s2, "&lt;"); | 1885 strcpy(s2, "&lt;"); |
1855 s2 += 4; | 1886 s2 += 4; |
1857 case '&': | 1888 case '&': |
1858 strcpy(s2, "&amp;"); | 1889 strcpy(s2, "&amp;"); |
1859 s2 += 5; | 1890 s2 += 5; |
1860 break; | 1891 break; |
1861 default: | 1892 default: |
1862 if (isprint(c)) | 1893 if (isprint(c) || isspace(c)) |
1863 *s2++ = c; | 1894 *s2++ = c; |
1864 else { | 1895 else if (c / 32 == 6 && isCont(s[1])) { |
1896 memcpy(s2, s, 2); | |
1897 s2 += 2; | |
1898 ++s; | |
1899 } else if (c / 16 == 14 && isCont(s[1]) && isCont(s[2])) { | |
1900 memcpy(s2, s, 3); | |
1901 s2 += 3; | |
1902 s += 2; | |
1903 } else if (c / 8 == 30 && isCont(s[1]) && isCont(s[2]) && isCont(s[3])) { | |
1904 memcpy(s2, s, 4); | |
1905 s2 += 4; | |
1906 s += 3; | |
1907 } else { | |
1865 int len2; | 1908 int len2; |
1866 sprintf(s2, "&#%d;%n", c, &len2); | 1909 sprintf(s2, "&#%d;%n", c, &len2); |
1867 s2 += len2; | 1910 s2 += len2; |
1868 } | 1911 } |
1869 } | 1912 } |
1876 | 1919 |
1877 uw_unit uw_Basis_htmlifyString_w(uw_context ctx, uw_Basis_string s) { | 1920 uw_unit uw_Basis_htmlifyString_w(uw_context ctx, uw_Basis_string s) { |
1878 uw_check(ctx, strlen(s) * 6); | 1921 uw_check(ctx, strlen(s) * 6); |
1879 | 1922 |
1880 for (; *s; s++) { | 1923 for (; *s; s++) { |
1881 char c = *s; | 1924 unsigned char c = *s; |
1882 | 1925 |
1883 switch (c) { | 1926 switch (c) { |
1884 case '<': | 1927 case '<': |
1885 uw_write_unsafe(ctx, "&lt;"); | 1928 uw_write_unsafe(ctx, "&lt;"); |
1886 break; | 1929 break; |
1887 case '&': | 1930 case '&': |
1888 uw_write_unsafe(ctx, "&amp;"); | 1931 uw_write_unsafe(ctx, "&amp;"); |
1889 break; | 1932 break; |
1890 default: | 1933 default: |
1891 if (isprint(c)) | 1934 if (isprint(c) || isspace(c)) |
1892 uw_writec_unsafe(ctx, c); | 1935 uw_writec_unsafe(ctx, c); |
1893 else { | 1936 else if (c / 32 == 6 && isCont(s[1])) { |
1937 uw_writec_unsafe(ctx, c); | |
1938 uw_writec_unsafe(ctx, s[1]); | |
1939 ++s; | |
1940 } else if (c / 16 == 14 && isCont(s[1]) && isCont(s[2])) { | |
1941 uw_writec_unsafe(ctx, c); | |
1942 uw_writec_unsafe(ctx, s[1]); | |
1943 uw_writec_unsafe(ctx, s[2]); | |
1944 s += 2; | |
1945 } else if (c / 8 == 30 && isCont(s[1]) && isCont(s[2]) && isCont(s[3])) { | |
1946 uw_writec_unsafe(ctx, c); | |
1947 uw_writec_unsafe(ctx, s[1]); | |
1948 uw_writec_unsafe(ctx, s[2]); | |
1949 uw_writec_unsafe(ctx, s[3]); | |
1950 s += 3; | |
1951 } else { | |
1894 uw_write_unsafe(ctx, "&#"); | 1952 uw_write_unsafe(ctx, "&#"); |
1895 uw_Basis_attrifyInt_w_unsafe(ctx, c); | 1953 uw_Basis_attrifyInt_w_unsafe(ctx, c); |
1896 uw_writec_unsafe(ctx, ';'); | 1954 uw_writec_unsafe(ctx, ';'); |
1897 } | 1955 } |
1898 } | 1956 } |