Mercurial > urweb
changeset 1592:1c9f8f06c1d6
Support the full set of XHTML character entities
author | Adam Chlipala <adam@chlipala.net> |
---|---|
date | Sat, 05 Nov 2011 15:05:13 -0400 (2011-11-05) |
parents | 20f898c29525 |
children | 7e2655b25ea1 |
files | .hgignore Makefile.am Makefile.in src/sources src/urweb.lex src/utf8.sig src/utf8.sml tests/entities.ur xml/parse.sml xml/xhtml-lat1.ent xml/xhtml-special.ent xml/xhtml-symbol.ent |
diffstat | 12 files changed, 724 insertions(+), 12 deletions(-) [+] |
line wrap: on
line diff
--- a/.hgignore Sat Nov 05 13:12:07 2011 -0400 +++ b/.hgignore Sat Nov 05 15:05:13 2011 -0400 @@ -48,6 +48,9 @@ Makefile.coq *.vo +xml/parse +xml/entities.sml + syntax: regexp ^Makefile$
--- a/Makefile.am Sat Nov 05 13:12:07 2011 -0400 +++ b/Makefile.am Sat Nov 05 15:05:13 2011 -0400 @@ -21,7 +21,7 @@ clean-local: rm -f src/*.mlton.grm.* src/*.mlton.lex.* \ - src/urweb.cm src/urweb.mlb + src/urweb.cm src/urweb.mlb xml/parse xml/entities.sml rm -rf .cm src/.cm src/urweb.cm: src/prefix.cm src/sources @@ -55,11 +55,18 @@ # MLTON += -profile $(PROFILE) #endif -bin/urweb: src/compiler.mlb src/urweb.mlb src/*.sig src/*.sml \ +bin/urweb: xml/entities.sml \ + src/compiler.mlb src/urweb.mlb src/*.sig src/*.sml \ src/urweb.mlton.lex.sml \ src/urweb.mlton.grm.sig src/urweb.mlton.grm.sml $(MLTON) -output $@ src/compiler.mlb +xml/entities.sml: xml/parse xml/xhtml-lat1.ent xml/xhtml-special.ent xml/xhtml-symbol.ent + xml/parse >xml/entities.sml + +xml/parse: xml/parse.sml + $(MLTON) xml/parse.sml + install-exec-emacs: if USE_EMACS mkdir -p $(DESTDIR)$(SITELISP)
--- a/Makefile.in Sat Nov 05 13:12:07 2011 -0400 +++ b/Makefile.in Sat Nov 05 15:05:13 2011 -0400 @@ -750,7 +750,7 @@ clean-local: rm -f src/*.mlton.grm.* src/*.mlton.lex.* \ - src/urweb.cm src/urweb.mlb + src/urweb.cm src/urweb.mlb xml/parse xml/entities.sml rm -rf .cm src/.cm src/urweb.cm: src/prefix.cm src/sources @@ -782,11 +782,18 @@ # MLTON += -profile $(PROFILE) #endif -bin/urweb: src/compiler.mlb src/urweb.mlb src/*.sig src/*.sml \ +bin/urweb: xml/entities.sml \ + src/compiler.mlb src/urweb.mlb src/*.sig src/*.sml \ src/urweb.mlton.lex.sml \ src/urweb.mlton.grm.sig src/urweb.mlton.grm.sml $(MLTON) -output $@ src/compiler.mlb +xml/entities.sml: xml/parse xml/xhtml-lat1.ent xml/xhtml-special.ent xml/xhtml-symbol.ent + xml/parse >xml/entities.sml + +xml/parse: xml/parse.sml + $(MLTON) xml/parse.sml + install-exec-emacs: @USE_EMACS_TRUE@ mkdir -p $(DESTDIR)$(SITELISP) @USE_EMACS_TRUE@ cp src/elisp/*.el $(DESTDIR)$(SITELISP)/
--- a/src/sources Sat Nov 05 13:12:07 2011 -0400 +++ b/src/sources Sat Nov 05 15:05:13 2011 -0400 @@ -47,6 +47,11 @@ source.sml +utf8.sig +utf8.sml + +../xml/entities.sml + urweb.grm urweb.lex
--- a/src/urweb.lex Sat Nov 05 13:12:07 2011 -0400 +++ b/src/urweb.lex Sat Nov 05 15:05:13 2011 -0400 @@ -113,6 +113,14 @@ xmlString := false) +structure StringMap = BinaryMapFn(struct + type ord_key = string + val compare = String.compare + end) + +val entities = foldl (fn ((key, value), entities) => StringMap.insert (entities, key, value)) + StringMap.empty Entities.all + fun unescape loc s = let fun process (s, acc) = @@ -141,20 +149,16 @@ let val code = Substring.string (Substring.slice (code, 1, NONE)) in - Option.map chr (Int.fromString code) + Option.map Utf8.encode (Int.fromString code) end - else case Substring.string code of - "amp" => SOME #"&" - | "lt" => SOME #"<" - | "gt" => SOME #">" - | "quot" => SOME #"\"" - | _ => NONE + else + Option.map Utf8.encode (StringMap.find (entities, Substring.string code)) in case special of NONE => (ErrorMsg.errorAt' loc ("Unsupported XML character entity " ^ Substring.string code); "") - | SOME ch => process (s, Substring.full (String.str ch) :: pre :: acc) + | SOME sp => process (s, Substring.full sp :: pre :: acc) end end end
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/utf8.sig Sat Nov 05 15:05:13 2011 -0400 @@ -0,0 +1,32 @@ +(* Copyright (c) 2011, Adam Chlipala + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - The names of contributors may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + *) + +(* UTF-8 conversion *) + +signature UTF8 = sig + val encode : int -> string +end
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/utf8.sml Sat Nov 05 15:05:13 2011 -0400 @@ -0,0 +1,59 @@ +(* Copyright (c) 2011, Adam Chlipala + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - The names of contributors may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + *) + +(* UTF-8 conversion *) + +structure Utf8 :> UTF8 = struct + +fun byte n = str (chr (Word.toInt n)) + +fun encode n = + if n <= 0 then + raise Fail "Invalid character to UTF-8-encode" + else if n <= 0x7F then + str (chr n) + else if n <= 0x7FF then + let + val w = Word.fromInt n + val b1 = Word.orb (Word.fromInt (128 + 64), Word.>> (w, Word.fromInt 6)) + val b2 = Word.orb (Word.fromInt 128, Word.andb (w, Word.fromInt 63)) + in + byte b1 ^ byte b2 + end + else if n <= 0xFFFF then + let + val w = Word.fromInt n + val b1 = Word.orb (Word.fromInt (128 + 64 + 32), Word.>> (w, Word.fromInt 12)) + val b2 = Word.orb (Word.fromInt 128, Word.andb (Word.>> (w, Word.fromInt 6), Word.fromInt 63)) + val b3 = Word.orb (Word.fromInt 128, Word.andb (w, Word.fromInt 63)) + in + byte b1 ^ byte b2 ^ byte b3 + end + else + raise Fail "Exceeded supported range for UTF-8 characters" + +end
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/entities.ur Sat Nov 05 15:05:13 2011 -0400 @@ -0,0 +1,5 @@ +fun main () : transaction page = return <xml><body> + Hello world! & so on, © me today (8 €)<br/> + ♠ ♣ ♥ ♦<br/> + † DANGER † +</body></xml>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xml/parse.sml Sat Nov 05 15:05:13 2011 -0400 @@ -0,0 +1,77 @@ +(* Copyright (c) 2011, Adam Chlipala + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - The names of contributors may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + *) + +(* Building SML code from XML entity tables *) + +fun main () = + let + fun doFile fname = + let + val inf = TextIO.openIn fname + + fun loop () = + case TextIO.inputLine inf of + NONE => TextIO.closeIn inf + | SOME line => + if String.isPrefix "<!ENTITY " line then + case String.tokens (fn ch => Char.isSpace ch orelse ch = #">") line of + "<!ENTITY" :: ent :: exp :: _ => + let + val exp = if String.isPrefix "\"&#" exp andalso String.isSuffix ";\"" exp then + let + val middle = String.substring (exp, 3, size exp - 5) + in + if CharVector.all Char.isDigit middle then + middle + else if String.isPrefix "38;#" middle then + String.extract (middle, 4, NONE) + else + raise Fail "Bad entity expression [1]" + end + else + raise Fail "Bad entity expansion [2]" + in + print ("\t\t(\"" ^ ent ^ "\", " ^ exp ^ "),\n"); + loop () + end + | _ => raise Fail "Bad ENTITY line" + else + loop () + in + loop () + end + in + print "structure Entities = struct\n"; + print "\tval all = [\n"; + doFile "xml/xhtml-lat1.ent"; + doFile "xml/xhtml-special.ent"; + doFile "xml/xhtml-symbol.ent"; + print "\t(\"\", 0)]\n"; + print "end\n" + end + +val () = main ()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xml/xhtml-lat1.ent Sat Nov 05 15:05:13 2011 -0400 @@ -0,0 +1,196 @@ +<!-- Portions (C) International Organization for Standardization 1986 + Permission to copy in any form is granted for use with + conforming SGML systems and applications as defined in + ISO 8879, provided this notice is included in all copies. +--> +<!-- Character entity set. Typical invocation: + <!ENTITY % HTMLlat1 PUBLIC + "-//W3C//ENTITIES Latin 1 for XHTML//EN" + "http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent"> + %HTMLlat1; +--> + +<!ENTITY nbsp " "> <!-- no-break space = non-breaking space, + U+00A0 ISOnum --> +<!ENTITY iexcl "¡"> <!-- inverted exclamation mark, U+00A1 ISOnum --> +<!ENTITY cent "¢"> <!-- cent sign, U+00A2 ISOnum --> +<!ENTITY pound "£"> <!-- pound sign, U+00A3 ISOnum --> +<!ENTITY curren "¤"> <!-- currency sign, U+00A4 ISOnum --> +<!ENTITY yen "¥"> <!-- yen sign = yuan sign, U+00A5 ISOnum --> +<!ENTITY brvbar "¦"> <!-- broken bar = broken vertical bar, + U+00A6 ISOnum --> +<!ENTITY sect "§"> <!-- section sign, U+00A7 ISOnum --> +<!ENTITY uml "¨"> <!-- diaeresis = spacing diaeresis, + U+00A8 ISOdia --> +<!ENTITY copy "©"> <!-- copyright sign, U+00A9 ISOnum --> +<!ENTITY ordf "ª"> <!-- feminine ordinal indicator, U+00AA ISOnum --> +<!ENTITY laquo "«"> <!-- left-pointing double angle quotation mark + = left pointing guillemet, U+00AB ISOnum --> +<!ENTITY not "¬"> <!-- not sign = angled dash, + U+00AC ISOnum --> +<!ENTITY shy "­"> <!-- soft hyphen = discretionary hyphen, + U+00AD ISOnum --> +<!ENTITY reg "®"> <!-- registered sign = registered trade mark sign, + U+00AE ISOnum --> +<!ENTITY macr "¯"> <!-- macron = spacing macron = overline + = APL overbar, U+00AF ISOdia --> +<!ENTITY deg "°"> <!-- degree sign, U+00B0 ISOnum --> +<!ENTITY plusmn "±"> <!-- plus-minus sign = plus-or-minus sign, + U+00B1 ISOnum --> +<!ENTITY sup2 "²"> <!-- superscript two = superscript digit two + = squared, U+00B2 ISOnum --> +<!ENTITY sup3 "³"> <!-- superscript three = superscript digit three + = cubed, U+00B3 ISOnum --> +<!ENTITY acute "´"> <!-- acute accent = spacing acute, + U+00B4 ISOdia --> +<!ENTITY micro "µ"> <!-- micro sign, U+00B5 ISOnum --> +<!ENTITY para "¶"> <!-- pilcrow sign = paragraph sign, + U+00B6 ISOnum --> +<!ENTITY middot "·"> <!-- middle dot = Georgian comma + = Greek middle dot, U+00B7 ISOnum --> +<!ENTITY cedil "¸"> <!-- cedilla = spacing cedilla, U+00B8 ISOdia --> +<!ENTITY sup1 "¹"> <!-- superscript one = superscript digit one, + U+00B9 ISOnum --> +<!ENTITY ordm "º"> <!-- masculine ordinal indicator, + U+00BA ISOnum --> +<!ENTITY raquo "»"> <!-- right-pointing double angle quotation mark + = right pointing guillemet, U+00BB ISOnum --> +<!ENTITY frac14 "¼"> <!-- vulgar fraction one quarter + = fraction one quarter, U+00BC ISOnum --> +<!ENTITY frac12 "½"> <!-- vulgar fraction one half + = fraction one half, U+00BD ISOnum --> +<!ENTITY frac34 "¾"> <!-- vulgar fraction three quarters + = fraction three quarters, U+00BE ISOnum --> +<!ENTITY iquest "¿"> <!-- inverted question mark + = turned question mark, U+00BF ISOnum --> +<!ENTITY Agrave "À"> <!-- latin capital letter A with grave + = latin capital letter A grave, + U+00C0 ISOlat1 --> +<!ENTITY Aacute "Á"> <!-- latin capital letter A with acute, + U+00C1 ISOlat1 --> +<!ENTITY Acirc "Â"> <!-- latin capital letter A with circumflex, + U+00C2 ISOlat1 --> +<!ENTITY Atilde "Ã"> <!-- latin capital letter A with tilde, + U+00C3 ISOlat1 --> +<!ENTITY Auml "Ä"> <!-- latin capital letter A with diaeresis, + U+00C4 ISOlat1 --> +<!ENTITY Aring "Å"> <!-- latin capital letter A with ring above + = latin capital letter A ring, + U+00C5 ISOlat1 --> +<!ENTITY AElig "Æ"> <!-- latin capital letter AE + = latin capital ligature AE, + U+00C6 ISOlat1 --> +<!ENTITY Ccedil "Ç"> <!-- latin capital letter C with cedilla, + U+00C7 ISOlat1 --> +<!ENTITY Egrave "È"> <!-- latin capital letter E with grave, + U+00C8 ISOlat1 --> +<!ENTITY Eacute "É"> <!-- latin capital letter E with acute, + U+00C9 ISOlat1 --> +<!ENTITY Ecirc "Ê"> <!-- latin capital letter E with circumflex, + U+00CA ISOlat1 --> +<!ENTITY Euml "Ë"> <!-- latin capital letter E with diaeresis, + U+00CB ISOlat1 --> +<!ENTITY Igrave "Ì"> <!-- latin capital letter I with grave, + U+00CC ISOlat1 --> +<!ENTITY Iacute "Í"> <!-- latin capital letter I with acute, + U+00CD ISOlat1 --> +<!ENTITY Icirc "Î"> <!-- latin capital letter I with circumflex, + U+00CE ISOlat1 --> +<!ENTITY Iuml "Ï"> <!-- latin capital letter I with diaeresis, + U+00CF ISOlat1 --> +<!ENTITY ETH "Ð"> <!-- latin capital letter ETH, U+00D0 ISOlat1 --> +<!ENTITY Ntilde "Ñ"> <!-- latin capital letter N with tilde, + U+00D1 ISOlat1 --> +<!ENTITY Ograve "Ò"> <!-- latin capital letter O with grave, + U+00D2 ISOlat1 --> +<!ENTITY Oacute "Ó"> <!-- latin capital letter O with acute, + U+00D3 ISOlat1 --> +<!ENTITY Ocirc "Ô"> <!-- latin capital letter O with circumflex, + U+00D4 ISOlat1 --> +<!ENTITY Otilde "Õ"> <!-- latin capital letter O with tilde, + U+00D5 ISOlat1 --> +<!ENTITY Ouml "Ö"> <!-- latin capital letter O with diaeresis, + U+00D6 ISOlat1 --> +<!ENTITY times "×"> <!-- multiplication sign, U+00D7 ISOnum --> +<!ENTITY Oslash "Ø"> <!-- latin capital letter O with stroke + = latin capital letter O slash, + U+00D8 ISOlat1 --> +<!ENTITY Ugrave "Ù"> <!-- latin capital letter U with grave, + U+00D9 ISOlat1 --> +<!ENTITY Uacute "Ú"> <!-- latin capital letter U with acute, + U+00DA ISOlat1 --> +<!ENTITY Ucirc "Û"> <!-- latin capital letter U with circumflex, + U+00DB ISOlat1 --> +<!ENTITY Uuml "Ü"> <!-- latin capital letter U with diaeresis, + U+00DC ISOlat1 --> +<!ENTITY Yacute "Ý"> <!-- latin capital letter Y with acute, + U+00DD ISOlat1 --> +<!ENTITY THORN "Þ"> <!-- latin capital letter THORN, + U+00DE ISOlat1 --> +<!ENTITY szlig "ß"> <!-- latin small letter sharp s = ess-zed, + U+00DF ISOlat1 --> +<!ENTITY agrave "à"> <!-- latin small letter a with grave + = latin small letter a grave, + U+00E0 ISOlat1 --> +<!ENTITY aacute "á"> <!-- latin small letter a with acute, + U+00E1 ISOlat1 --> +<!ENTITY acirc "â"> <!-- latin small letter a with circumflex, + U+00E2 ISOlat1 --> +<!ENTITY atilde "ã"> <!-- latin small letter a with tilde, + U+00E3 ISOlat1 --> +<!ENTITY auml "ä"> <!-- latin small letter a with diaeresis, + U+00E4 ISOlat1 --> +<!ENTITY aring "å"> <!-- latin small letter a with ring above + = latin small letter a ring, + U+00E5 ISOlat1 --> +<!ENTITY aelig "æ"> <!-- latin small letter ae + = latin small ligature ae, U+00E6 ISOlat1 --> +<!ENTITY ccedil "ç"> <!-- latin small letter c with cedilla, + U+00E7 ISOlat1 --> +<!ENTITY egrave "è"> <!-- latin small letter e with grave, + U+00E8 ISOlat1 --> +<!ENTITY eacute "é"> <!-- latin small letter e with acute, + U+00E9 ISOlat1 --> +<!ENTITY ecirc "ê"> <!-- latin small letter e with circumflex, + U+00EA ISOlat1 --> +<!ENTITY euml "ë"> <!-- latin small letter e with diaeresis, + U+00EB ISOlat1 --> +<!ENTITY igrave "ì"> <!-- latin small letter i with grave, + U+00EC ISOlat1 --> +<!ENTITY iacute "í"> <!-- latin small letter i with acute, + U+00ED ISOlat1 --> +<!ENTITY icirc "î"> <!-- latin small letter i with circumflex, + U+00EE ISOlat1 --> +<!ENTITY iuml "ï"> <!-- latin small letter i with diaeresis, + U+00EF ISOlat1 --> +<!ENTITY eth "ð"> <!-- latin small letter eth, U+00F0 ISOlat1 --> +<!ENTITY ntilde "ñ"> <!-- latin small letter n with tilde, + U+00F1 ISOlat1 --> +<!ENTITY ograve "ò"> <!-- latin small letter o with grave, + U+00F2 ISOlat1 --> +<!ENTITY oacute "ó"> <!-- latin small letter o with acute, + U+00F3 ISOlat1 --> +<!ENTITY ocirc "ô"> <!-- latin small letter o with circumflex, + U+00F4 ISOlat1 --> +<!ENTITY otilde "õ"> <!-- latin small letter o with tilde, + U+00F5 ISOlat1 --> +<!ENTITY ouml "ö"> <!-- latin small letter o with diaeresis, + U+00F6 ISOlat1 --> +<!ENTITY divide "÷"> <!-- division sign, U+00F7 ISOnum --> +<!ENTITY oslash "ø"> <!-- latin small letter o with stroke, + = latin small letter o slash, + U+00F8 ISOlat1 --> +<!ENTITY ugrave "ù"> <!-- latin small letter u with grave, + U+00F9 ISOlat1 --> +<!ENTITY uacute "ú"> <!-- latin small letter u with acute, + U+00FA ISOlat1 --> +<!ENTITY ucirc "û"> <!-- latin small letter u with circumflex, + U+00FB ISOlat1 --> +<!ENTITY uuml "ü"> <!-- latin small letter u with diaeresis, + U+00FC ISOlat1 --> +<!ENTITY yacute "ý"> <!-- latin small letter y with acute, + U+00FD ISOlat1 --> +<!ENTITY thorn "þ"> <!-- latin small letter thorn, + U+00FE ISOlat1 --> +<!ENTITY yuml "ÿ"> <!-- latin small letter y with diaeresis, + U+00FF ISOlat1 -->
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xml/xhtml-special.ent Sat Nov 05 15:05:13 2011 -0400 @@ -0,0 +1,80 @@ +<!-- Special characters for XHTML --> + +<!-- Character entity set. Typical invocation: + <!ENTITY % HTMLspecial PUBLIC + "-//W3C//ENTITIES Special for XHTML//EN" + "http://www.w3.org/TR/xhtml1/DTD/xhtml-special.ent"> + %HTMLspecial; +--> + +<!-- Portions (C) International Organization for Standardization 1986: + Permission to copy in any form is granted for use with + conforming SGML systems and applications as defined in + ISO 8879, provided this notice is included in all copies. +--> + +<!-- Relevant ISO entity set is given unless names are newly introduced. + New names (i.e., not in ISO 8879 list) do not clash with any + existing ISO 8879 entity names. ISO 10646 character numbers + are given for each character, in hex. values are decimal + conversions of the ISO 10646 values and refer to the document + character set. Names are Unicode names. +--> + +<!-- C0 Controls and Basic Latin --> +<!ENTITY quot """> <!-- quotation mark, U+0022 ISOnum --> +<!ENTITY amp "&#38;"> <!-- ampersand, U+0026 ISOnum --> +<!ENTITY lt "&#60;"> <!-- less-than sign, U+003C ISOnum --> +<!ENTITY gt ">"> <!-- greater-than sign, U+003E ISOnum --> +<!ENTITY apos "'"> <!-- apostrophe = APL quote, U+0027 ISOnum --> + +<!-- Latin Extended-A --> +<!ENTITY OElig "Œ"> <!-- latin capital ligature OE, + U+0152 ISOlat2 --> +<!ENTITY oelig "œ"> <!-- latin small ligature oe, U+0153 ISOlat2 --> +<!-- ligature is a misnomer, this is a separate character in some languages --> +<!ENTITY Scaron "Š"> <!-- latin capital letter S with caron, + U+0160 ISOlat2 --> +<!ENTITY scaron "š"> <!-- latin small letter s with caron, + U+0161 ISOlat2 --> +<!ENTITY Yuml "Ÿ"> <!-- latin capital letter Y with diaeresis, + U+0178 ISOlat2 --> + +<!-- Spacing Modifier Letters --> +<!ENTITY circ "ˆ"> <!-- modifier letter circumflex accent, + U+02C6 ISOpub --> +<!ENTITY tilde "˜"> <!-- small tilde, U+02DC ISOdia --> + +<!-- General Punctuation --> +<!ENTITY ensp " "> <!-- en space, U+2002 ISOpub --> +<!ENTITY emsp " "> <!-- em space, U+2003 ISOpub --> +<!ENTITY thinsp " "> <!-- thin space, U+2009 ISOpub --> +<!ENTITY zwnj "‌"> <!-- zero width non-joiner, + U+200C NEW RFC 2070 --> +<!ENTITY zwj "‍"> <!-- zero width joiner, U+200D NEW RFC 2070 --> +<!ENTITY lrm "‎"> <!-- left-to-right mark, U+200E NEW RFC 2070 --> +<!ENTITY rlm "‏"> <!-- right-to-left mark, U+200F NEW RFC 2070 --> +<!ENTITY ndash "–"> <!-- en dash, U+2013 ISOpub --> +<!ENTITY mdash "—"> <!-- em dash, U+2014 ISOpub --> +<!ENTITY lsquo "‘"> <!-- left single quotation mark, + U+2018 ISOnum --> +<!ENTITY rsquo "’"> <!-- right single quotation mark, + U+2019 ISOnum --> +<!ENTITY sbquo "‚"> <!-- single low-9 quotation mark, U+201A NEW --> +<!ENTITY ldquo "“"> <!-- left double quotation mark, + U+201C ISOnum --> +<!ENTITY rdquo "”"> <!-- right double quotation mark, + U+201D ISOnum --> +<!ENTITY bdquo "„"> <!-- double low-9 quotation mark, U+201E NEW --> +<!ENTITY dagger "†"> <!-- dagger, U+2020 ISOpub --> +<!ENTITY Dagger "‡"> <!-- double dagger, U+2021 ISOpub --> +<!ENTITY permil "‰"> <!-- per mille sign, U+2030 ISOtech --> +<!ENTITY lsaquo "‹"> <!-- single left-pointing angle quotation mark, + U+2039 ISO proposed --> +<!-- lsaquo is proposed but not yet ISO standardized --> +<!ENTITY rsaquo "›"> <!-- single right-pointing angle quotation mark, + U+203A ISO proposed --> +<!-- rsaquo is proposed but not yet ISO standardized --> + +<!-- Currency Symbols --> +<!ENTITY euro "€"> <!-- euro sign, U+20AC NEW -->
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xml/xhtml-symbol.ent Sat Nov 05 15:05:13 2011 -0400 @@ -0,0 +1,237 @@ +<!-- Mathematical, Greek and Symbolic characters for XHTML --> + +<!-- Character entity set. Typical invocation: + <!ENTITY % HTMLsymbol PUBLIC + "-//W3C//ENTITIES Symbols for XHTML//EN" + "http://www.w3.org/TR/xhtml1/DTD/xhtml-symbol.ent"> + %HTMLsymbol; +--> + +<!-- Portions (C) International Organization for Standardization 1986: + Permission to copy in any form is granted for use with + conforming SGML systems and applications as defined in + ISO 8879, provided this notice is included in all copies. +--> + +<!-- Relevant ISO entity set is given unless names are newly introduced. + New names (i.e., not in ISO 8879 list) do not clash with any + existing ISO 8879 entity names. ISO 10646 character numbers + are given for each character, in hex. values are decimal + conversions of the ISO 10646 values and refer to the document + character set. Names are Unicode names. +--> + +<!-- Latin Extended-B --> +<!ENTITY fnof "ƒ"> <!-- latin small letter f with hook = function + = florin, U+0192 ISOtech --> + +<!-- Greek --> +<!ENTITY Alpha "Α"> <!-- greek capital letter alpha, U+0391 --> +<!ENTITY Beta "Β"> <!-- greek capital letter beta, U+0392 --> +<!ENTITY Gamma "Γ"> <!-- greek capital letter gamma, + U+0393 ISOgrk3 --> +<!ENTITY Delta "Δ"> <!-- greek capital letter delta, + U+0394 ISOgrk3 --> +<!ENTITY Epsilon "Ε"> <!-- greek capital letter epsilon, U+0395 --> +<!ENTITY Zeta "Ζ"> <!-- greek capital letter zeta, U+0396 --> +<!ENTITY Eta "Η"> <!-- greek capital letter eta, U+0397 --> +<!ENTITY Theta "Θ"> <!-- greek capital letter theta, + U+0398 ISOgrk3 --> +<!ENTITY Iota "Ι"> <!-- greek capital letter iota, U+0399 --> +<!ENTITY Kappa "Κ"> <!-- greek capital letter kappa, U+039A --> +<!ENTITY Lambda "Λ"> <!-- greek capital letter lamda, + U+039B ISOgrk3 --> +<!ENTITY Mu "Μ"> <!-- greek capital letter mu, U+039C --> +<!ENTITY Nu "Ν"> <!-- greek capital letter nu, U+039D --> +<!ENTITY Xi "Ξ"> <!-- greek capital letter xi, U+039E ISOgrk3 --> +<!ENTITY Omicron "Ο"> <!-- greek capital letter omicron, U+039F --> +<!ENTITY Pi "Π"> <!-- greek capital letter pi, U+03A0 ISOgrk3 --> +<!ENTITY Rho "Ρ"> <!-- greek capital letter rho, U+03A1 --> +<!-- there is no Sigmaf, and no U+03A2 character either --> +<!ENTITY Sigma "Σ"> <!-- greek capital letter sigma, + U+03A3 ISOgrk3 --> +<!ENTITY Tau "Τ"> <!-- greek capital letter tau, U+03A4 --> +<!ENTITY Upsilon "Υ"> <!-- greek capital letter upsilon, + U+03A5 ISOgrk3 --> +<!ENTITY Phi "Φ"> <!-- greek capital letter phi, + U+03A6 ISOgrk3 --> +<!ENTITY Chi "Χ"> <!-- greek capital letter chi, U+03A7 --> +<!ENTITY Psi "Ψ"> <!-- greek capital letter psi, + U+03A8 ISOgrk3 --> +<!ENTITY Omega "Ω"> <!-- greek capital letter omega, + U+03A9 ISOgrk3 --> + +<!ENTITY alpha "α"> <!-- greek small letter alpha, + U+03B1 ISOgrk3 --> +<!ENTITY beta "β"> <!-- greek small letter beta, U+03B2 ISOgrk3 --> +<!ENTITY gamma "γ"> <!-- greek small letter gamma, + U+03B3 ISOgrk3 --> +<!ENTITY delta "δ"> <!-- greek small letter delta, + U+03B4 ISOgrk3 --> +<!ENTITY epsilon "ε"> <!-- greek small letter epsilon, + U+03B5 ISOgrk3 --> +<!ENTITY zeta "ζ"> <!-- greek small letter zeta, U+03B6 ISOgrk3 --> +<!ENTITY eta "η"> <!-- greek small letter eta, U+03B7 ISOgrk3 --> +<!ENTITY theta "θ"> <!-- greek small letter theta, + U+03B8 ISOgrk3 --> +<!ENTITY iota "ι"> <!-- greek small letter iota, U+03B9 ISOgrk3 --> +<!ENTITY kappa "κ"> <!-- greek small letter kappa, + U+03BA ISOgrk3 --> +<!ENTITY lambda "λ"> <!-- greek small letter lamda, + U+03BB ISOgrk3 --> +<!ENTITY mu "μ"> <!-- greek small letter mu, U+03BC ISOgrk3 --> +<!ENTITY nu "ν"> <!-- greek small letter nu, U+03BD ISOgrk3 --> +<!ENTITY xi "ξ"> <!-- greek small letter xi, U+03BE ISOgrk3 --> +<!ENTITY omicron "ο"> <!-- greek small letter omicron, U+03BF NEW --> +<!ENTITY pi "π"> <!-- greek small letter pi, U+03C0 ISOgrk3 --> +<!ENTITY rho "ρ"> <!-- greek small letter rho, U+03C1 ISOgrk3 --> +<!ENTITY sigmaf "ς"> <!-- greek small letter final sigma, + U+03C2 ISOgrk3 --> +<!ENTITY sigma "σ"> <!-- greek small letter sigma, + U+03C3 ISOgrk3 --> +<!ENTITY tau "τ"> <!-- greek small letter tau, U+03C4 ISOgrk3 --> +<!ENTITY upsilon "υ"> <!-- greek small letter upsilon, + U+03C5 ISOgrk3 --> +<!ENTITY phi "φ"> <!-- greek small letter phi, U+03C6 ISOgrk3 --> +<!ENTITY chi "χ"> <!-- greek small letter chi, U+03C7 ISOgrk3 --> +<!ENTITY psi "ψ"> <!-- greek small letter psi, U+03C8 ISOgrk3 --> +<!ENTITY omega "ω"> <!-- greek small letter omega, + U+03C9 ISOgrk3 --> +<!ENTITY thetasym "ϑ"> <!-- greek theta symbol, + U+03D1 NEW --> +<!ENTITY upsih "ϒ"> <!-- greek upsilon with hook symbol, + U+03D2 NEW --> +<!ENTITY piv "ϖ"> <!-- greek pi symbol, U+03D6 ISOgrk3 --> + +<!-- General Punctuation --> +<!ENTITY bull "•"> <!-- bullet = black small circle, + U+2022 ISOpub --> +<!-- bullet is NOT the same as bullet operator, U+2219 --> +<!ENTITY hellip "…"> <!-- horizontal ellipsis = three dot leader, + U+2026 ISOpub --> +<!ENTITY prime "′"> <!-- prime = minutes = feet, U+2032 ISOtech --> +<!ENTITY Prime "″"> <!-- double prime = seconds = inches, + U+2033 ISOtech --> +<!ENTITY oline "‾"> <!-- overline = spacing overscore, + U+203E NEW --> +<!ENTITY frasl "⁄"> <!-- fraction slash, U+2044 NEW --> + +<!-- Letterlike Symbols --> +<!ENTITY weierp "℘"> <!-- script capital P = power set + = Weierstrass p, U+2118 ISOamso --> +<!ENTITY image "ℑ"> <!-- black-letter capital I = imaginary part, + U+2111 ISOamso --> +<!ENTITY real "ℜ"> <!-- black-letter capital R = real part symbol, + U+211C ISOamso --> +<!ENTITY trade "™"> <!-- trade mark sign, U+2122 ISOnum --> +<!ENTITY alefsym "ℵ"> <!-- alef symbol = first transfinite cardinal, + U+2135 NEW --> +<!-- alef symbol is NOT the same as hebrew letter alef, + U+05D0 although the same glyph could be used to depict both characters --> + +<!-- Arrows --> +<!ENTITY larr "←"> <!-- leftwards arrow, U+2190 ISOnum --> +<!ENTITY uarr "↑"> <!-- upwards arrow, U+2191 ISOnum--> +<!ENTITY rarr "→"> <!-- rightwards arrow, U+2192 ISOnum --> +<!ENTITY darr "↓"> <!-- downwards arrow, U+2193 ISOnum --> +<!ENTITY harr "↔"> <!-- left right arrow, U+2194 ISOamsa --> +<!ENTITY crarr "↵"> <!-- downwards arrow with corner leftwards + = carriage return, U+21B5 NEW --> +<!ENTITY lArr "⇐"> <!-- leftwards double arrow, U+21D0 ISOtech --> +<!-- Unicode does not say that lArr is the same as the 'is implied by' arrow + but also does not have any other character for that function. So lArr can + be used for 'is implied by' as ISOtech suggests --> +<!ENTITY uArr "⇑"> <!-- upwards double arrow, U+21D1 ISOamsa --> +<!ENTITY rArr "⇒"> <!-- rightwards double arrow, + U+21D2 ISOtech --> +<!-- Unicode does not say this is the 'implies' character but does not have + another character with this function so rArr can be used for 'implies' + as ISOtech suggests --> +<!ENTITY dArr "⇓"> <!-- downwards double arrow, U+21D3 ISOamsa --> +<!ENTITY hArr "⇔"> <!-- left right double arrow, + U+21D4 ISOamsa --> + +<!-- Mathematical Operators --> +<!ENTITY forall "∀"> <!-- for all, U+2200 ISOtech --> +<!ENTITY part "∂"> <!-- partial differential, U+2202 ISOtech --> +<!ENTITY exist "∃"> <!-- there exists, U+2203 ISOtech --> +<!ENTITY empty "∅"> <!-- empty set = null set, U+2205 ISOamso --> +<!ENTITY nabla "∇"> <!-- nabla = backward difference, + U+2207 ISOtech --> +<!ENTITY isin "∈"> <!-- element of, U+2208 ISOtech --> +<!ENTITY notin "∉"> <!-- not an element of, U+2209 ISOtech --> +<!ENTITY ni "∋"> <!-- contains as member, U+220B ISOtech --> +<!ENTITY prod "∏"> <!-- n-ary product = product sign, + U+220F ISOamsb --> +<!-- prod is NOT the same character as U+03A0 'greek capital letter pi' though + the same glyph might be used for both --> +<!ENTITY sum "∑"> <!-- n-ary summation, U+2211 ISOamsb --> +<!-- sum is NOT the same character as U+03A3 'greek capital letter sigma' + though the same glyph might be used for both --> +<!ENTITY minus "−"> <!-- minus sign, U+2212 ISOtech --> +<!ENTITY lowast "∗"> <!-- asterisk operator, U+2217 ISOtech --> +<!ENTITY radic "√"> <!-- square root = radical sign, + U+221A ISOtech --> +<!ENTITY prop "∝"> <!-- proportional to, U+221D ISOtech --> +<!ENTITY infin "∞"> <!-- infinity, U+221E ISOtech --> +<!ENTITY ang "∠"> <!-- angle, U+2220 ISOamso --> +<!ENTITY and "∧"> <!-- logical and = wedge, U+2227 ISOtech --> +<!ENTITY or "∨"> <!-- logical or = vee, U+2228 ISOtech --> +<!ENTITY cap "∩"> <!-- intersection = cap, U+2229 ISOtech --> +<!ENTITY cup "∪"> <!-- union = cup, U+222A ISOtech --> +<!ENTITY int "∫"> <!-- integral, U+222B ISOtech --> +<!ENTITY there4 "∴"> <!-- therefore, U+2234 ISOtech --> +<!ENTITY sim "∼"> <!-- tilde operator = varies with = similar to, + U+223C ISOtech --> +<!-- tilde operator is NOT the same character as the tilde, U+007E, + although the same glyph might be used to represent both --> +<!ENTITY cong "≅"> <!-- approximately equal to, U+2245 ISOtech --> +<!ENTITY asymp "≈"> <!-- almost equal to = asymptotic to, + U+2248 ISOamsr --> +<!ENTITY ne "≠"> <!-- not equal to, U+2260 ISOtech --> +<!ENTITY equiv "≡"> <!-- identical to, U+2261 ISOtech --> +<!ENTITY le "≤"> <!-- less-than or equal to, U+2264 ISOtech --> +<!ENTITY ge "≥"> <!-- greater-than or equal to, + U+2265 ISOtech --> +<!ENTITY sub "⊂"> <!-- subset of, U+2282 ISOtech --> +<!ENTITY sup "⊃"> <!-- superset of, U+2283 ISOtech --> +<!ENTITY nsub "⊄"> <!-- not a subset of, U+2284 ISOamsn --> +<!ENTITY sube "⊆"> <!-- subset of or equal to, U+2286 ISOtech --> +<!ENTITY supe "⊇"> <!-- superset of or equal to, + U+2287 ISOtech --> +<!ENTITY oplus "⊕"> <!-- circled plus = direct sum, + U+2295 ISOamsb --> +<!ENTITY otimes "⊗"> <!-- circled times = vector product, + U+2297 ISOamsb --> +<!ENTITY perp "⊥"> <!-- up tack = orthogonal to = perpendicular, + U+22A5 ISOtech --> +<!ENTITY sdot "⋅"> <!-- dot operator, U+22C5 ISOamsb --> +<!-- dot operator is NOT the same character as U+00B7 middle dot --> + +<!-- Miscellaneous Technical --> +<!ENTITY lceil "⌈"> <!-- left ceiling = APL upstile, + U+2308 ISOamsc --> +<!ENTITY rceil "⌉"> <!-- right ceiling, U+2309 ISOamsc --> +<!ENTITY lfloor "⌊"> <!-- left floor = APL downstile, + U+230A ISOamsc --> +<!ENTITY rfloor "⌋"> <!-- right floor, U+230B ISOamsc --> +<!ENTITY lang "〈"> <!-- left-pointing angle bracket = bra, + U+2329 ISOtech --> +<!-- lang is NOT the same character as U+003C 'less than sign' + or U+2039 'single left-pointing angle quotation mark' --> +<!ENTITY rang "〉"> <!-- right-pointing angle bracket = ket, + U+232A ISOtech --> +<!-- rang is NOT the same character as U+003E 'greater than sign' + or U+203A 'single right-pointing angle quotation mark' --> + +<!-- Geometric Shapes --> +<!ENTITY loz "◊"> <!-- lozenge, U+25CA ISOpub --> + +<!-- Miscellaneous Symbols --> +<!ENTITY spades "♠"> <!-- black spade suit, U+2660 ISOpub --> +<!-- black here seems to mean filled as opposed to hollow --> +<!ENTITY clubs "♣"> <!-- black club suit = shamrock, + U+2663 ISOpub --> +<!ENTITY hearts "♥"> <!-- black heart suit = valentine, + U+2665 ISOpub --> +<!ENTITY diams "♦"> <!-- black diamond suit, U+2666 ISOpub -->