annotate src/lacweb.lex @ 235:0608a0cfd32a

COUNT
author Adam Chlipala <adamc@hcoop.net>
date Thu, 28 Aug 2008 11:59:46 -0400
parents a338da9d82f3
children f5732dc1316c
rev   line source
adamc@1 1 (* Copyright (c) 2008, Adam Chlipala
adamc@1 2 * All rights reserved.
adamc@1 3 *
adamc@1 4 * Redistribution and use in source and binary forms, with or without
adamc@1 5 * modification, are permitted provided that the following conditions are met:
adamc@1 6 *
adamc@1 7 * - Redistributions of source code must retain the above copyright notice,
adamc@1 8 * this list of conditions and the following disclaimer.
adamc@1 9 * - Redistributions in binary form must reproduce the above copyright notice,
adamc@1 10 * this list of conditions and the following disclaimer in the documentation
adamc@1 11 * and/or other materials provided with the distribution.
adamc@1 12 * - The names of contributors may not be used to endorse or promote products
adamc@1 13 * derived from this software without specific prior written permission.
adamc@1 14 *
adamc@1 15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
adamc@1 16 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
adamc@1 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
adamc@1 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
adamc@1 19 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
adamc@1 20 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
adamc@1 21 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
adamc@1 22 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
adamc@1 23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
adamc@1 24 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
adamc@1 25 * POSSIBILITY OF SUCH DAMAGE.
adamc@1 26 *)
adamc@1 27
adamc@1 28 (* Lexing info for Laconic/Web programs *)
adamc@1 29
(* Type abbreviations for the lexer: positions are plain ints, while semantic
 * values and tokens are the ones supplied by the Tokens structure. *)
adamc@1 30 type pos = int
adamc@1 31 type svalue = Tokens.svalue
adamc@1 32 type ('a,'b) token = ('a,'b) Tokens.token
(* Result type of every lexing action: a token carrying an svalue and a pos. *)
adamc@1 33 type lexresult = (svalue,pos) Tokens.token
adamc@1 34
(* Bookkeeping for nested comments.
 * commentLevel is the current nesting depth; commentPos is the position that
 * was passed to enterComment when the outermost open comment began. *)
local
    val commentLevel = ref 0
    val commentPos = ref 0
in
    (* Enter one more comment level.  When no comment was open yet, pos is
     * remembered so an unterminated comment can be reported where it began. *)
    fun enterComment pos =
        (if !commentLevel = 0 then
             commentPos := pos
         else
             ();
         commentLevel := !commentLevel + 1)

    (* Leave one comment level.  Returns true when no comment remains open. *)
    fun exitComment () =
        (commentLevel := !commentLevel - 1;
         !commentLevel = 0)

    (* Produce the EOF token, positioned at ErrorMsg.lastLineStart ().
     * If a comment is still open, report it at the position where the
     * outermost comment started. *)
    fun eof () =
        let
            val pos = ErrorMsg.lastLineStart ()
        in
            if !commentLevel > 0 then
                (ErrorMsg.errorAt' (!commentPos, !commentPos) "Unterminated comment";
                 (* The depth lives in a ref shared by every run of the lexer.
                  * Reset it so the next input does not start out believing it
                  * is already inside a comment, and so a repeated call to eof
                  * does not report the same comment again. *)
                 commentLevel := 0)
            else
                ();
            Tokens.EOF (pos, pos)
        end
end
adamc@1 61
(* State for the string literal currently being lexed. *)
(* strEnder: the character that closes the literal; each character seen in the
 * STRING state is compared against it. *)
adamc@229 62 val strEnder = ref #"\""
(* str: characters collected so far, most recent first; the list is reversed
 * when the STRING token is built. *)
adamc@14 63 val str = ref ([] : char list)
(* strStart: position recorded by the rule that opened the literal. *)
adamc@14 64 val strStart = ref 0
adamc@14 65
(* Position adjustment.
 * offset is subtracted from every raw yypos handed to pos.  It stays 0 until
 * initialSig has been called and a newline is seen afterwards; at that point
 * it becomes the raw position just past that newline. *)
local
    val initSig = ref false
    val offset = ref 0
in
    (* Arm the one-shot adjustment performed by the next call to newline. *)
    fun initialSig () = initSig := true

    (* Translate a raw lexer position into an adjusted one. *)
    fun pos yypos = yypos - !offset

    (* Handle a newline found at raw position yypos: either consume the armed
     * adjustment, or report the line break to ErrorMsg. *)
    fun newline yypos =
        case !initSig of
            true =>
            (initSig := false;
             offset := yypos + 1)
          | false => ErrorMsg.newline (pos yypos)
end
adamc@54 83
(* xmlTag: names of the elements opened from the INITIAL state, innermost
 * first; a matching closing tag in the XML state pops the head. *)
adamc@91 84 val xmlTag = ref ([] : string list)
(* xmlString: set to true by the XMLTAG rule that opens a string; when a
 * string ends with this flag set, lexing resumes in XMLTAG instead of INITIAL.
 * NOTE(review): the initial value is true while initialize sets it to false,
 * so this presumably relies on initialize being called first - confirm. *)
adamc@91 85 val xmlString = ref true
(* braceLevels: stack of (resume action, brace depth) pairs; see pushLevel,
 * enterBrace and exitBrace. *)
adamc@91 86 val braceLevels = ref ([] : ((unit -> unit) * int) list)
adamc@91 87
(* Push a new brace level of depth 1 whose resume action is s.  exitBrace runs
 * s when that level's depth drops back to zero. *)
fun pushLevel s =
    let
        val outer = !braceLevels
    in
        braceLevels := (s, 1) :: outer
    end
adamc@91 89
(* Note one more open brace on the innermost level.  Does nothing when no
 * level has been pushed. *)
fun enterBrace () =
    case !braceLevels of
        [] => ()
      | (resume, depth) :: outer => braceLevels := (resume, depth + 1) :: outer
adamc@91 94
(* Note one closed brace on the innermost level.  When this closes the brace
 * that created the level, the level is popped and its resume action is run.
 * Does nothing when no level has been pushed. *)
fun exitBrace () =
    case !braceLevels of
        [] => ()
      | (resume, 1) :: outer =>
        (braceLevels := outer;
         resume ())
      | (resume, depth) :: outer => braceLevels := (resume, depth - 1) :: outer
adamc@91 104
(* Put the XML-related lexer state back to its starting point: no open
 * elements and no attribute string in progress. *)
fun initialize () =
    (xmlString := false;
     xmlTag := [])
adamc@91 107
adamc@54 108
adamc@1 109 %%
adamc@1 110 %header (functor LacwebLexFn(structure Tokens : Lacweb_TOKENS));
adamc@1 111 %full
adamc@91 112 %s COMMENT STRING XML XMLTAG;
adamc@1 113
adamc@48 114 id = [a-z_][A-Za-z0-9_']*;
adamc@48 115 cid = [A-Z][A-Za-z0-9_']*;
adamc@1 116 ws = [\ \t\012];
adamc@14 117 intconst = [0-9]+;
adamc@14 118 realconst = [0-9]+\.[0-9]*;
adamc@91 119 notags = [^<{\n]+;
adamc@1 120
adamc@1 121 %%
adamc@1 122
<INITIAL> \n          => ((* Report the line break through newline, then keep scanning. *)
                          newline yypos; continue ());
<COMMENT> \n          => (newline yypos; continue ());
<XMLTAG> \n           => (newline yypos; continue ());
<XML> \n              => (let
                              (* Inside XML content the line break is handed on as text. *)
                              val stop = yypos + size yytext
                          in
                              newline yypos;
                              Tokens.NOTAGS (yytext, yypos, stop)
                          end);
adamc@1 131
<INITIAL> {ws}+       => ((* Whitespace produces no token. *)
                          continue ());

<INITIAL> "(*"        => ((* Outermost comment opener: remember where it starts. *)
                          enterComment (pos yypos);
                          YYBEGIN COMMENT;
                          continue ());
<INITIAL> "*)"        => (let
                              val here = pos yypos
                          in
                              ErrorMsg.errorAt' (here, here) "Unbalanced comments";
                              continue ()
                          end);

<COMMENT> "(*"        => (enterComment (pos yypos); continue ());
<COMMENT> "*)"        => ((* Go back to INITIAL only once every nested comment is closed. *)
                          (case exitComment () of
                               true => YYBEGIN INITIAL
                             | false => ());
                          continue ());
adamc@1 144
<INITIAL> "\""        => ((* Open a literal: record its terminator and start, clear the buffer. *)
                          strEnder := #"\""; str := []; strStart := pos yypos;
                          YYBEGIN STRING; continue ());
<INITIAL> "'"         => (strEnder := #"'"; str := []; strStart := pos yypos;
                          YYBEGIN STRING; continue ());
<STRING> "\\\""       => ((* Escaped quote characters are stored without the backslash. *)
                          str := #"\"" :: !str; continue ());
<STRING> "\\'"        => (str := #"'" :: !str; continue ());
<STRING> "\n"         => (newline yypos; str := #"\n" :: !str; continue ());
<STRING> .            => (let
                              val ch = String.sub (yytext, 0)
                          in
                              if ch <> !strEnder then
                                  (str := ch :: !str; continue ())
                              else
                                  let
                                      (* The buffer is kept newest-first, so flip it. *)
                                      val contents = implode (rev (!str))
                                  in
                                      (* A string opened from XMLTAG returns there. *)
                                      (case !xmlString of
                                           true => (xmlString := false; YYBEGIN XMLTAG)
                                         | false => YYBEGIN INITIAL);
                                      Tokens.STRING (contents, !strStart, pos yypos + 1)
                                  end
                          end);
adamc@14 164
<INITIAL> "<" {id} ">" => (let
                               (* Strip the surrounding angle brackets to get the element name. *)
                               val len = size yytext
                               val tag = String.substring (yytext, 1, len - 2)
                           in
                               xmlTag := tag :: !xmlTag;
                               YYBEGIN XML;
                               Tokens.XML_BEGIN (tag, yypos, yypos + len)
                           end);
<XML> "</" {id} ">"    => (let
                               val len = size yytext
                               val stop = yypos + len
                               val id = String.substring (yytext, 2, len - 3)
                               (* An ordinary closing tag, handed to the parser as is. *)
                               fun endTag () = Tokens.END_TAG (id, yypos, stop)
                           in
                               case !xmlTag of
                                   [] => endTag ()
                                 | top :: rest =>
                                   if id <> top then
                                       endTag ()
                                   else
                                       (* Closes the element that switched the lexer into XML. *)
                                       (xmlTag := rest;
                                        YYBEGIN INITIAL;
                                        Tokens.XML_END (yypos, stop))
                           end);
adamc@91 186
<XML> "<" {id}        => (let
                              (* Everything after the opening bracket is the tag name. *)
                              val name = String.extract (yytext, 1, NONE)
                          in
                              YYBEGIN XMLTAG;
                              Tokens.BEGIN_TAG (name, yypos, yypos + size yytext)
                          end);

<XMLTAG> "/"          => (Tokens.DIVIDE (yypos, yypos + size yytext));
<XMLTAG> ">"          => ((* End of the tag header: back to XML content. *)
                          YYBEGIN XML;
                          Tokens.GT (yypos, yypos + size yytext));

<XMLTAG> {ws}+        => (continue ());

<XMLTAG> {id}         => (Tokens.SYMBOL (yytext, yypos, yypos + size yytext));
<XMLTAG> "="          => (Tokens.EQ (yypos, yypos + size yytext));
adamc@91 199
<XMLTAG> {intconst}   => (let
                              val stop = yypos + size yytext
                          in
                              (* A literal that cannot be converted is reported and skipped. *)
                              case Int64.fromString yytext of
                                  NONE =>
                                  (ErrorMsg.errorAt' (yypos, yypos)
                                                     ("Expected int, received: " ^ yytext);
                                   continue ())
                                | SOME n => Tokens.INT (n, yypos, stop)
                          end);
<XMLTAG> {realconst}  => (let
                              val stop = yypos + size yytext
                          in
                              case Real.fromString yytext of
                                  NONE =>
                                  (ErrorMsg.errorAt' (yypos, yypos)
                                                     ("Expected float, received: " ^ yytext);
                                   continue ())
                                | SOME r => Tokens.FLOAT (r, yypos, stop)
                          end);
<XMLTAG> "\""         => ((* Attribute value: lex it in STRING and come back to XMLTAG afterwards. *)
                          YYBEGIN STRING;
                          xmlString := true;
                          (* Always reset the terminator.  A literal opened earlier with a
                           * single quote leaves strEnder set to that character, and without
                           * this reset the attribute value would not end at its closing
                           * double quote. *)
                          strEnder := #"\"";
                          strStart := yypos; str := []; continue ());
adamc@91 213
<XMLTAG> "{"          => ((* Embedded code: lex it in INITIAL and arrange to resume XMLTAG. *)
                          pushLevel (fn () => YYBEGIN XMLTAG);
                          YYBEGIN INITIAL;
                          Tokens.LBRACE (yypos, yypos + 1));
<XMLTAG> "("          => (pushLevel (fn () => YYBEGIN XMLTAG);
                          YYBEGIN INITIAL;
                          Tokens.LPAREN (yypos, yypos + 1));

<XMLTAG> .            => ((* Anything else inside a tag header is reported and skipped. *)
                          ErrorMsg.errorAt' (yypos, yypos)
                                            (String.concat ["illegal XML tag character: \"", yytext, "\""]);
                          continue ());
adamc@91 224
<XML> "{"             => ((* Embedded code: lex it in INITIAL and arrange to resume XML. *)
                          pushLevel (fn () => YYBEGIN XML);
                          YYBEGIN INITIAL;
                          Tokens.LBRACE (yypos, yypos + 1));

<XML> {notags}        => ((* A run of plain text up to the next tag, brace or line break. *)
                          Tokens.NOTAGS (yytext, yypos, yypos + size yytext));

<XML> .               => (ErrorMsg.errorAt' (yypos, yypos)
                                            (String.concat ["illegal XML character: \"", yytext, "\""]);
                          continue ());
adamc@91 234
adamc@82 235 <INITIAL> "()" => ((* Bracket tokens.  Every position in these rules goes through pos. *) Tokens.UNIT (pos yypos, pos yypos + size yytext));
adamc@54 236 <INITIAL> "(" => (Tokens.LPAREN (pos yypos, pos yypos + size yytext));
adamc@54 237 <INITIAL> ")" => (Tokens.RPAREN (pos yypos, pos yypos + size yytext));
adamc@54 238 <INITIAL> "[" => (Tokens.LBRACK (pos yypos, pos yypos + size yytext));
adamc@54 239 <INITIAL> "]" => (Tokens.RBRACK (pos yypos, pos yypos + size yytext));
adamc@110 240 <INITIAL> "{" => ((* Deepen the innermost level created by pushLevel, if there is one. *) enterBrace ();
adamc@110 241 Tokens.LBRACE (pos yypos, pos yypos + size yytext));
adamc@110 242 <INITIAL> "}" => ((* When this closes the brace that created a level, exitBrace runs its resume action. *) exitBrace ();
adamc@110 243 Tokens.RBRACE (pos yypos, pos yypos + size yytext));
adamc@1 244
adamc@54 245 <INITIAL> "->" => ((* Operator and punctuation tokens.  Each action only wraps the matched span in its token. *) Tokens.ARROW (pos yypos, pos yypos + size yytext));
adamc@54 246 <INITIAL> "=>" => (Tokens.DARROW (pos yypos, pos yypos + size yytext));
adamc@54 247 <INITIAL> "++" => (Tokens.PLUSPLUS (pos yypos, pos yypos + size yytext));
adamc@149 248 <INITIAL> "--" => (Tokens.MINUSMINUS (pos yypos, pos yypos + size yytext));
adamc@1 249
adamc@54 250 <INITIAL> "=" => (Tokens.EQ (pos yypos, pos yypos + size yytext));
adamc@219 251 <INITIAL> "<>" => (Tokens.NE (pos yypos, pos yypos + size yytext));
adamc@219 252 <INITIAL> "<" => ((* A lone bracket; the earlier rule for a bracketed id is longer and so wins when it applies. *) Tokens.LT (pos yypos, pos yypos + size yytext));
adamc@219 253 <INITIAL> ">" => (Tokens.GT (pos yypos, pos yypos + size yytext));
adamc@219 254 <INITIAL> "<=" => (Tokens.LE (pos yypos, pos yypos + size yytext));
adamc@219 255 <INITIAL> ">=" => (Tokens.GE (pos yypos, pos yypos + size yytext));
adamc@54 256 <INITIAL> "," => (Tokens.COMMA (pos yypos, pos yypos + size yytext));
adamc@54 257 <INITIAL> ":::" => (Tokens.TCOLON (pos yypos, pos yypos + size yytext));
adamc@54 258 <INITIAL> "::" => (Tokens.DCOLON (pos yypos, pos yypos + size yytext));
adamc@54 259 <INITIAL> ":" => (Tokens.COLON (pos yypos, pos yypos + size yytext));
adamc@174 260 <INITIAL> "..." => (Tokens.DOTDOTDOT (pos yypos, pos yypos + size yytext));
adamc@54 261 <INITIAL> "." => (Tokens.DOT (pos yypos, pos yypos + size yytext));
adamc@54 262 <INITIAL> "$" => (Tokens.DOLLAR (pos yypos, pos yypos + size yytext));
adamc@54 263 <INITIAL> "#" => (Tokens.HASH (pos yypos, pos yypos + size yytext));
adamc@54 264 <INITIAL> "__" => (Tokens.UNDERUNDER (pos yypos, pos yypos + size yytext));
adamc@54 265 <INITIAL> "_" => ((* Listed before the id rule, which also matches a lone underscore. *) Tokens.UNDER (pos yypos, pos yypos + size yytext));
adamc@84 266 <INITIAL> "~" => (Tokens.TWIDDLE (pos yypos, pos yypos + size yytext));
adamc@156 267 <INITIAL> "|" => (Tokens.BAR (pos yypos, pos yypos + size yytext));
adamc@195 268 <INITIAL> "*" => (Tokens.STAR (pos yypos, pos yypos + size yytext));
adamc@1 269
adamc@54 270 <INITIAL> "con" => ((* Lower-case keywords.  They are listed before the id rule so that a keyword is not returned as a SYMBOL. *) Tokens.CON (pos yypos, pos yypos + size yytext));
adamc@54 271 <INITIAL> "type" => (Tokens.LTYPE (pos yypos, pos yypos + size yytext));
adamc@156 272 <INITIAL> "datatype" => (Tokens.DATATYPE (pos yypos, pos yypos + size yytext));
adamc@156 273 <INITIAL> "of" => (Tokens.OF (pos yypos, pos yypos + size yytext));
adamc@54 274 <INITIAL> "val" => (Tokens.VAL (pos yypos, pos yypos + size yytext));
adamc@123 275 <INITIAL> "rec" => (Tokens.REC (pos yypos, pos yypos + size yytext));
adamc@123 276 <INITIAL> "and" => (Tokens.AND (pos yypos, pos yypos + size yytext));
adamc@54 277 <INITIAL> "fn" => (Tokens.FN (pos yypos, pos yypos + size yytext));
adamc@67 278 <INITIAL> "fold" => (Tokens.FOLD (pos yypos, pos yypos + size yytext));
adamc@170 279 <INITIAL> "case" => (Tokens.CASE (pos yypos, pos yypos + size yytext));
adamc@190 280 <INITIAL> "if" => (Tokens.IF (pos yypos, pos yypos + size yytext));
adamc@190 281 <INITIAL> "then" => (Tokens.THEN (pos yypos, pos yypos + size yytext));
adamc@190 282 <INITIAL> "else" => (Tokens.ELSE (pos yypos, pos yypos + size yytext));
adamc@1 283
adamc@54 284 <INITIAL> "structure" => (Tokens.STRUCTURE (pos yypos, pos yypos + size yytext));
adamc@54 285 <INITIAL> "signature" => (Tokens.SIGNATURE (pos yypos, pos yypos + size yytext));
adamc@54 286 <INITIAL> "struct" => (Tokens.STRUCT (pos yypos, pos yypos + size yytext));
adamc@54 287 <INITIAL> "sig" => ((* NOTE review: yypos = 2 presumably means the very first character of the input - confirm against the ML-Lex position convention.  In that case the next newline resets the position offset. *) if yypos = 2 then initialSig () else (); Tokens.SIG (pos yypos, pos yypos + size yytext));
adamc@54 288 <INITIAL> "end" => (Tokens.END (pos yypos, pos yypos + size yytext));
adamc@54 289 <INITIAL> "functor" => (Tokens.FUNCTOR (pos yypos, pos yypos + size yytext));
adamc@54 290 <INITIAL> "where" => (Tokens.WHERE (pos yypos, pos yypos + size yytext));
adamc@54 291 <INITIAL> "extern" => (Tokens.EXTERN (pos yypos, pos yypos + size yytext));
adamc@58 292 <INITIAL> "include" => (Tokens.INCLUDE (pos yypos, pos yypos + size yytext));
adamc@58 293 <INITIAL> "open" => (Tokens.OPEN (pos yypos, pos yypos + size yytext));
adamc@88 294 <INITIAL> "constraint"=> (Tokens.CONSTRAINT (pos yypos, pos yypos + size yytext));
adamc@88 295 <INITIAL> "constraints"=> (Tokens.CONSTRAINTS (pos yypos, pos yypos + size yytext));
adamc@109 296 <INITIAL> "export" => (Tokens.EXPORT (pos yypos, pos yypos + size yytext));
adamc@203 297 <INITIAL> "table" => (Tokens.TABLE (pos yypos, pos yypos + size yytext));
adamc@211 298 <INITIAL> "class" => (Tokens.CLASS (pos yypos, pos yypos + size yytext));
adamc@30 299
adamc@54 300 <INITIAL> "Type" => ((* Capitalised keywords.  They are listed before the cid rule so that they are not returned as a CSYMBOL. *) Tokens.TYPE (pos yypos, pos yypos + size yytext));
adamc@54 301 <INITIAL> "Name" => (Tokens.NAME (pos yypos, pos yypos + size yytext));
adamc@82 302 <INITIAL> "Unit" => (Tokens.KUNIT (pos yypos, pos yypos + size yytext));
adamc@1 303
adamc@204 304 <INITIAL> "SELECT" => ((* Upper-case query keywords.  They are listed before the cid rule so that they are not returned as a CSYMBOL. *) Tokens.SELECT (pos yypos, pos yypos + size yytext));
adamc@204 305 <INITIAL> "FROM" => (Tokens.FROM (pos yypos, pos yypos + size yytext));
adamc@204 306 <INITIAL> "AS" => (Tokens.AS (pos yypos, pos yypos + size yytext));
adamc@209 307 <INITIAL> "WHERE" => ((* Distinct from the lower-case where keyword, which yields WHERE. *) Tokens.CWHERE (pos yypos, pos yypos + size yytext));
adamc@226 308 <INITIAL> "GROUP" => (Tokens.GROUP (pos yypos, pos yypos + size yytext));
adamc@230 309 <INITIAL> "ORDER" => (Tokens.ORDER (pos yypos, pos yypos + size yytext));
adamc@226 310 <INITIAL> "BY" => (Tokens.BY (pos yypos, pos yypos + size yytext));
adamc@227 311 <INITIAL> "HAVING" => (Tokens.HAVING (pos yypos, pos yypos + size yytext));
adamc@231 312 <INITIAL> "LIMIT" => (Tokens.LIMIT (pos yypos, pos yypos + size yytext));
adamc@231 313 <INITIAL> "OFFSET" => (Tokens.OFFSET (pos yypos, pos yypos + size yytext));
adamc@232 314 <INITIAL> "ALL" => (Tokens.ALL (pos yypos, pos yypos + size yytext));
adamc@209 315
adamc@229 316 <INITIAL> "UNION" => (Tokens.UNION (pos yypos, pos yypos + size yytext));
adamc@229 317 <INITIAL> "INTERSECT" => (Tokens.INTERSECT (pos yypos, pos yypos + size yytext));
adamc@229 318 <INITIAL> "EXCEPT" => (Tokens.EXCEPT (pos yypos, pos yypos + size yytext));
adamc@229 319
adamc@209 320 <INITIAL> "TRUE" => (Tokens.TRUE (pos yypos, pos yypos + size yytext));
adamc@209 321 <INITIAL> "FALSE" => (Tokens.FALSE (pos yypos, pos yypos + size yytext));
adamc@220 322 <INITIAL> "AND" => ((* Distinct from the lower-case and keyword, which yields AND. *) Tokens.CAND (pos yypos, pos yypos + size yytext));
adamc@220 323 <INITIAL> "OR" => (Tokens.OR (pos yypos, pos yypos + size yytext));
adamc@220 324 <INITIAL> "NOT" => (Tokens.NOT (pos yypos, pos yypos + size yytext));
adamc@204 325
adamc@235 326 <INITIAL> "COUNT" => (Tokens.COUNT (pos yypos, pos yypos + size yytext));
adamc@235 327
<INITIAL> {id}        => (let
                              (* Any lower-case name that no keyword rule above claimed. *)
                              val start = pos yypos
                          in
                              Tokens.SYMBOL (yytext, start, start + size yytext)
                          end);
<INITIAL> {cid}       => (let
                              val start = pos yypos
                          in
                              Tokens.CSYMBOL (yytext, start, start + size yytext)
                          end);

<INITIAL> {intconst}  => (let
                              val start = pos yypos
                          in
                              (* A literal that cannot be converted is reported and skipped. *)
                              case Int64.fromString yytext of
                                  NONE =>
                                  (ErrorMsg.errorAt' (start, start)
                                                     ("Expected int, received: " ^ yytext);
                                   continue ())
                                | SOME n => Tokens.INT (n, start, start + size yytext)
                          end);
<INITIAL> {realconst} => (let
                              val start = pos yypos
                          in
                              case Real64.fromString yytext of
                                  NONE =>
                                  (ErrorMsg.errorAt' (start, start)
                                                     ("Expected float, received: " ^ yytext);
                                   continue ())
                                | SOME r => Tokens.FLOAT (r, start, start + size yytext)
                          end);
adamc@14 341
<COMMENT> .           => ((* Comment text is discarded one character at a time. *)
                          continue ());

<INITIAL> .           => (let
                              (* No other INITIAL rule matched: report the character and skip it. *)
                              val here = pos yypos
                          in
                              ErrorMsg.errorAt' (here, here)
                                                (String.concat ["illegal character: \"", yytext, "\""]);
                              continue ()
                          end);