annotate src/lacweb.lex @ 156:34ccd7d2bea8

Start of datatype support
author Adam Chlipala <adamc@hcoop.net>
date Thu, 24 Jul 2008 15:02:03 -0400
parents 7420fa18d657
children a158f8c5aa55
rev   line source
adamc@1 1 (* Copyright (c) 2008, Adam Chlipala
adamc@1 2 * All rights reserved.
adamc@1 3 *
adamc@1 4 * Redistribution and use in source and binary forms, with or without
adamc@1 5 * modification, are permitted provided that the following conditions are met:
adamc@1 6 *
adamc@1 7 * - Redistributions of source code must retain the above copyright notice,
adamc@1 8 * this list of conditions and the following disclaimer.
adamc@1 9 * - Redistributions in binary form must reproduce the above copyright notice,
adamc@1 10 * this list of conditions and the following disclaimer in the documentation
adamc@1 11 * and/or other materials provided with the distribution.
adamc@1 12 * - The names of contributors may not be used to endorse or promote products
adamc@1 13 * derived from this software without specific prior written permission.
adamc@1 14 *
adamc@1 15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
adamc@1 16 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
adamc@1 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
adamc@1 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
adamc@1 19 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
adamc@1 20 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
adamc@1 21 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
adamc@1 22 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
adamc@1 23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
adamc@1 24 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
adamc@1 25 * POSSIBILITY OF SUCH DAMAGE.
adamc@1 26 *)
adamc@1 27
adamc@1 28 (* Lexing info for Laconic/Web programs *)
adamc@1 29
adamc@1 30 type pos = int
adamc@1 31 type svalue = Tokens.svalue
adamc@1 32 type ('a,'b) token = ('a,'b) Tokens.token
adamc@1 33 type lexresult = (svalue,pos) Tokens.token
adamc@1 34
adamc@1 35 local
adamc@1 36 val commentLevel = ref 0
adamc@1 37 val commentPos = ref 0
adamc@1 38 in
adamc@1 39 fun enterComment pos =
adamc@1 40 (if !commentLevel = 0 then
adamc@1 41 commentPos := pos
adamc@1 42 else
adamc@1 43 ();
adamc@1 44 commentLevel := !commentLevel + 1)
adamc@1 45
adamc@1 46 fun exitComment () =
adamc@1 47 (ignore (commentLevel := !commentLevel - 1);
adamc@1 48 !commentLevel = 0)
adamc@1 49
adamc@1 50 fun eof () =
adamc@1 51 let
adamc@1 52 val pos = ErrorMsg.lastLineStart ()
adamc@1 53 in
adamc@1 54 if !commentLevel > 0 then
adamc@1 55 ErrorMsg.errorAt' (!commentPos, !commentPos) "Unterminated comment"
adamc@1 56 else
adamc@1 57 ();
adamc@1 58 Tokens.EOF (pos, pos)
adamc@1 59 end
adamc@1 60 end
adamc@1 61
adamc@14 62 val str = ref ([] : char list)
adamc@14 63 val strStart = ref 0
adamc@14 64
adamc@54 65 local
adamc@54 66 val initSig = ref false
adamc@54 67 val offset = ref 0
adamc@54 68 in
adamc@54 69
adamc@54 70 fun initialSig () = initSig := true
adamc@54 71
adamc@54 72 fun pos yypos = yypos - !offset
adamc@54 73
adamc@54 74 fun newline yypos =
adamc@54 75 if !initSig then
adamc@54 76 (initSig := false;
adamc@54 77 offset := yypos + 1)
adamc@54 78 else
adamc@54 79 ErrorMsg.newline (pos yypos)
adamc@54 80
adamc@54 81 end
adamc@54 82
adamc@91 83 val xmlTag = ref ([] : string list)
adamc@91 84 val xmlString = ref true
adamc@91 85 val braceLevels = ref ([] : ((unit -> unit) * int) list)
adamc@91 86
adamc@91 87 fun pushLevel s = braceLevels := (s, 1) :: (!braceLevels)
adamc@91 88
adamc@91 89 fun enterBrace () =
adamc@91 90 case !braceLevels of
adamc@91 91 (s, i) :: rest => braceLevels := (s, i+1) :: rest
adamc@91 92 | _ => ()
adamc@91 93
adamc@91 94 fun exitBrace () =
adamc@91 95 case !braceLevels of
adamc@91 96 (s, i) :: rest =>
adamc@91 97 if i = 1 then
adamc@91 98 (braceLevels := rest;
adamc@91 99 s ())
adamc@91 100 else
adamc@91 101 braceLevels := (s, i-1) :: rest
adamc@91 102 | _ => ()
adamc@91 103
adamc@91 104 fun initialize () = (xmlTag := [];
adamc@91 105 xmlString := false)
adamc@91 106
adamc@54 107
adamc@1 108 %%
adamc@1 109 %header (functor LacwebLexFn(structure Tokens : Lacweb_TOKENS));
adamc@1 110 %full
adamc@91 111 %s COMMENT STRING XML XMLTAG;
adamc@1 112
adamc@48 113 id = [a-z_][A-Za-z0-9_']*;
adamc@48 114 cid = [A-Z][A-Za-z0-9_']*;
adamc@1 115 ws = [\ \t\012];
adamc@14 116 intconst = [0-9]+;
adamc@14 117 realconst = [0-9]+\.[0-9]*;
adamc@91 118 notags = [^<{\n]+;
adamc@1 119
adamc@1 120 %%
adamc@1 121
adamc@54 122 <INITIAL> \n => (newline yypos;
adamc@1 123 continue ());
adamc@54 124 <COMMENT> \n => (newline yypos;
adamc@1 125 continue ());
adamc@91 126 <XMLTAG> \n => (newline yypos;
adamc@91 127 continue ());
adamc@91 128 <XML> \n => (newline yypos;
adamc@91 129 Tokens.NOTAGS (yytext, yypos, yypos + size yytext));
adamc@1 130
adamc@1 131 <INITIAL> {ws}+ => (lex ());
adamc@1 132
adamc@1 133 <INITIAL> "(*" => (YYBEGIN COMMENT;
adamc@54 134 enterComment (pos yypos);
adamc@1 135 continue ());
adamc@54 136 <INITIAL> "*)" => (ErrorMsg.errorAt' (pos yypos, pos yypos) "Unbalanced comments";
adamc@1 137 continue ());
adamc@1 138
adamc@54 139 <COMMENT> "(*" => (enterComment (pos yypos);
adamc@1 140 continue ());
adamc@1 141 <COMMENT> "*)" => (if exitComment () then YYBEGIN INITIAL else ();
adamc@1 142 continue ());
adamc@1 143
adamc@54 144 <INITIAL> "\"" => (YYBEGIN STRING; strStart := pos yypos; str := []; continue());
adamc@14 145 <STRING> "\\\"" => (str := #"\"" :: !str; continue());
adamc@104 146 <STRING> "\"" => (if !xmlString then
adamc@104 147 (xmlString := false; YYBEGIN XMLTAG)
adamc@104 148 else
adamc@104 149 YYBEGIN INITIAL;
adamc@54 150 Tokens.STRING (String.implode (List.rev (!str)), !strStart, pos yypos + 1));
adamc@54 151 <STRING> "\n" => (newline yypos;
adamc@14 152 str := #"\n" :: !str; continue());
adamc@14 153 <STRING> . => (str := String.sub (yytext, 0) :: !str; continue());
adamc@14 154
adamc@91 155 <INITIAL> "<" {id} ">"=> (let
adamc@91 156 val tag = String.substring (yytext, 1, size yytext - 2)
adamc@91 157 in
adamc@91 158 YYBEGIN XML;
adamc@91 159 xmlTag := tag :: (!xmlTag);
adamc@91 160 Tokens.XML_BEGIN (tag, yypos, yypos + size yytext)
adamc@91 161 end);
adamc@91 162 <XML> "</" {id} ">" => (let
adamc@91 163 val id = String.substring (yytext, 2, size yytext - 3)
adamc@91 164 in
adamc@91 165 case !xmlTag of
adamc@91 166 id' :: rest =>
adamc@91 167 if id = id' then
adamc@91 168 (YYBEGIN INITIAL;
adamc@91 169 xmlTag := rest;
adamc@91 170 Tokens.XML_END (yypos, yypos + size yytext))
adamc@91 171 else
adamc@91 172 Tokens.END_TAG (id, yypos, yypos + size yytext)
adamc@91 173 | _ =>
adamc@91 174 Tokens.END_TAG (id, yypos, yypos + size yytext)
adamc@91 175 end);
adamc@91 176
adamc@91 177 <XML> "<" {id} => (YYBEGIN XMLTAG;
adamc@91 178 Tokens.BEGIN_TAG (String.extract (yytext, 1, NONE),
adamc@91 179 yypos, yypos + size yytext));
adamc@91 180
adamc@91 181 <XMLTAG> "/" => (Tokens.DIVIDE (yypos, yypos + size yytext));
adamc@91 182 <XMLTAG> ">" => (YYBEGIN XML;
adamc@91 183 Tokens.GT (yypos, yypos + size yytext));
adamc@91 184
adamc@91 185 <XMLTAG> {ws}+ => (lex ());
adamc@91 186
adamc@91 187 <XMLTAG> {id} => (Tokens.SYMBOL (yytext, yypos, yypos + size yytext));
adamc@91 188 <XMLTAG> "=" => (Tokens.EQ (yypos, yypos + size yytext));
adamc@91 189
adamc@91 190 <XMLTAG> {intconst} => (case Int64.fromString yytext of
adamc@91 191 SOME x => Tokens.INT (x, yypos, yypos + size yytext)
adamc@91 192 | NONE => (ErrorMsg.errorAt' (yypos, yypos)
adamc@91 193 ("Expected int, received: " ^ yytext);
adamc@91 194 continue ()));
adamc@91 195 <XMLTAG> {realconst} => (case Real.fromString yytext of
adamc@91 196 SOME x => Tokens.FLOAT (x, yypos, yypos + size yytext)
adamc@91 197 | NONE => (ErrorMsg.errorAt' (yypos, yypos)
adamc@91 198 ("Expected float, received: " ^ yytext);
adamc@91 199 continue ()));
adamc@91 200 <XMLTAG> "\"" => (YYBEGIN STRING;
adamc@91 201 xmlString := true;
adamc@104 202 strStart := yypos; str := []; continue ());
adamc@91 203
adamc@91 204 <XMLTAG> "{" => (YYBEGIN INITIAL;
adamc@91 205 pushLevel (fn () => YYBEGIN XMLTAG);
adamc@91 206 Tokens.LBRACE (yypos, yypos + 1));
adamc@91 207 <XMLTAG> "(" => (YYBEGIN INITIAL;
adamc@91 208 pushLevel (fn () => YYBEGIN XMLTAG);
adamc@91 209 Tokens.LPAREN (yypos, yypos + 1));
adamc@91 210
adamc@91 211 <XMLTAG> . => (ErrorMsg.errorAt' (yypos, yypos)
adamc@91 212 ("illegal XML tag character: \"" ^ yytext ^ "\"");
adamc@91 213 continue ());
adamc@91 214
adamc@91 215 <XML> "{" => (YYBEGIN INITIAL;
adamc@91 216 pushLevel (fn () => YYBEGIN XML);
adamc@91 217 Tokens.LBRACE (yypos, yypos + 1));
adamc@91 218
adamc@91 219 <XML> {notags} => (Tokens.NOTAGS (yytext, yypos, yypos + size yytext));
adamc@91 220
adamc@91 221 <XML> . => (ErrorMsg.errorAt' (yypos, yypos)
adamc@91 222 ("illegal XML character: \"" ^ yytext ^ "\"");
adamc@91 223 continue ());
adamc@91 224
adamc@82 225 <INITIAL> "()" => (Tokens.UNIT (pos yypos, pos yypos + size yytext));
adamc@54 226 <INITIAL> "(" => (Tokens.LPAREN (pos yypos, pos yypos + size yytext));
adamc@54 227 <INITIAL> ")" => (Tokens.RPAREN (pos yypos, pos yypos + size yytext));
adamc@54 228 <INITIAL> "[" => (Tokens.LBRACK (pos yypos, pos yypos + size yytext));
adamc@54 229 <INITIAL> "]" => (Tokens.RBRACK (pos yypos, pos yypos + size yytext));
adamc@110 230 <INITIAL> "{" => (enterBrace ();
adamc@110 231 Tokens.LBRACE (pos yypos, pos yypos + size yytext));
adamc@110 232 <INITIAL> "}" => (exitBrace ();
adamc@110 233 Tokens.RBRACE (pos yypos, pos yypos + size yytext));
adamc@1 234
adamc@54 235 <INITIAL> "->" => (Tokens.ARROW (pos yypos, pos yypos + size yytext));
adamc@54 236 <INITIAL> "=>" => (Tokens.DARROW (pos yypos, pos yypos + size yytext));
adamc@54 237 <INITIAL> "++" => (Tokens.PLUSPLUS (pos yypos, pos yypos + size yytext));
adamc@149 238 <INITIAL> "--" => (Tokens.MINUSMINUS (pos yypos, pos yypos + size yytext));
adamc@1 239
adamc@54 240 <INITIAL> "=" => (Tokens.EQ (pos yypos, pos yypos + size yytext));
adamc@54 241 <INITIAL> "," => (Tokens.COMMA (pos yypos, pos yypos + size yytext));
adamc@54 242 <INITIAL> ":::" => (Tokens.TCOLON (pos yypos, pos yypos + size yytext));
adamc@54 243 <INITIAL> "::" => (Tokens.DCOLON (pos yypos, pos yypos + size yytext));
adamc@54 244 <INITIAL> ":" => (Tokens.COLON (pos yypos, pos yypos + size yytext));
adamc@54 245 <INITIAL> "." => (Tokens.DOT (pos yypos, pos yypos + size yytext));
adamc@54 246 <INITIAL> "$" => (Tokens.DOLLAR (pos yypos, pos yypos + size yytext));
adamc@54 247 <INITIAL> "#" => (Tokens.HASH (pos yypos, pos yypos + size yytext));
adamc@54 248 <INITIAL> "__" => (Tokens.UNDERUNDER (pos yypos, pos yypos + size yytext));
adamc@54 249 <INITIAL> "_" => (Tokens.UNDER (pos yypos, pos yypos + size yytext));
adamc@84 250 <INITIAL> "~" => (Tokens.TWIDDLE (pos yypos, pos yypos + size yytext));
adamc@156 251 <INITIAL> "|" => (Tokens.BAR (pos yypos, pos yypos + size yytext));
adamc@1 252
adamc@54 253 <INITIAL> "con" => (Tokens.CON (pos yypos, pos yypos + size yytext));
adamc@54 254 <INITIAL> "type" => (Tokens.LTYPE (pos yypos, pos yypos + size yytext));
adamc@156 255 <INITIAL> "datatype" => (Tokens.DATATYPE (pos yypos, pos yypos + size yytext));
adamc@156 256 <INITIAL> "of" => (Tokens.OF (pos yypos, pos yypos + size yytext));
adamc@54 257 <INITIAL> "val" => (Tokens.VAL (pos yypos, pos yypos + size yytext));
adamc@123 258 <INITIAL> "rec" => (Tokens.REC (pos yypos, pos yypos + size yytext));
adamc@123 259 <INITIAL> "and" => (Tokens.AND (pos yypos, pos yypos + size yytext));
adamc@54 260 <INITIAL> "fn" => (Tokens.FN (pos yypos, pos yypos + size yytext));
adamc@67 261 <INITIAL> "fold" => (Tokens.FOLD (pos yypos, pos yypos + size yytext));
adamc@1 262
adamc@54 263 <INITIAL> "structure" => (Tokens.STRUCTURE (pos yypos, pos yypos + size yytext));
adamc@54 264 <INITIAL> "signature" => (Tokens.SIGNATURE (pos yypos, pos yypos + size yytext));
adamc@54 265 <INITIAL> "struct" => (Tokens.STRUCT (pos yypos, pos yypos + size yytext));
adamc@54 266 <INITIAL> "sig" => (if yypos = 2 then initialSig () else (); Tokens.SIG (pos yypos, pos yypos + size yytext));
adamc@54 267 <INITIAL> "end" => (Tokens.END (pos yypos, pos yypos + size yytext));
adamc@54 268 <INITIAL> "functor" => (Tokens.FUNCTOR (pos yypos, pos yypos + size yytext));
adamc@54 269 <INITIAL> "where" => (Tokens.WHERE (pos yypos, pos yypos + size yytext));
adamc@54 270 <INITIAL> "extern" => (Tokens.EXTERN (pos yypos, pos yypos + size yytext));
adamc@58 271 <INITIAL> "include" => (Tokens.INCLUDE (pos yypos, pos yypos + size yytext));
adamc@58 272 <INITIAL> "open" => (Tokens.OPEN (pos yypos, pos yypos + size yytext));
adamc@88 273 <INITIAL> "constraint"=> (Tokens.CONSTRAINT (pos yypos, pos yypos + size yytext));
adamc@88 274 <INITIAL> "constraints"=> (Tokens.CONSTRAINTS (pos yypos, pos yypos + size yytext));
adamc@109 275 <INITIAL> "export" => (Tokens.EXPORT (pos yypos, pos yypos + size yytext));
adamc@30 276
adamc@54 277 <INITIAL> "Type" => (Tokens.TYPE (pos yypos, pos yypos + size yytext));
adamc@54 278 <INITIAL> "Name" => (Tokens.NAME (pos yypos, pos yypos + size yytext));
adamc@82 279 <INITIAL> "Unit" => (Tokens.KUNIT (pos yypos, pos yypos + size yytext));
adamc@1 280
adamc@54 281 <INITIAL> {id} => (Tokens.SYMBOL (yytext, pos yypos, pos yypos + size yytext));
adamc@54 282 <INITIAL> {cid} => (Tokens.CSYMBOL (yytext, pos yypos, pos yypos + size yytext));
adamc@1 283
adamc@14 284 <INITIAL> {intconst} => (case Int64.fromString yytext of
adamc@120 285 SOME x => Tokens.INT (x, pos yypos, pos yypos + size yytext)
adamc@120 286 | NONE => (ErrorMsg.errorAt' (pos yypos, pos yypos)
adamc@120 287 ("Expected int, received: " ^ yytext);
adamc@120 288 continue ()));
adamc@14 289 <INITIAL> {realconst} => (case Real64.fromString yytext of
adamc@54 290 SOME x => Tokens.FLOAT (x, pos yypos, pos yypos + size yytext)
adamc@54 291 | NONE => (ErrorMsg.errorAt' (pos yypos, pos yypos)
adamc@14 292 ("Expected float, received: " ^ yytext);
adamc@14 293 continue ()));
adamc@14 294
adamc@1 295 <COMMENT> . => (continue());
adamc@1 296
adamc@54 297 <INITIAL> . => (ErrorMsg.errorAt' (pos yypos, pos yypos)
adamc@1 298 ("illegal character: \"" ^ yytext ^ "\"");
adamc@1 299 continue ());