comparison src/urweb.lex @ 244:71bafe66dbe1

Laconic -> Ur
author Adam Chlipala <adamc@hcoop.net>
date Sun, 31 Aug 2008 08:32:18 -0400
parents src/lacweb.lex@2b9dfaffb008
children bacd0ba869e1
comparison
equal deleted inserted replaced
243:2b9dfaffb008 244:71bafe66dbe1
1 (* Copyright (c) 2008, Adam Chlipala
2 * All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * - Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * - Redistributions in binary form must reproduce the above copyright notice,
10 * this list of conditions and the following disclaimer in the documentation
11 * and/or other materials provided with the distribution.
12 * - The names of contributors may not be used to endorse or promote products
13 * derived from this software without specific prior written permission.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
19 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 * POSSIBILITY OF SUCH DAMAGE.
26 *)
27
28 (* Lexing info for Ur/Web programs *)
29
30 type pos = int
31 type svalue = Tokens.svalue
32 type ('a,'b) token = ('a,'b) Tokens.token
33 type lexresult = (svalue,pos) Tokens.token
34
35 local
36 val commentLevel = ref 0
37 val commentPos = ref 0
38 in
39 fun enterComment pos =
40 (if !commentLevel = 0 then
41 commentPos := pos
42 else
43 ();
44 commentLevel := !commentLevel + 1)
45
46 fun exitComment () =
47 (ignore (commentLevel := !commentLevel - 1);
48 !commentLevel = 0)
49
50 fun eof () =
51 let
52 val pos = ErrorMsg.lastLineStart ()
53 in
54 if !commentLevel > 0 then
55 ErrorMsg.errorAt' (!commentPos, !commentPos) "Unterminated comment"
56 else
57 ();
58 Tokens.EOF (pos, pos)
59 end
60 end
61
62 val strEnder = ref #"\""
63 val str = ref ([] : char list)
64 val strStart = ref 0
65
66 local
67 val initSig = ref false
68 val offset = ref 0
69 in
70
71 fun initialSig () = initSig := true
72
73 fun pos yypos = yypos - !offset
74
75 fun newline yypos =
76 if !initSig then
77 (initSig := false;
78 offset := yypos + 1)
79 else
80 ErrorMsg.newline (pos yypos)
81
82 end
83
84 val xmlTag = ref ([] : string list)
85 val xmlString = ref true
86 val braceLevels = ref ([] : ((unit -> unit) * int) list)
87
88 fun pushLevel s = braceLevels := (s, 1) :: (!braceLevels)
89
90 fun enterBrace () =
91 case !braceLevels of
92 (s, i) :: rest => braceLevels := (s, i+1) :: rest
93 | _ => ()
94
95 fun exitBrace () =
96 case !braceLevels of
97 (s, i) :: rest =>
98 if i = 1 then
99 (braceLevels := rest;
100 s ())
101 else
102 braceLevels := (s, i-1) :: rest
103 | _ => ()
104
105 fun initialize () = (xmlTag := [];
106 xmlString := false)
107
108
109 %%
110 %header (functor UrwebLexFn(structure Tokens : Urweb_TOKENS));
111 %full
112 %s COMMENT STRING XML XMLTAG;
113
114 id = [a-z_][A-Za-z0-9_']*;
115 cid = [A-Z][A-Za-z0-9_']*;
116 ws = [\ \t\012];
117 intconst = [0-9]+;
118 realconst = [0-9]+\.[0-9]*;
119 notags = [^<{\n]+;
120
121 %%
122
123 <INITIAL> \n => (newline yypos;
124 continue ());
125 <COMMENT> \n => (newline yypos;
126 continue ());
127 <XMLTAG> \n => (newline yypos;
128 continue ());
129 <XML> \n => (newline yypos;
130 Tokens.NOTAGS (yytext, yypos, yypos + size yytext));
131
132 <INITIAL> {ws}+ => (lex ());
133
134 <INITIAL> "(*" => (YYBEGIN COMMENT;
135 enterComment (pos yypos);
136 continue ());
137 <INITIAL> "*)" => (ErrorMsg.errorAt' (pos yypos, pos yypos) "Unbalanced comments";
138 continue ());
139
140 <COMMENT> "(*" => (enterComment (pos yypos);
141 continue ());
142 <COMMENT> "*)" => (if exitComment () then YYBEGIN INITIAL else ();
143 continue ());
144
145 <INITIAL> "\"" => (YYBEGIN STRING; strEnder := #"\""; strStart := pos yypos; str := []; continue());
146 <INITIAL> "'" => (YYBEGIN STRING; strEnder := #"'"; strStart := pos yypos; str := []; continue());
147 <STRING> "\\\"" => (str := #"\"" :: !str; continue());
148 <STRING> "\\'" => (str := #"'" :: !str; continue());
149 <STRING> "\n" => (newline yypos;
150 str := #"\n" :: !str; continue());
151 <STRING> . => (let
152 val ch = String.sub (yytext, 0)
153 in
154 if ch = !strEnder then
155 (if !xmlString then
156 (xmlString := false; YYBEGIN XMLTAG)
157 else
158 YYBEGIN INITIAL;
159 Tokens.STRING (String.implode (List.rev (!str)), !strStart, pos yypos + 1))
160 else
161 (str := ch :: !str;
162 continue ())
163 end);
164
165 <INITIAL> "<" {id} ">"=> (let
166 val tag = String.substring (yytext, 1, size yytext - 2)
167 in
168 YYBEGIN XML;
169 xmlTag := tag :: (!xmlTag);
170 Tokens.XML_BEGIN (tag, yypos, yypos + size yytext)
171 end);
172 <XML> "</" {id} ">" => (let
173 val id = String.substring (yytext, 2, size yytext - 3)
174 in
175 case !xmlTag of
176 id' :: rest =>
177 if id = id' then
178 (YYBEGIN INITIAL;
179 xmlTag := rest;
180 Tokens.XML_END (yypos, yypos + size yytext))
181 else
182 Tokens.END_TAG (id, yypos, yypos + size yytext)
183 | _ =>
184 Tokens.END_TAG (id, yypos, yypos + size yytext)
185 end);
186
187 <XML> "<" {id} => (YYBEGIN XMLTAG;
188 Tokens.BEGIN_TAG (String.extract (yytext, 1, NONE),
189 yypos, yypos + size yytext));
190
191 <XMLTAG> "/" => (Tokens.DIVIDE (yypos, yypos + size yytext));
192 <XMLTAG> ">" => (YYBEGIN XML;
193 Tokens.GT (yypos, yypos + size yytext));
194
195 <XMLTAG> {ws}+ => (lex ());
196
197 <XMLTAG> {id} => (Tokens.SYMBOL (yytext, yypos, yypos + size yytext));
198 <XMLTAG> "=" => (Tokens.EQ (yypos, yypos + size yytext));
199
200 <XMLTAG> {intconst} => (case Int64.fromString yytext of
201 SOME x => Tokens.INT (x, yypos, yypos + size yytext)
202 | NONE => (ErrorMsg.errorAt' (yypos, yypos)
203 ("Expected int, received: " ^ yytext);
204 continue ()));
205 <XMLTAG> {realconst} => (case Real.fromString yytext of
206 SOME x => Tokens.FLOAT (x, yypos, yypos + size yytext)
207 | NONE => (ErrorMsg.errorAt' (yypos, yypos)
208 ("Expected float, received: " ^ yytext);
209 continue ()));
210 <XMLTAG> "\"" => (YYBEGIN STRING;
211 xmlString := true;
212 strStart := yypos; str := []; continue ());
213
214 <XMLTAG> "{" => (YYBEGIN INITIAL;
215 pushLevel (fn () => YYBEGIN XMLTAG);
216 Tokens.LBRACE (yypos, yypos + 1));
217 <XMLTAG> "(" => (YYBEGIN INITIAL;
218 pushLevel (fn () => YYBEGIN XMLTAG);
219 Tokens.LPAREN (yypos, yypos + 1));
220
221 <XMLTAG> . => (ErrorMsg.errorAt' (yypos, yypos)
222 ("illegal XML tag character: \"" ^ yytext ^ "\"");
223 continue ());
224
225 <XML> "{" => (YYBEGIN INITIAL;
226 pushLevel (fn () => YYBEGIN XML);
227 Tokens.LBRACE (yypos, yypos + 1));
228
229 <XML> {notags} => (Tokens.NOTAGS (yytext, yypos, yypos + size yytext));
230
231 <XML> . => (ErrorMsg.errorAt' (yypos, yypos)
232 ("illegal XML character: \"" ^ yytext ^ "\"");
233 continue ());
234
235 <INITIAL> "()" => (Tokens.UNIT (pos yypos, pos yypos + size yytext));
236 <INITIAL> "(" => (Tokens.LPAREN (pos yypos, pos yypos + size yytext));
237 <INITIAL> ")" => (Tokens.RPAREN (pos yypos, pos yypos + size yytext));
238 <INITIAL> "[" => (Tokens.LBRACK (pos yypos, pos yypos + size yytext));
239 <INITIAL> "]" => (Tokens.RBRACK (pos yypos, pos yypos + size yytext));
240 <INITIAL> "{" => (enterBrace ();
241 Tokens.LBRACE (pos yypos, pos yypos + size yytext));
242 <INITIAL> "}" => (exitBrace ();
243 Tokens.RBRACE (pos yypos, pos yypos + size yytext));
244
245 <INITIAL> "->" => (Tokens.ARROW (pos yypos, pos yypos + size yytext));
246 <INITIAL> "=>" => (Tokens.DARROW (pos yypos, pos yypos + size yytext));
247 <INITIAL> "++" => (Tokens.PLUSPLUS (pos yypos, pos yypos + size yytext));
248 <INITIAL> "--" => (Tokens.MINUSMINUS (pos yypos, pos yypos + size yytext));
249
250 <INITIAL> "=" => (Tokens.EQ (pos yypos, pos yypos + size yytext));
251 <INITIAL> "<>" => (Tokens.NE (pos yypos, pos yypos + size yytext));
252 <INITIAL> "<" => (Tokens.LT (pos yypos, pos yypos + size yytext));
253 <INITIAL> ">" => (Tokens.GT (pos yypos, pos yypos + size yytext));
254 <INITIAL> "<=" => (Tokens.LE (pos yypos, pos yypos + size yytext));
255 <INITIAL> ">=" => (Tokens.GE (pos yypos, pos yypos + size yytext));
256 <INITIAL> "," => (Tokens.COMMA (pos yypos, pos yypos + size yytext));
257 <INITIAL> ":::" => (Tokens.TCOLON (pos yypos, pos yypos + size yytext));
258 <INITIAL> "::" => (Tokens.DCOLON (pos yypos, pos yypos + size yytext));
259 <INITIAL> ":" => (Tokens.COLON (pos yypos, pos yypos + size yytext));
260 <INITIAL> "..." => (Tokens.DOTDOTDOT (pos yypos, pos yypos + size yytext));
261 <INITIAL> "." => (Tokens.DOT (pos yypos, pos yypos + size yytext));
262 <INITIAL> "$" => (Tokens.DOLLAR (pos yypos, pos yypos + size yytext));
263 <INITIAL> "#" => (Tokens.HASH (pos yypos, pos yypos + size yytext));
264 <INITIAL> "__" => (Tokens.UNDERUNDER (pos yypos, pos yypos + size yytext));
265 <INITIAL> "_" => (Tokens.UNDER (pos yypos, pos yypos + size yytext));
266 <INITIAL> "~" => (Tokens.TWIDDLE (pos yypos, pos yypos + size yytext));
267 <INITIAL> "|" => (Tokens.BAR (pos yypos, pos yypos + size yytext));
268 <INITIAL> "*" => (Tokens.STAR (pos yypos, pos yypos + size yytext));
269 <INITIAL> "<-" => (Tokens.LARROW (pos yypos, pos yypos + size yytext));
270 <INITIAL> ";" => (Tokens.SEMI (pos yypos, pos yypos + size yytext));
271
272 <INITIAL> "con" => (Tokens.CON (pos yypos, pos yypos + size yytext));
273 <INITIAL> "type" => (Tokens.LTYPE (pos yypos, pos yypos + size yytext));
274 <INITIAL> "datatype" => (Tokens.DATATYPE (pos yypos, pos yypos + size yytext));
275 <INITIAL> "of" => (Tokens.OF (pos yypos, pos yypos + size yytext));
276 <INITIAL> "val" => (Tokens.VAL (pos yypos, pos yypos + size yytext));
277 <INITIAL> "rec" => (Tokens.REC (pos yypos, pos yypos + size yytext));
278 <INITIAL> "and" => (Tokens.AND (pos yypos, pos yypos + size yytext));
279 <INITIAL> "fun" => (Tokens.FUN (pos yypos, pos yypos + size yytext));
280 <INITIAL> "fn" => (Tokens.FN (pos yypos, pos yypos + size yytext));
281 <INITIAL> "fold" => (Tokens.FOLD (pos yypos, pos yypos + size yytext));
282 <INITIAL> "case" => (Tokens.CASE (pos yypos, pos yypos + size yytext));
283 <INITIAL> "if" => (Tokens.IF (pos yypos, pos yypos + size yytext));
284 <INITIAL> "then" => (Tokens.THEN (pos yypos, pos yypos + size yytext));
285 <INITIAL> "else" => (Tokens.ELSE (pos yypos, pos yypos + size yytext));
286
287 <INITIAL> "structure" => (Tokens.STRUCTURE (pos yypos, pos yypos + size yytext));
288 <INITIAL> "signature" => (Tokens.SIGNATURE (pos yypos, pos yypos + size yytext));
289 <INITIAL> "struct" => (Tokens.STRUCT (pos yypos, pos yypos + size yytext));
290 <INITIAL> "sig" => (if yypos = 2 then initialSig () else (); Tokens.SIG (pos yypos, pos yypos + size yytext));
291 <INITIAL> "end" => (Tokens.END (pos yypos, pos yypos + size yytext));
292 <INITIAL> "functor" => (Tokens.FUNCTOR (pos yypos, pos yypos + size yytext));
293 <INITIAL> "where" => (Tokens.WHERE (pos yypos, pos yypos + size yytext));
294 <INITIAL> "extern" => (Tokens.EXTERN (pos yypos, pos yypos + size yytext));
295 <INITIAL> "include" => (Tokens.INCLUDE (pos yypos, pos yypos + size yytext));
296 <INITIAL> "open" => (Tokens.OPEN (pos yypos, pos yypos + size yytext));
297 <INITIAL> "constraint"=> (Tokens.CONSTRAINT (pos yypos, pos yypos + size yytext));
298 <INITIAL> "constraints"=> (Tokens.CONSTRAINTS (pos yypos, pos yypos + size yytext));
299 <INITIAL> "export" => (Tokens.EXPORT (pos yypos, pos yypos + size yytext));
300 <INITIAL> "table" => (Tokens.TABLE (pos yypos, pos yypos + size yytext));
301 <INITIAL> "class" => (Tokens.CLASS (pos yypos, pos yypos + size yytext));
302
303 <INITIAL> "Type" => (Tokens.TYPE (pos yypos, pos yypos + size yytext));
304 <INITIAL> "Name" => (Tokens.NAME (pos yypos, pos yypos + size yytext));
305 <INITIAL> "Unit" => (Tokens.KUNIT (pos yypos, pos yypos + size yytext));
306
307 <INITIAL> "SELECT" => (Tokens.SELECT (pos yypos, pos yypos + size yytext));
308 <INITIAL> "FROM" => (Tokens.FROM (pos yypos, pos yypos + size yytext));
309 <INITIAL> "AS" => (Tokens.AS (pos yypos, pos yypos + size yytext));
310 <INITIAL> "WHERE" => (Tokens.CWHERE (pos yypos, pos yypos + size yytext));
311 <INITIAL> "GROUP" => (Tokens.GROUP (pos yypos, pos yypos + size yytext));
312 <INITIAL> "ORDER" => (Tokens.ORDER (pos yypos, pos yypos + size yytext));
313 <INITIAL> "BY" => (Tokens.BY (pos yypos, pos yypos + size yytext));
314 <INITIAL> "HAVING" => (Tokens.HAVING (pos yypos, pos yypos + size yytext));
315 <INITIAL> "LIMIT" => (Tokens.LIMIT (pos yypos, pos yypos + size yytext));
316 <INITIAL> "OFFSET" => (Tokens.OFFSET (pos yypos, pos yypos + size yytext));
317 <INITIAL> "ALL" => (Tokens.ALL (pos yypos, pos yypos + size yytext));
318
319 <INITIAL> "UNION" => (Tokens.UNION (pos yypos, pos yypos + size yytext));
320 <INITIAL> "INTERSECT" => (Tokens.INTERSECT (pos yypos, pos yypos + size yytext));
321 <INITIAL> "EXCEPT" => (Tokens.EXCEPT (pos yypos, pos yypos + size yytext));
322
323 <INITIAL> "TRUE" => (Tokens.TRUE (pos yypos, pos yypos + size yytext));
324 <INITIAL> "FALSE" => (Tokens.FALSE (pos yypos, pos yypos + size yytext));
325 <INITIAL> "AND" => (Tokens.CAND (pos yypos, pos yypos + size yytext));
326 <INITIAL> "OR" => (Tokens.OR (pos yypos, pos yypos + size yytext));
327 <INITIAL> "NOT" => (Tokens.NOT (pos yypos, pos yypos + size yytext));
328
329 <INITIAL> "COUNT" => (Tokens.COUNT (pos yypos, pos yypos + size yytext));
330 <INITIAL> "AVG" => (Tokens.AVG (pos yypos, pos yypos + size yytext));
331 <INITIAL> "SUM" => (Tokens.SUM (pos yypos, pos yypos + size yytext));
332 <INITIAL> "MIN" => (Tokens.MIN (pos yypos, pos yypos + size yytext));
333 <INITIAL> "MAX" => (Tokens.MAX (pos yypos, pos yypos + size yytext));
334
335 <INITIAL> {id} => (Tokens.SYMBOL (yytext, pos yypos, pos yypos + size yytext));
336 <INITIAL> {cid} => (Tokens.CSYMBOL (yytext, pos yypos, pos yypos + size yytext));
337
338 <INITIAL> {intconst} => (case Int64.fromString yytext of
339 SOME x => Tokens.INT (x, pos yypos, pos yypos + size yytext)
340 | NONE => (ErrorMsg.errorAt' (pos yypos, pos yypos)
341 ("Expected int, received: " ^ yytext);
342 continue ()));
343 <INITIAL> {realconst} => (case Real64.fromString yytext of
344 SOME x => Tokens.FLOAT (x, pos yypos, pos yypos + size yytext)
345 | NONE => (ErrorMsg.errorAt' (pos yypos, pos yypos)
346 ("Expected float, received: " ^ yytext);
347 continue ()));
348
349 <COMMENT> . => (continue());
350
351 <INITIAL> . => (ErrorMsg.errorAt' (pos yypos, pos yypos)
352 ("illegal character: \"" ^ yytext ^ "\"");
353 continue ());