Mercurial > urweb
comparison src/urweb.lex @ 244:71bafe66dbe1
Laconic -> Ur
author | Adam Chlipala <adamc@hcoop.net> |
---|---|
date | Sun, 31 Aug 2008 08:32:18 -0400 |
parents | src/lacweb.lex@2b9dfaffb008 |
children | bacd0ba869e1 |
comparison
equal
deleted
inserted
replaced
243:2b9dfaffb008 | 244:71bafe66dbe1 |
---|---|
1 (* Copyright (c) 2008, Adam Chlipala | |
2 * All rights reserved. | |
3 * | |
4 * Redistribution and use in source and binary forms, with or without | |
5 * modification, are permitted provided that the following conditions are met: | |
6 * | |
7 * - Redistributions of source code must retain the above copyright notice, | |
8 * this list of conditions and the following disclaimer. | |
9 * - Redistributions in binary form must reproduce the above copyright notice, | |
10 * this list of conditions and the following disclaimer in the documentation | |
11 * and/or other materials provided with the distribution. | |
12 * - The names of contributors may not be used to endorse or promote products | |
13 * derived from this software without specific prior written permission. | |
14 * | |
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |
16 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |
19 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
20 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
21 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
22 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
24 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
25 * POSSIBILITY OF SUCH DAMAGE. | |
26 *) | |
27 | |
28 (* Lexing info for Ur/Web programs *) | |
29 | |
30 type pos = int | |
31 type svalue = Tokens.svalue | |
32 type ('a,'b) token = ('a,'b) Tokens.token | |
33 type lexresult = (svalue,pos) Tokens.token | |
34 | |
35 local | |
36 val commentLevel = ref 0 | |
37 val commentPos = ref 0 | |
38 in | |
39 fun enterComment pos = | |
40 (if !commentLevel = 0 then | |
41 commentPos := pos | |
42 else | |
43 (); | |
44 commentLevel := !commentLevel + 1) | |
45 | |
46 fun exitComment () = | |
47 (ignore (commentLevel := !commentLevel - 1); | |
48 !commentLevel = 0) | |
49 | |
50 fun eof () = | |
51 let | |
52 val pos = ErrorMsg.lastLineStart () | |
53 in | |
54 if !commentLevel > 0 then | |
55 ErrorMsg.errorAt' (!commentPos, !commentPos) "Unterminated comment" | |
56 else | |
57 (); | |
58 Tokens.EOF (pos, pos) | |
59 end | |
60 end | |
61 | |
62 val strEnder = ref #"\"" | |
63 val str = ref ([] : char list) | |
64 val strStart = ref 0 | |
65 | |
66 local | |
67 val initSig = ref false | |
68 val offset = ref 0 | |
69 in | |
70 | |
71 fun initialSig () = initSig := true | |
72 | |
73 fun pos yypos = yypos - !offset | |
74 | |
75 fun newline yypos = | |
76 if !initSig then | |
77 (initSig := false; | |
78 offset := yypos + 1) | |
79 else | |
80 ErrorMsg.newline (pos yypos) | |
81 | |
82 end | |
83 | |
84 val xmlTag = ref ([] : string list) | |
85 val xmlString = ref true | |
86 val braceLevels = ref ([] : ((unit -> unit) * int) list) | |
87 | |
88 fun pushLevel s = braceLevels := (s, 1) :: (!braceLevels) | |
89 | |
90 fun enterBrace () = | |
91 case !braceLevels of | |
92 (s, i) :: rest => braceLevels := (s, i+1) :: rest | |
93 | _ => () | |
94 | |
95 fun exitBrace () = | |
96 case !braceLevels of | |
97 (s, i) :: rest => | |
98 if i = 1 then | |
99 (braceLevels := rest; | |
100 s ()) | |
101 else | |
102 braceLevels := (s, i-1) :: rest | |
103 | _ => () | |
104 | |
105 fun initialize () = (xmlTag := []; | |
106 xmlString := false) | |
107 | |
108 | |
109 %% | |
110 %header (functor UrwebLexFn(structure Tokens : Urweb_TOKENS)); | |
111 %full | |
112 %s COMMENT STRING XML XMLTAG; | |
113 | |
114 id = [a-z_][A-Za-z0-9_']*; | |
115 cid = [A-Z][A-Za-z0-9_']*; | |
116 ws = [\ \t\012]; | |
117 intconst = [0-9]+; | |
118 realconst = [0-9]+\.[0-9]*; | |
119 notags = [^<{\n]+; | |
120 | |
121 %% | |
122 | |
123 <INITIAL> \n => (newline yypos; | |
124 continue ()); | |
125 <COMMENT> \n => (newline yypos; | |
126 continue ()); | |
127 <XMLTAG> \n => (newline yypos; | |
128 continue ()); | |
129 <XML> \n => (newline yypos; | |
130 Tokens.NOTAGS (yytext, yypos, yypos + size yytext)); | |
131 | |
132 <INITIAL> {ws}+ => (lex ()); | |
133 | |
134 <INITIAL> "(*" => (YYBEGIN COMMENT; | |
135 enterComment (pos yypos); | |
136 continue ()); | |
137 <INITIAL> "*)" => (ErrorMsg.errorAt' (pos yypos, pos yypos) "Unbalanced comments"; | |
138 continue ()); | |
139 | |
140 <COMMENT> "(*" => (enterComment (pos yypos); | |
141 continue ()); | |
142 <COMMENT> "*)" => (if exitComment () then YYBEGIN INITIAL else (); | |
143 continue ()); | |
144 | |
145 <INITIAL> "\"" => (YYBEGIN STRING; strEnder := #"\""; strStart := pos yypos; str := []; continue()); | |
146 <INITIAL> "'" => (YYBEGIN STRING; strEnder := #"'"; strStart := pos yypos; str := []; continue()); | |
147 <STRING> "\\\"" => (str := #"\"" :: !str; continue()); | |
148 <STRING> "\\'" => (str := #"'" :: !str; continue()); | |
149 <STRING> "\n" => (newline yypos; | |
150 str := #"\n" :: !str; continue()); | |
151 <STRING> . => (let | |
152 val ch = String.sub (yytext, 0) | |
153 in | |
154 if ch = !strEnder then | |
155 (if !xmlString then | |
156 (xmlString := false; YYBEGIN XMLTAG) | |
157 else | |
158 YYBEGIN INITIAL; | |
159 Tokens.STRING (String.implode (List.rev (!str)), !strStart, pos yypos + 1)) | |
160 else | |
161 (str := ch :: !str; | |
162 continue ()) | |
163 end); | |
164 | |
165 <INITIAL> "<" {id} ">"=> (let | |
166 val tag = String.substring (yytext, 1, size yytext - 2) | |
167 in | |
168 YYBEGIN XML; | |
169 xmlTag := tag :: (!xmlTag); | |
170 Tokens.XML_BEGIN (tag, yypos, yypos + size yytext) | |
171 end); | |
172 <XML> "</" {id} ">" => (let | |
173 val id = String.substring (yytext, 2, size yytext - 3) | |
174 in | |
175 case !xmlTag of | |
176 id' :: rest => | |
177 if id = id' then | |
178 (YYBEGIN INITIAL; | |
179 xmlTag := rest; | |
180 Tokens.XML_END (yypos, yypos + size yytext)) | |
181 else | |
182 Tokens.END_TAG (id, yypos, yypos + size yytext) | |
183 | _ => | |
184 Tokens.END_TAG (id, yypos, yypos + size yytext) | |
185 end); | |
186 | |
187 <XML> "<" {id} => (YYBEGIN XMLTAG; | |
188 Tokens.BEGIN_TAG (String.extract (yytext, 1, NONE), | |
189 yypos, yypos + size yytext)); | |
190 | |
191 <XMLTAG> "/" => (Tokens.DIVIDE (yypos, yypos + size yytext)); | |
192 <XMLTAG> ">" => (YYBEGIN XML; | |
193 Tokens.GT (yypos, yypos + size yytext)); | |
194 | |
195 <XMLTAG> {ws}+ => (lex ()); | |
196 | |
197 <XMLTAG> {id} => (Tokens.SYMBOL (yytext, yypos, yypos + size yytext)); | |
198 <XMLTAG> "=" => (Tokens.EQ (yypos, yypos + size yytext)); | |
199 | |
200 <XMLTAG> {intconst} => (case Int64.fromString yytext of | |
201 SOME x => Tokens.INT (x, yypos, yypos + size yytext) | |
202 | NONE => (ErrorMsg.errorAt' (yypos, yypos) | |
203 ("Expected int, received: " ^ yytext); | |
204 continue ())); | |
205 <XMLTAG> {realconst} => (case Real.fromString yytext of | |
206 SOME x => Tokens.FLOAT (x, yypos, yypos + size yytext) | |
207 | NONE => (ErrorMsg.errorAt' (yypos, yypos) | |
208 ("Expected float, received: " ^ yytext); | |
209 continue ())); | |
210 <XMLTAG> "\"" => (YYBEGIN STRING; | |
211 xmlString := true; | |
212 strStart := yypos; str := []; continue ()); | |
213 | |
214 <XMLTAG> "{" => (YYBEGIN INITIAL; | |
215 pushLevel (fn () => YYBEGIN XMLTAG); | |
216 Tokens.LBRACE (yypos, yypos + 1)); | |
217 <XMLTAG> "(" => (YYBEGIN INITIAL; | |
218 pushLevel (fn () => YYBEGIN XMLTAG); | |
219 Tokens.LPAREN (yypos, yypos + 1)); | |
220 | |
221 <XMLTAG> . => (ErrorMsg.errorAt' (yypos, yypos) | |
222 ("illegal XML tag character: \"" ^ yytext ^ "\""); | |
223 continue ()); | |
224 | |
225 <XML> "{" => (YYBEGIN INITIAL; | |
226 pushLevel (fn () => YYBEGIN XML); | |
227 Tokens.LBRACE (yypos, yypos + 1)); | |
228 | |
229 <XML> {notags} => (Tokens.NOTAGS (yytext, yypos, yypos + size yytext)); | |
230 | |
231 <XML> . => (ErrorMsg.errorAt' (yypos, yypos) | |
232 ("illegal XML character: \"" ^ yytext ^ "\""); | |
233 continue ()); | |
234 | |
235 <INITIAL> "()" => (Tokens.UNIT (pos yypos, pos yypos + size yytext)); | |
236 <INITIAL> "(" => (Tokens.LPAREN (pos yypos, pos yypos + size yytext)); | |
237 <INITIAL> ")" => (Tokens.RPAREN (pos yypos, pos yypos + size yytext)); | |
238 <INITIAL> "[" => (Tokens.LBRACK (pos yypos, pos yypos + size yytext)); | |
239 <INITIAL> "]" => (Tokens.RBRACK (pos yypos, pos yypos + size yytext)); | |
240 <INITIAL> "{" => (enterBrace (); | |
241 Tokens.LBRACE (pos yypos, pos yypos + size yytext)); | |
242 <INITIAL> "}" => (exitBrace (); | |
243 Tokens.RBRACE (pos yypos, pos yypos + size yytext)); | |
244 | |
245 <INITIAL> "->" => (Tokens.ARROW (pos yypos, pos yypos + size yytext)); | |
246 <INITIAL> "=>" => (Tokens.DARROW (pos yypos, pos yypos + size yytext)); | |
247 <INITIAL> "++" => (Tokens.PLUSPLUS (pos yypos, pos yypos + size yytext)); | |
248 <INITIAL> "--" => (Tokens.MINUSMINUS (pos yypos, pos yypos + size yytext)); | |
249 | |
250 <INITIAL> "=" => (Tokens.EQ (pos yypos, pos yypos + size yytext)); | |
251 <INITIAL> "<>" => (Tokens.NE (pos yypos, pos yypos + size yytext)); | |
252 <INITIAL> "<" => (Tokens.LT (pos yypos, pos yypos + size yytext)); | |
253 <INITIAL> ">" => (Tokens.GT (pos yypos, pos yypos + size yytext)); | |
254 <INITIAL> "<=" => (Tokens.LE (pos yypos, pos yypos + size yytext)); | |
255 <INITIAL> ">=" => (Tokens.GE (pos yypos, pos yypos + size yytext)); | |
256 <INITIAL> "," => (Tokens.COMMA (pos yypos, pos yypos + size yytext)); | |
257 <INITIAL> ":::" => (Tokens.TCOLON (pos yypos, pos yypos + size yytext)); | |
258 <INITIAL> "::" => (Tokens.DCOLON (pos yypos, pos yypos + size yytext)); | |
259 <INITIAL> ":" => (Tokens.COLON (pos yypos, pos yypos + size yytext)); | |
260 <INITIAL> "..." => (Tokens.DOTDOTDOT (pos yypos, pos yypos + size yytext)); | |
261 <INITIAL> "." => (Tokens.DOT (pos yypos, pos yypos + size yytext)); | |
262 <INITIAL> "$" => (Tokens.DOLLAR (pos yypos, pos yypos + size yytext)); | |
263 <INITIAL> "#" => (Tokens.HASH (pos yypos, pos yypos + size yytext)); | |
264 <INITIAL> "__" => (Tokens.UNDERUNDER (pos yypos, pos yypos + size yytext)); | |
265 <INITIAL> "_" => (Tokens.UNDER (pos yypos, pos yypos + size yytext)); | |
266 <INITIAL> "~" => (Tokens.TWIDDLE (pos yypos, pos yypos + size yytext)); | |
267 <INITIAL> "|" => (Tokens.BAR (pos yypos, pos yypos + size yytext)); | |
268 <INITIAL> "*" => (Tokens.STAR (pos yypos, pos yypos + size yytext)); | |
269 <INITIAL> "<-" => (Tokens.LARROW (pos yypos, pos yypos + size yytext)); | |
270 <INITIAL> ";" => (Tokens.SEMI (pos yypos, pos yypos + size yytext)); | |
271 | |
272 <INITIAL> "con" => (Tokens.CON (pos yypos, pos yypos + size yytext)); | |
273 <INITIAL> "type" => (Tokens.LTYPE (pos yypos, pos yypos + size yytext)); | |
274 <INITIAL> "datatype" => (Tokens.DATATYPE (pos yypos, pos yypos + size yytext)); | |
275 <INITIAL> "of" => (Tokens.OF (pos yypos, pos yypos + size yytext)); | |
276 <INITIAL> "val" => (Tokens.VAL (pos yypos, pos yypos + size yytext)); | |
277 <INITIAL> "rec" => (Tokens.REC (pos yypos, pos yypos + size yytext)); | |
278 <INITIAL> "and" => (Tokens.AND (pos yypos, pos yypos + size yytext)); | |
279 <INITIAL> "fun" => (Tokens.FUN (pos yypos, pos yypos + size yytext)); | |
280 <INITIAL> "fn" => (Tokens.FN (pos yypos, pos yypos + size yytext)); | |
281 <INITIAL> "fold" => (Tokens.FOLD (pos yypos, pos yypos + size yytext)); | |
282 <INITIAL> "case" => (Tokens.CASE (pos yypos, pos yypos + size yytext)); | |
283 <INITIAL> "if" => (Tokens.IF (pos yypos, pos yypos + size yytext)); | |
284 <INITIAL> "then" => (Tokens.THEN (pos yypos, pos yypos + size yytext)); | |
285 <INITIAL> "else" => (Tokens.ELSE (pos yypos, pos yypos + size yytext)); | |
286 | |
287 <INITIAL> "structure" => (Tokens.STRUCTURE (pos yypos, pos yypos + size yytext)); | |
288 <INITIAL> "signature" => (Tokens.SIGNATURE (pos yypos, pos yypos + size yytext)); | |
289 <INITIAL> "struct" => (Tokens.STRUCT (pos yypos, pos yypos + size yytext)); | |
290 <INITIAL> "sig" => (if yypos = 2 then initialSig () else (); Tokens.SIG (pos yypos, pos yypos + size yytext)); | |
291 <INITIAL> "end" => (Tokens.END (pos yypos, pos yypos + size yytext)); | |
292 <INITIAL> "functor" => (Tokens.FUNCTOR (pos yypos, pos yypos + size yytext)); | |
293 <INITIAL> "where" => (Tokens.WHERE (pos yypos, pos yypos + size yytext)); | |
294 <INITIAL> "extern" => (Tokens.EXTERN (pos yypos, pos yypos + size yytext)); | |
295 <INITIAL> "include" => (Tokens.INCLUDE (pos yypos, pos yypos + size yytext)); | |
296 <INITIAL> "open" => (Tokens.OPEN (pos yypos, pos yypos + size yytext)); | |
297 <INITIAL> "constraint"=> (Tokens.CONSTRAINT (pos yypos, pos yypos + size yytext)); | |
298 <INITIAL> "constraints"=> (Tokens.CONSTRAINTS (pos yypos, pos yypos + size yytext)); | |
299 <INITIAL> "export" => (Tokens.EXPORT (pos yypos, pos yypos + size yytext)); | |
300 <INITIAL> "table" => (Tokens.TABLE (pos yypos, pos yypos + size yytext)); | |
301 <INITIAL> "class" => (Tokens.CLASS (pos yypos, pos yypos + size yytext)); | |
302 | |
303 <INITIAL> "Type" => (Tokens.TYPE (pos yypos, pos yypos + size yytext)); | |
304 <INITIAL> "Name" => (Tokens.NAME (pos yypos, pos yypos + size yytext)); | |
305 <INITIAL> "Unit" => (Tokens.KUNIT (pos yypos, pos yypos + size yytext)); | |
306 | |
307 <INITIAL> "SELECT" => (Tokens.SELECT (pos yypos, pos yypos + size yytext)); | |
308 <INITIAL> "FROM" => (Tokens.FROM (pos yypos, pos yypos + size yytext)); | |
309 <INITIAL> "AS" => (Tokens.AS (pos yypos, pos yypos + size yytext)); | |
310 <INITIAL> "WHERE" => (Tokens.CWHERE (pos yypos, pos yypos + size yytext)); | |
311 <INITIAL> "GROUP" => (Tokens.GROUP (pos yypos, pos yypos + size yytext)); | |
312 <INITIAL> "ORDER" => (Tokens.ORDER (pos yypos, pos yypos + size yytext)); | |
313 <INITIAL> "BY" => (Tokens.BY (pos yypos, pos yypos + size yytext)); | |
314 <INITIAL> "HAVING" => (Tokens.HAVING (pos yypos, pos yypos + size yytext)); | |
315 <INITIAL> "LIMIT" => (Tokens.LIMIT (pos yypos, pos yypos + size yytext)); | |
316 <INITIAL> "OFFSET" => (Tokens.OFFSET (pos yypos, pos yypos + size yytext)); | |
317 <INITIAL> "ALL" => (Tokens.ALL (pos yypos, pos yypos + size yytext)); | |
318 | |
319 <INITIAL> "UNION" => (Tokens.UNION (pos yypos, pos yypos + size yytext)); | |
320 <INITIAL> "INTERSECT" => (Tokens.INTERSECT (pos yypos, pos yypos + size yytext)); | |
321 <INITIAL> "EXCEPT" => (Tokens.EXCEPT (pos yypos, pos yypos + size yytext)); | |
322 | |
323 <INITIAL> "TRUE" => (Tokens.TRUE (pos yypos, pos yypos + size yytext)); | |
324 <INITIAL> "FALSE" => (Tokens.FALSE (pos yypos, pos yypos + size yytext)); | |
325 <INITIAL> "AND" => (Tokens.CAND (pos yypos, pos yypos + size yytext)); | |
326 <INITIAL> "OR" => (Tokens.OR (pos yypos, pos yypos + size yytext)); | |
327 <INITIAL> "NOT" => (Tokens.NOT (pos yypos, pos yypos + size yytext)); | |
328 | |
329 <INITIAL> "COUNT" => (Tokens.COUNT (pos yypos, pos yypos + size yytext)); | |
330 <INITIAL> "AVG" => (Tokens.AVG (pos yypos, pos yypos + size yytext)); | |
331 <INITIAL> "SUM" => (Tokens.SUM (pos yypos, pos yypos + size yytext)); | |
332 <INITIAL> "MIN" => (Tokens.MIN (pos yypos, pos yypos + size yytext)); | |
333 <INITIAL> "MAX" => (Tokens.MAX (pos yypos, pos yypos + size yytext)); | |
334 | |
335 <INITIAL> {id} => (Tokens.SYMBOL (yytext, pos yypos, pos yypos + size yytext)); | |
336 <INITIAL> {cid} => (Tokens.CSYMBOL (yytext, pos yypos, pos yypos + size yytext)); | |
337 | |
338 <INITIAL> {intconst} => (case Int64.fromString yytext of | |
339 SOME x => Tokens.INT (x, pos yypos, pos yypos + size yytext) | |
340 | NONE => (ErrorMsg.errorAt' (pos yypos, pos yypos) | |
341 ("Expected int, received: " ^ yytext); | |
342 continue ())); | |
343 <INITIAL> {realconst} => (case Real64.fromString yytext of | |
344 SOME x => Tokens.FLOAT (x, pos yypos, pos yypos + size yytext) | |
345 | NONE => (ErrorMsg.errorAt' (pos yypos, pos yypos) | |
346 ("Expected float, received: " ^ yytext); | |
347 continue ())); | |
348 | |
349 <COMMENT> . => (continue()); | |
350 | |
351 <INITIAL> . => (ErrorMsg.errorAt' (pos yypos, pos yypos) | |
352 ("illegal character: \"" ^ yytext ^ "\""); | |
353 continue ()); |