adamc@1
|
1 (* Copyright (c) 2008, Adam Chlipala
|
adamc@1
|
2 * All rights reserved.
|
adamc@1
|
3 *
|
adamc@1
|
4 * Redistribution and use in source and binary forms, with or without
|
adamc@1
|
5 * modification, are permitted provided that the following conditions are met:
|
adamc@1
|
6 *
|
adamc@1
|
7 * - Redistributions of source code must retain the above copyright notice,
|
adamc@1
|
8 * this list of conditions and the following disclaimer.
|
adamc@1
|
9 * - Redistributions in binary form must reproduce the above copyright notice,
|
adamc@1
|
10 * this list of conditions and the following disclaimer in the documentation
|
adamc@1
|
11 * and/or other materials provided with the distribution.
|
adamc@1
|
12 * - The names of contributors may not be used to endorse or promote products
|
adamc@1
|
13 * derived from this software without specific prior written permission.
|
adamc@1
|
14 *
|
adamc@1
|
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
adamc@1
|
16 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
adamc@1
|
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
adamc@1
|
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
adamc@1
|
19 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
adamc@1
|
20 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
adamc@1
|
21 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
adamc@1
|
22 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
adamc@1
|
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
adamc@1
|
24 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
adamc@1
|
25 * POSSIBILITY OF SUCH DAMAGE.
|
adamc@1
|
26 *)
|
adamc@1
|
27
|
adamc@1
|
28 (* Lexing info for Laconic/Web programs *)
|
adamc@1
|
29
|
(* Type abbreviations used by the lexer: positions are plain integers, while
 * semantic values and tokens are the ones declared by the Tokens structure. *)
type pos = int
type svalue = Tokens.svalue
type ('value, 'position) token = ('value, 'position) Tokens.token
type lexresult = (svalue, pos) token
adamc@1
|
34
|
(* Nested-comment bookkeeping.  commentLevel is the current nesting depth and
 * commentPos is the position recorded when the outermost comment was opened. *)
local
    val commentLevel = ref 0
    val commentPos = ref 0
in
    (* Note that a comment opens at pos.  Only an opener seen at depth zero
     * updates the remembered position; every opener increases the depth. *)
    fun enterComment pos =
        let
            val depth = !commentLevel
        in
            (case depth of
                 0 => commentPos := pos
               | _ => ());
            commentLevel := depth + 1
        end

    (* Note that a comment closes.  Returns true exactly when the depth has
     * dropped to zero after the decrement. *)
    fun exitComment () =
        let
            val depth = !commentLevel - 1
        in
            commentLevel := depth;
            depth = 0
        end

    (* Build the end-of-file token at the position ErrorMsg reports as the
     * start of the last line, first reporting an error at the remembered
     * opener position if a comment is still open. *)
    fun eof () =
        let
            val pos = ErrorMsg.lastLineStart ()
            val () =
                if !commentLevel > 0 then
                    ErrorMsg.errorAt' (!commentPos, !commentPos) "Unterminated comment"
                else
                    ()
        in
            Tokens.EOF (pos, pos)
        end
end
adamc@1
|
61
|
(* Accumulator for the string literal currently being lexed (the STRING rules
 * cons characters onto it and reverse it when the literal closes), and the
 * position at which that literal started. *)
val str : char list ref = ref []
val strStart : int ref = ref 0
adamc@14
|
64
|
(* Position bookkeeping.  offset is subtracted from every raw lexer position,
 * and initSig is a one-shot flag that makes the next newline reset offset. *)
local
    val initSig = ref false
    val offset = ref 0
in
    (* Arm the one-shot flag that the next call to newline consumes. *)
    fun initialSig () = initSig := true

    (* Translate a raw lexer position by subtracting the current offset. *)
    fun pos yypos = yypos - !offset

    (* Handle a line break at raw position yypos.  When the flag is armed,
     * clear it and restart offsets just past this line break; otherwise
     * report the translated position to ErrorMsg.newline. *)
    fun newline yypos =
        case !initSig of
            true => (initSig := false;
                     offset := yypos + 1)
          | false => ErrorMsg.newline (yypos - !offset)
end
adamc@54
|
82
|
(* Stack of tag names pushed when an XML element is opened from ordinary code. *)
val xmlTag : string list ref = ref []

(* True only while lexing a string literal that was opened inside an XML tag.
 * Starts out false, the same value that initialize restores. *)
val xmlString = ref false

(* Stack of pending brace levels.  Each entry pairs a callback to run once its
 * braces are balanced with the number of braces currently open at that level. *)
val braceLevels : ((unit -> unit) * int) list ref = ref []

(* Start a new brace level with one open brace; s runs when that brace closes. *)
fun pushLevel s = braceLevels := (s, 1) :: !braceLevels

(* Count one more open brace at the innermost level, if there is one. *)
fun enterBrace () =
    case !braceLevels of
        (resume, depth) :: outer => braceLevels := (resume, depth + 1) :: outer
      | [] => ()

(* Count one closed brace at the innermost level.  When it was the last open
 * brace of that level, pop the level and then run its callback. *)
fun exitBrace () =
    case !braceLevels of
        (resume, 1) :: outer => (braceLevels := outer;
                                 resume ())
      | (resume, depth) :: outer => braceLevels := (resume, depth - 1) :: outer
      | [] => ()

(* Reset the XML-related lexer state.  braceLevels is cleared as well, so that
 * levels left over from an earlier run cannot fire their callbacks in a later one. *)
fun initialize () = (xmlTag := [];
                     xmlString := false;
                     braceLevels := [])
adamc@91
|
106
|
adamc@54
|
107
|
adamc@1
|
108 %%
|
adamc@1
|
109 %header (functor LacwebLexFn(structure Tokens : Lacweb_TOKENS));
|
adamc@1
|
110 %full
|
adamc@91
|
111 %s COMMENT STRING XML XMLTAG;
|
adamc@1
|
112
|
adamc@48
|
113 id = [a-z_][A-Za-z0-9_']*;
|
adamc@48
|
114 cid = [A-Z][A-Za-z0-9_']*;
|
adamc@1
|
115 ws = [\ \t\012];
|
adamc@14
|
116 intconst = [0-9]+;
|
adamc@14
|
117 realconst = [0-9]+\.[0-9]*;
|
adamc@91
|
118 notags = [^<{\n]+;
|
adamc@1
|
119
|
adamc@1
|
120 %%
|
adamc@1
|
121
|
<INITIAL,COMMENT,XMLTAG> \n => ((* Hand the line break to newline, then keep scanning. *)
                          newline yypos;
                          continue ());
<XML> \n              => ((* Inside XML content the line break is also returned as text. *)
                          newline yypos;
                          Tokens.NOTAGS (yytext, yypos, yypos + size yytext));
adamc@1
|
130
|
<INITIAL> {ws}+       => (continue ());

<INITIAL> "(*"        => ((* Outermost opener: record it and switch to COMMENT. *)
                          enterComment (pos yypos);
                          YYBEGIN COMMENT;
                          continue ());
<INITIAL> "*)"        => (let
                              val here = pos yypos
                          in
                              ErrorMsg.errorAt' (here, here) "Unbalanced comments";
                              continue ()
                          end);

<COMMENT> "(*"        => ((* Nested opener: only the depth changes. *)
                          enterComment (pos yypos);
                          continue ());
<COMMENT> "*)"        => ((* Leave COMMENT only when the outermost comment closes. *)
                          (case exitComment () of
                               true => YYBEGIN INITIAL
                             | false => ());
                          continue ());
adamc@1
|
143
|
<INITIAL> "\""        => ((* A literal opened from ordinary code must resume INITIAL
                           * when it closes, so clear the XML-attribute flag here. *)
                          YYBEGIN STRING;
                          xmlString := false;
                          strStart := pos yypos; str := []; continue ());
<STRING> "\\\""       => ((* Escaped double quote: keep just the quote character. *)
                          str := #"\"" :: !str; continue ());
<STRING> "\""         => ((* Closing quote.  Resume the state the literal was opened
                           * from: the XMLTAG rule sets xmlString before entering
                           * STRING, and every other literal goes back to INITIAL. *)
                          if !xmlString then
                              (xmlString := false;
                               YYBEGIN XMLTAG)
                          else
                              YYBEGIN INITIAL;
                          Tokens.STRING (String.implode (List.rev (!str)), !strStart, pos yypos + 1));
<STRING> "\n"         => ((* Line breaks are kept in the literal and still counted. *)
                          newline yypos;
                          str := #"\n" :: !str; continue ());
<STRING> .            => (str := String.sub (yytext, 0) :: !str; continue ());
adamc@14
|
151
|
<INITIAL> "<" {id} ">" => (let
                              (* Strip the surrounding angle brackets. *)
                              val len = size yytext
                              val tag = String.substring (yytext, 1, len - 2)
                          in
                              xmlTag := tag :: !xmlTag;
                              YYBEGIN XML;
                              Tokens.XML_BEGIN (tag, yypos, yypos + len)
                          end);
<XML> "</" {id} ">"   => (let
                              (* Strip the two opening characters and the closing bracket. *)
                              val stop = yypos + size yytext
                              val id = String.substring (yytext, 2, size yytext - 3)
                              (* Does this closer match the tag on top of the xmlTag stack? *)
                              val closesElement =
                                  case !xmlTag of
                                      top :: _ => id = top
                                    | [] => false
                          in
                              if closesElement then
                                  (YYBEGIN INITIAL;
                                   xmlTag := List.tl (!xmlTag);
                                   Tokens.XML_END (yypos, stop))
                              else
                                  Tokens.END_TAG (id, yypos, stop)
                          end);

<XML> "<" {id}        => (let
                              (* Everything after the opening bracket is the tag name. *)
                              val len = size yytext
                              val name = String.substring (yytext, 1, len - 1)
                          in
                              YYBEGIN XMLTAG;
                              Tokens.BEGIN_TAG (name, yypos, yypos + len)
                          end);
adamc@91
|
177
|
<XMLTAG> "/"          => (Tokens.DIVIDE (yypos, yypos + size yytext));
<XMLTAG> ">"          => ((* End of the tag: go back to lexing XML content. *)
                          YYBEGIN XML;
                          Tokens.GT (yypos, yypos + size yytext));

<XMLTAG> {ws}+        => (lex ());

<XMLTAG> {id}         => (Tokens.SYMBOL (yytext, yypos, yypos + size yytext));
<XMLTAG> "="          => (Tokens.EQ (yypos, yypos + size yytext));

<XMLTAG> {intconst}   => (case Int64.fromString yytext of
                              SOME x => Tokens.INT (x, yypos, yypos + size yytext)
                            | NONE => (ErrorMsg.errorAt' (yypos, yypos)
                                       ("Expected int, received: " ^ yytext);
                                       continue ()));
<XMLTAG> {realconst}  => ((* Real64 is used here to match the INITIAL float rule. *)
                          case Real64.fromString yytext of
                              SOME x => Tokens.FLOAT (x, yypos, yypos + size yytext)
                            | NONE => (ErrorMsg.errorAt' (yypos, yypos)
                                       ("Expected float, received: " ^ yytext);
                                       continue ()));
<XMLTAG> "\""         => ((* Attribute string: flag it as opened from inside a tag. *)
                          YYBEGIN STRING;
                          xmlString := true;
                          strStart := yypos; str := []; continue());

<XMLTAG> "{"          => ((* Embedded code: lex it in INITIAL and register a callback
                           * that switches back to XMLTAG. *)
                          YYBEGIN INITIAL;
                          pushLevel (fn () => YYBEGIN XMLTAG);
                          Tokens.LBRACE (yypos, yypos + 1));
<XMLTAG> "("          => (YYBEGIN INITIAL;
                          pushLevel (fn () => YYBEGIN XMLTAG);
                          Tokens.LPAREN (yypos, yypos + 1));

<XMLTAG> .            => (ErrorMsg.errorAt' (yypos, yypos)
                          ("illegal XML tag character: \"" ^ yytext ^ "\"");
                          continue ());
adamc@91
|
211
|
<XML> "{"             => ((* Embedded code: register the callback that switches back
                           * to XML, then lex the code in INITIAL. *)
                          pushLevel (fn () => YYBEGIN XML);
                          YYBEGIN INITIAL;
                          Tokens.LBRACE (yypos, yypos + 1));

<XML> {notags}        => (let
                              val stop = yypos + size yytext
                          in
                              Tokens.NOTAGS (yytext, yypos, stop)
                          end);

<XML> .               => (ErrorMsg.errorAt' (yypos, yypos)
                          (String.concat ["illegal XML character: \"", yytext, "\""]);
                          continue ());
adamc@91
|
221
|
<INITIAL> "()"        => (Tokens.UNIT (pos yypos, pos yypos + size yytext));
<INITIAL> "("         => (Tokens.LPAREN (pos yypos, pos yypos + size yytext));
<INITIAL> ")"         => (Tokens.RPAREN (pos yypos, pos yypos + size yytext));
<INITIAL> "["         => (Tokens.LBRACK (pos yypos, pos yypos + size yytext));
<INITIAL> "]"         => (Tokens.RBRACK (pos yypos, pos yypos + size yytext));
<INITIAL> "{"         => ((* Count the brace against the innermost pending level, so
                           * that nested braces do not end embedded code early. *)
                          enterBrace ();
                          Tokens.LBRACE (pos yypos, pos yypos + size yytext));
<INITIAL> "}"         => ((* Closing the last brace of a pending level runs the
                           * callback stored by pushLevel, which switches the lexer
                           * back to the XML state it came from. *)
                          exitBrace ();
                          Tokens.RBRACE (pos yypos, pos yypos + size yytext));
adamc@1
|
229
|
<INITIAL> "->"        => (Tokens.ARROW (pos yypos, pos (yypos + size yytext)));
<INITIAL> "=>"        => (Tokens.DARROW (pos yypos, pos (yypos + size yytext)));
<INITIAL> "++"        => (Tokens.PLUSPLUS (pos yypos, pos (yypos + size yytext)));

<INITIAL> "="         => (Tokens.EQ (pos yypos, pos (yypos + size yytext)));
<INITIAL> ","         => (Tokens.COMMA (pos yypos, pos (yypos + size yytext)));
<INITIAL> ":::"       => (Tokens.TCOLON (pos yypos, pos (yypos + size yytext)));
<INITIAL> "::"        => (Tokens.DCOLON (pos yypos, pos (yypos + size yytext)));
<INITIAL> ":"         => (Tokens.COLON (pos yypos, pos (yypos + size yytext)));
<INITIAL> "."         => (Tokens.DOT (pos yypos, pos (yypos + size yytext)));
<INITIAL> "$"         => (Tokens.DOLLAR (pos yypos, pos (yypos + size yytext)));
<INITIAL> "#"         => (Tokens.HASH (pos yypos, pos (yypos + size yytext)));
<INITIAL> "__"        => (Tokens.UNDERUNDER (pos yypos, pos (yypos + size yytext)));
<INITIAL> "_"         => (Tokens.UNDER (pos yypos, pos (yypos + size yytext)));
<INITIAL> "~"         => (Tokens.TWIDDLE (pos yypos, pos (yypos + size yytext)));
adamc@1
|
245
|
<INITIAL> "con"       => (Tokens.CON (pos yypos, pos (yypos + size yytext)));
<INITIAL> "type"      => (Tokens.LTYPE (pos yypos, pos (yypos + size yytext)));
<INITIAL> "val"       => (Tokens.VAL (pos yypos, pos (yypos + size yytext)));
<INITIAL> "fn"        => (Tokens.FN (pos yypos, pos (yypos + size yytext)));
<INITIAL> "fold"      => (Tokens.FOLD (pos yypos, pos (yypos + size yytext)));

<INITIAL> "structure" => (Tokens.STRUCTURE (pos yypos, pos (yypos + size yytext)));
<INITIAL> "signature" => (Tokens.SIGNATURE (pos yypos, pos (yypos + size yytext)));
<INITIAL> "struct"    => (Tokens.STRUCT (pos yypos, pos (yypos + size yytext)));
<INITIAL> "sig"       => ((* NOTE(review): raw position 2 is presumably where the
                           * generated lexer reports the first character of the
                           * input, making this a sig that opens the file - confirm. *)
                          (case yypos of
                               2 => initialSig ()
                             | _ => ());
                          Tokens.SIG (pos yypos, pos (yypos + size yytext)));
<INITIAL> "end"       => (Tokens.END (pos yypos, pos (yypos + size yytext)));
<INITIAL> "functor"   => (Tokens.FUNCTOR (pos yypos, pos (yypos + size yytext)));
<INITIAL> "where"     => (Tokens.WHERE (pos yypos, pos (yypos + size yytext)));
<INITIAL> "extern"    => (Tokens.EXTERN (pos yypos, pos (yypos + size yytext)));
<INITIAL> "include"   => (Tokens.INCLUDE (pos yypos, pos (yypos + size yytext)));
<INITIAL> "open"      => (Tokens.OPEN (pos yypos, pos (yypos + size yytext)));
<INITIAL> "constraint" => (Tokens.CONSTRAINT (pos yypos, pos (yypos + size yytext)));
<INITIAL> "constraints" => (Tokens.CONSTRAINTS (pos yypos, pos (yypos + size yytext)));

<INITIAL> "Type"      => (Tokens.TYPE (pos yypos, pos (yypos + size yytext)));
<INITIAL> "Name"      => (Tokens.NAME (pos yypos, pos (yypos + size yytext)));
<INITIAL> "Unit"      => (Tokens.KUNIT (pos yypos, pos (yypos + size yytext)));
adamc@1
|
268
|
<INITIAL> {id}        => (Tokens.SYMBOL (yytext, pos yypos, pos (yypos + size yytext)));
<INITIAL> {cid}       => (Tokens.CSYMBOL (yytext, pos yypos, pos (yypos + size yytext)));

<INITIAL> {intconst}  => (let
                              val left = pos yypos
                          in
                              case Int64.fromString yytext of
                                  NONE => (ErrorMsg.errorAt' (left, left)
                                           ("Expected int, received: " ^ yytext);
                                           continue ())
                                | SOME n => Tokens.INT (n, left, left + size yytext)
                          end);
<INITIAL> {realconst} => (let
                              val left = pos yypos
                          in
                              case Real64.fromString yytext of
                                  NONE => (ErrorMsg.errorAt' (left, left)
                                           ("Expected float, received: " ^ yytext);
                                           continue ())
                                | SOME r => Tokens.FLOAT (r, left, left + size yytext)
                          end);
adamc@14
|
282
|
<COMMENT> .           => ((* Any other character inside a comment is skipped. *)
                          continue ());

<INITIAL> .           => (let
                              val here = pos yypos
                          in
                              ErrorMsg.errorAt' (here, here)
                              (String.concat ["illegal character: \"", yytext, "\""]);
                              continue ()
                          end);