adamc@1
|
1 (* Copyright (c) 2008, Adam Chlipala
|
adamc@1
|
2 * All rights reserved.
|
adamc@1
|
3 *
|
adamc@1
|
4 * Redistribution and use in source and binary forms, with or without
|
adamc@1
|
5 * modification, are permitted provided that the following conditions are met:
|
adamc@1
|
6 *
|
adamc@1
|
7 * - Redistributions of source code must retain the above copyright notice,
|
adamc@1
|
8 * this list of conditions and the following disclaimer.
|
adamc@1
|
9 * - Redistributions in binary form must reproduce the above copyright notice,
|
adamc@1
|
10 * this list of conditions and the following disclaimer in the documentation
|
adamc@1
|
11 * and/or other materials provided with the distribution.
|
adamc@1
|
12 * - The names of contributors may not be used to endorse or promote products
|
adamc@1
|
13 * derived from this software without specific prior written permission.
|
adamc@1
|
14 *
|
adamc@1
|
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
adamc@1
|
16 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
adamc@1
|
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
adamc@1
|
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
adamc@1
|
19 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
adamc@1
|
20 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
adamc@1
|
21 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
adamc@1
|
22 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
adamc@1
|
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
adamc@1
|
24 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
adamc@1
|
25 * POSSIBILITY OF SUCH DAMAGE.
|
adamc@1
|
26 *)
|
adamc@1
|
27
|
adamc@1
|
28 (* Lexing info for Laconic/Web programs *)
|
adamc@1
|
29
|
adamc@1
|
(* Positions are represented as integers. *)
type pos = int

(* Aliases for the token and semantic-value types supplied by Tokens. *)
type ('a, 'b) token = ('a, 'b) Tokens.token
type svalue = Tokens.svalue

(* The type of value produced by every lexer action. *)
type lexresult = (svalue, pos) token
|
adamc@1
|
34
|
adamc@1
|
local
    (* Number of comments currently open; zero when outside any comment. *)
    val commentLevel = ref 0
    (* Position recorded when the outermost open comment was entered. *)
    val commentPos = ref 0
in
    (* Note that a comment opens at pos.  The position is remembered only
     * when no comment was open before; the depth always grows by one. *)
    fun enterComment pos =
        let
            val depth = !commentLevel
        in
            if depth <> 0 then () else commentPos := pos;
            commentLevel := depth + 1
        end

    (* Note that a comment closes.  Returns true exactly when the depth
     * has dropped back to zero. *)
    fun exitComment () =
        let
            val depth = !commentLevel - 1
        in
            commentLevel := depth;
            depth = 0
        end

    (* Build the end-of-file token, positioned at the value returned by
     * ErrorMsg.lastLineStart.  If a comment is still open, an
     * unterminated-comment error is reported at the remembered position
     * before the token is returned. *)
    fun eof () =
        let
            val pos = ErrorMsg.lastLineStart ()
            val () =
                if !commentLevel <= 0 then
                    ()
                else
                    ErrorMsg.errorAt' (!commentPos, !commentPos) "Unterminated comment"
        in
            Tokens.EOF (pos, pos)
        end
end
|
adamc@1
|
61
|
adamc@14
|
(* Characters of the string literal currently being lexed, newest first;
 * the closing-quote rule reverses the list before building the token. *)
val str : char list ref = ref []
(* Position of the opening quote of that literal. *)
val strStart : int ref = ref 0
|
adamc@14
|
64
|
adamc@1
|
65 %%
|
adamc@1
|
66 %header (functor LacwebLexFn(structure Tokens : Lacweb_TOKENS));
|
adamc@1
|
67 %full
|
adamc@14
|
68 %s COMMENT STRING;
|
adamc@1
|
69
|
adamc@1
|
70 id = [a-z_][A-Za-z0-9_]*;
|
adamc@1
|
71 cid = [A-Z][A-Za-z0-9_]*;
|
adamc@1
|
72 ws = [\ \t\012];
|
adamc@14
|
73 intconst = [0-9]+;
|
adamc@14
|
74 realconst = [0-9]+\.[0-9]*;
|
adamc@1
|
75
|
adamc@1
|
76 %%
|
adamc@1
|
77
|
adamc@1
|
<INITIAL> \n     => ((* Pass the newline position to ErrorMsg, then keep lexing. *)
                     ErrorMsg.newline yypos; lex ());
<COMMENT> \n     => (ErrorMsg.newline yypos; lex ());

<INITIAL> {ws}+  => (continue ());
|
adamc@1
|
84
|
adamc@1
|
<INITIAL> "(*"   => ((* Opening delimiter outside any comment: record it and switch state. *)
                     enterComment yypos;
                     YYBEGIN COMMENT;
                     lex ());
<INITIAL> "*)"   => ((* Closing delimiter with nothing open. *)
                     ErrorMsg.errorAt' (yypos, yypos) "Unbalanced comments";
                     lex ());

<COMMENT> "(*"   => ((* Nested opening delimiter: only the depth changes. *)
                     enterComment yypos; lex ());
<COMMENT> "*)"   => ((* Leave COMMENT state only when the outermost comment closes. *)
                     if not (exitComment ()) then () else YYBEGIN INITIAL;
                     lex ());
|
adamc@1
|
95
|
adamc@14
|
<INITIAL> "\""   => ((* Opening quote: reset the accumulator and remember where we began. *)
                     str := [];
                     strStart := yypos;
                     YYBEGIN STRING;
                     lex ());
<STRING> "\\\""  => ((* Escaped quote contributes a literal quote character. *)
                     str := #"\"" :: !str; lex ());
<STRING> "\""    => ((* Closing quote: emit the accumulated characters in source order. *)
                     let
                         val text = implode (rev (!str))
                     in
                         YYBEGIN INITIAL;
                         Tokens.STRING (text, !strStart, yypos + 1)
                     end);
<STRING> "\n"    => ((* Newlines are kept in the literal and also reported to ErrorMsg. *)
                     ErrorMsg.newline yypos;
                     str := #"\n" :: !str;
                     lex ());
<STRING> .       => (str := String.sub (yytext, 0) :: !str; lex ());
|
adamc@14
|
103
|
adamc@1
|
<INITIAL> "("    => ((* Each token spans from yypos to yypos plus the lexeme length. *)
                     Tokens.LPAREN (yypos, yypos + String.size yytext));
<INITIAL> ")"    => (Tokens.RPAREN (yypos, yypos + String.size yytext));
<INITIAL> "["    => (Tokens.LBRACK (yypos, yypos + String.size yytext));
<INITIAL> "]"    => (Tokens.RBRACK (yypos, yypos + String.size yytext));
<INITIAL> "{"    => (Tokens.LBRACE (yypos, yypos + String.size yytext));
<INITIAL> "}"    => (Tokens.RBRACE (yypos, yypos + String.size yytext));

<INITIAL> "->"   => (Tokens.ARROW (yypos, yypos + String.size yytext));
<INITIAL> "=>"   => (Tokens.DARROW (yypos, yypos + String.size yytext));
<INITIAL> "++"   => (Tokens.PLUSPLUS (yypos, yypos + String.size yytext));

<INITIAL> "="    => (Tokens.EQ (yypos, yypos + String.size yytext));
<INITIAL> ","    => (Tokens.COMMA (yypos, yypos + String.size yytext));
<INITIAL> ":::"  => (Tokens.TCOLON (yypos, yypos + String.size yytext));
<INITIAL> "::"   => (Tokens.DCOLON (yypos, yypos + String.size yytext));
<INITIAL> ":"    => (Tokens.COLON (yypos, yypos + String.size yytext));
<INITIAL> "."    => (Tokens.DOT (yypos, yypos + String.size yytext));
<INITIAL> "$"    => (Tokens.DOLLAR (yypos, yypos + String.size yytext));
<INITIAL> "#"    => (Tokens.HASH (yypos, yypos + String.size yytext));
<INITIAL> "__"   => (Tokens.UNDERUNDER (yypos, yypos + String.size yytext));
<INITIAL> "_"    => (Tokens.UNDER (yypos, yypos + String.size yytext));
|
adamc@1
|
125
|
adamc@1
|
<INITIAL> "con"  => ((* Lower-case keywords; listed ahead of the general identifier rule. *)
                     Tokens.CON (yypos, yypos + String.size yytext));
<INITIAL> "type" => (Tokens.LTYPE (yypos, yypos + String.size yytext));
<INITIAL> "val"  => (Tokens.VAL (yypos, yypos + String.size yytext));
<INITIAL> "fn"   => (Tokens.FN (yypos, yypos + String.size yytext));
|
adamc@1
|
130
|
adamc@30
|
<INITIAL> "structure" => (Tokens.STRUCTURE (yypos, yypos + size yytext));
<INITIAL> "signature" => ((* Previously emitted STRUCTURE, making the two keywords
                           * indistinguishable to the parser.  Assumes the grammar
                           * declares a SIGNATURE terminal -- TODO confirm. *)
                          Tokens.SIGNATURE (yypos, yypos + size yytext));
<INITIAL> "struct" => (Tokens.STRUCT (yypos, yypos + size yytext));
<INITIAL> "sig" => (Tokens.SIG (yypos, yypos + size yytext));
<INITIAL> "end" => (Tokens.END (yypos, yypos + size yytext));
|
adamc@30
|
136
|
adamc@1
|
<INITIAL> "Type" => ((* Capitalised keywords; listed ahead of the cid rule. *)
                     Tokens.TYPE (yypos, yypos + String.size yytext));
<INITIAL> "Name" => (Tokens.NAME (yypos, yypos + String.size yytext));

<INITIAL> {id}   => ((* Identifiers carry their own text as the semantic value. *)
                     Tokens.SYMBOL (yytext, yypos, yypos + String.size yytext));
<INITIAL> {cid}  => (Tokens.CSYMBOL (yytext, yypos, yypos + String.size yytext));
|
adamc@1
|
142
|
adamc@14
|
<INITIAL> {intconst} => ((* Int64.fromString raises Overflow for a literal outside the
                          * Int64 range instead of returning NONE, so map that case to
                          * NONE and report it through the normal error branch. *)
                         case (Int64.fromString yytext handle Overflow => NONE) of
                             SOME x => Tokens.INT (x, yypos, yypos + size yytext)
                           | NONE => (ErrorMsg.errorAt' (yypos, yypos)
                                      ("Expected int, received: " ^ yytext);
                                      continue ()));
<INITIAL> {realconst} => ((* A failed conversion is reported and the lexeme skipped. *)
                          case Real64.fromString yytext of
                              SOME x => Tokens.FLOAT (x, yypos, yypos + size yytext)
                            | NONE => (ErrorMsg.errorAt' (yypos, yypos)
                                       ("Expected float, received: " ^ yytext);
                                       continue ()));
|
adamc@14
|
153
|
adamc@1
|
<COMMENT> .      => ((* Any other character inside a comment is skipped. *)
                     lex ());

<INITIAL> .      => ((* Nothing else matched: report the character and move on. *)
                     ErrorMsg.errorAt' (yypos, yypos)
                     (concat ["illegal character: \"", yytext, "\""]);
                     lex ());
|