adamc@1: (* Copyright (c) 2008, Adam Chlipala adamc@1: * All rights reserved. adamc@1: * adamc@1: * Redistribution and use in source and binary forms, with or without adamc@1: * modification, are permitted provided that the following conditions are met: adamc@1: * adamc@1: * - Redistributions of source code must retain the above copyright notice, adamc@1: * this list of conditions and the following disclaimer. adamc@1: * - Redistributions in binary form must reproduce the above copyright notice, adamc@1: * this list of conditions and the following disclaimer in the documentation adamc@1: * and/or other materials provided with the distribution. adamc@1: * - The names of contributors may not be used to endorse or promote products adamc@1: * derived from this software without specific prior written permission. adamc@1: * adamc@1: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" adamc@1: * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE adamc@1: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE adamc@1: * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE adamc@1: * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR adamc@1: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF adamc@1: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS adamc@1: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN adamc@1: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) adamc@1: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE adamc@1: * POSSIBILITY OF SUCH DAMAGE. adamc@1: *) adamc@1: adamc@1: (* Lexing info for Laconic/Web programs *) adamc@1: adamc@1: type pos = int adamc@1: type svalue = Tokens.svalue adamc@1: type ('a,'b) token = ('a,'b) Tokens.token adamc@1: type lexresult = (svalue,pos) Tokens.token adamc@1: adamc@1: local adamc@1: val commentLevel = ref 0 adamc@1: val commentPos = ref 0 adamc@1: in adamc@1: fun enterComment pos = adamc@1: (if !commentLevel = 0 then adamc@1: commentPos := pos adamc@1: else adamc@1: (); adamc@1: commentLevel := !commentLevel + 1) adamc@1: adamc@1: fun exitComment () = adamc@1: (ignore (commentLevel := !commentLevel - 1); adamc@1: !commentLevel = 0) adamc@1: adamc@1: fun eof () = adamc@1: let adamc@1: val pos = ErrorMsg.lastLineStart () adamc@1: in adamc@1: if !commentLevel > 0 then adamc@1: ErrorMsg.errorAt' (!commentPos, !commentPos) "Unterminated comment" adamc@1: else adamc@1: (); adamc@1: Tokens.EOF (pos, pos) adamc@1: end adamc@1: end adamc@1: adamc@14: val str = ref ([] : char list) adamc@14: val strStart = ref 0 adamc@14: adamc@54: local adamc@54: val initSig = ref false adamc@54: val offset = ref 0 adamc@54: in adamc@54: adamc@54: fun initialSig () = initSig := true adamc@54: adamc@54: fun pos yypos = yypos - !offset adamc@54: adamc@54: fun newline yypos = adamc@54: if !initSig then adamc@54: (initSig := false; adamc@54: offset := yypos + 1) adamc@54: else adamc@54: ErrorMsg.newline (pos yypos) adamc@54: adamc@54: end adamc@54: adamc@54: adamc@1: %% adamc@1: %header (functor LacwebLexFn(structure Tokens : Lacweb_TOKENS)); adamc@1: %full adamc@14: %s COMMENT STRING; adamc@1: adamc@48: id = [a-z_][A-Za-z0-9_']*; adamc@48: cid = [A-Z][A-Za-z0-9_']*; adamc@1: ws = [\ \t\012]; adamc@14: intconst = [0-9]+; adamc@14: realconst = [0-9]+\.[0-9]*; adamc@1: adamc@1: %% adamc@1: adamc@54: \n => (newline yypos; adamc@1: continue ()); adamc@54: \n => (newline yypos; adamc@1: continue ()); adamc@1: adamc@1: {ws}+ => (lex ()); adamc@1: adamc@1: "(*" => (YYBEGIN COMMENT; adamc@54: enterComment (pos yypos); adamc@1: continue ()); adamc@54: "*)" => (ErrorMsg.errorAt' (pos yypos, pos yypos) "Unbalanced comments"; adamc@1: continue ()); adamc@1: adamc@54: "(*" => (enterComment (pos yypos); adamc@1: continue ()); adamc@1: "*)" => (if exitComment () then YYBEGIN INITIAL else (); adamc@1: continue ()); adamc@1: adamc@54: "\"" => (YYBEGIN STRING; strStart := pos yypos; str := []; continue()); adamc@14: "\\\"" => (str := #"\"" :: !str; continue()); adamc@14: "\"" => (YYBEGIN INITIAL; adamc@54: Tokens.STRING (String.implode (List.rev (!str)), !strStart, pos yypos + 1)); adamc@54: "\n" => (newline yypos; adamc@14: str := #"\n" :: !str; continue()); adamc@14: . => (str := String.sub (yytext, 0) :: !str; continue()); adamc@14: adamc@82: "()" => (Tokens.UNIT (pos yypos, pos yypos + size yytext)); adamc@54: "(" => (Tokens.LPAREN (pos yypos, pos yypos + size yytext)); adamc@54: ")" => (Tokens.RPAREN (pos yypos, pos yypos + size yytext)); adamc@54: "[" => (Tokens.LBRACK (pos yypos, pos yypos + size yytext)); adamc@54: "]" => (Tokens.RBRACK (pos yypos, pos yypos + size yytext)); adamc@54: "{" => (Tokens.LBRACE (pos yypos, pos yypos + size yytext)); adamc@54: "}" => (Tokens.RBRACE (pos yypos, pos yypos + size yytext)); adamc@1: adamc@54: "->" => (Tokens.ARROW (pos yypos, pos yypos + size yytext)); adamc@54: "=>" => (Tokens.DARROW (pos yypos, pos yypos + size yytext)); adamc@54: "++" => (Tokens.PLUSPLUS (pos yypos, pos yypos + size yytext)); adamc@1: adamc@54: "=" => (Tokens.EQ (pos yypos, pos yypos + size yytext)); adamc@54: "," => (Tokens.COMMA (pos yypos, pos yypos + size yytext)); adamc@54: ":::" => (Tokens.TCOLON (pos yypos, pos yypos + size yytext)); adamc@54: "::" => (Tokens.DCOLON (pos yypos, pos yypos + size yytext)); adamc@54: ":" => (Tokens.COLON (pos yypos, pos yypos + size yytext)); adamc@54: "." => (Tokens.DOT (pos yypos, pos yypos + size yytext)); adamc@54: "$" => (Tokens.DOLLAR (pos yypos, pos yypos + size yytext)); adamc@54: "#" => (Tokens.HASH (pos yypos, pos yypos + size yytext)); adamc@54: "__" => (Tokens.UNDERUNDER (pos yypos, pos yypos + size yytext)); adamc@54: "_" => (Tokens.UNDER (pos yypos, pos yypos + size yytext)); adamc@84: "~" => (Tokens.TWIDDLE (pos yypos, pos yypos + size yytext)); adamc@1: adamc@54: "con" => (Tokens.CON (pos yypos, pos yypos + size yytext)); adamc@54: "type" => (Tokens.LTYPE (pos yypos, pos yypos + size yytext)); adamc@54: "val" => (Tokens.VAL (pos yypos, pos yypos + size yytext)); adamc@54: "fn" => (Tokens.FN (pos yypos, pos yypos + size yytext)); adamc@67: "fold" => (Tokens.FOLD (pos yypos, pos yypos + size yytext)); adamc@1: adamc@54: "structure" => (Tokens.STRUCTURE (pos yypos, pos yypos + size yytext)); adamc@54: "signature" => (Tokens.SIGNATURE (pos yypos, pos yypos + size yytext)); adamc@54: "struct" => (Tokens.STRUCT (pos yypos, pos yypos + size yytext)); adamc@54: "sig" => (if yypos = 2 then initialSig () else (); Tokens.SIG (pos yypos, pos yypos + size yytext)); adamc@54: "end" => (Tokens.END (pos yypos, pos yypos + size yytext)); adamc@54: "functor" => (Tokens.FUNCTOR (pos yypos, pos yypos + size yytext)); adamc@54: "where" => (Tokens.WHERE (pos yypos, pos yypos + size yytext)); adamc@54: "extern" => (Tokens.EXTERN (pos yypos, pos yypos + size yytext)); adamc@58: "include" => (Tokens.INCLUDE (pos yypos, pos yypos + size yytext)); adamc@58: "open" => (Tokens.OPEN (pos yypos, pos yypos + size yytext)); adamc@30: adamc@54: "Type" => (Tokens.TYPE (pos yypos, pos yypos + size yytext)); adamc@54: "Name" => (Tokens.NAME (pos yypos, pos yypos + size yytext)); adamc@82: "Unit" => (Tokens.KUNIT (pos yypos, pos yypos + size yytext)); adamc@1: adamc@54: {id} => (Tokens.SYMBOL (yytext, pos yypos, pos yypos + size yytext)); adamc@54: {cid} => (Tokens.CSYMBOL (yytext, pos yypos, pos yypos + size yytext)); adamc@1: adamc@14: {intconst} => (case Int64.fromString yytext of adamc@54: SOME x => Tokens.INT (x, pos yypos, pos yypos + size yytext) adamc@54: | NONE => (ErrorMsg.errorAt' (pos yypos, pos yypos) adamc@14: ("Expected int, received: " ^ yytext); adamc@14: continue ())); adamc@14: {realconst} => (case Real64.fromString yytext of adamc@54: SOME x => Tokens.FLOAT (x, pos yypos, pos yypos + size yytext) adamc@54: | NONE => (ErrorMsg.errorAt' (pos yypos, pos yypos) adamc@14: ("Expected float, received: " ^ yytext); adamc@14: continue ())); adamc@14: adamc@1: . => (continue()); adamc@1: adamc@54: . => (ErrorMsg.errorAt' (pos yypos, pos yypos) adamc@1: ("illegal character: \"" ^ yytext ^ "\""); adamc@1: continue ());