adamc@1: (* Copyright (c) 2008, Adam Chlipala adamc@1: * All rights reserved. adamc@1: * adamc@1: * Redistribution and use in source and binary forms, with or without adamc@1: * modification, are permitted provided that the following conditions are met: adamc@1: * adamc@1: * - Redistributions of source code must retain the above copyright notice, adamc@1: * this list of conditions and the following disclaimer. adamc@1: * - Redistributions in binary form must reproduce the above copyright notice, adamc@1: * this list of conditions and the following disclaimer in the documentation adamc@1: * and/or other materials provided with the distribution. adamc@1: * - The names of contributors may not be used to endorse or promote products adamc@1: * derived from this software without specific prior written permission. adamc@1: * adamc@1: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" adamc@1: * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE adamc@1: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE adamc@1: * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE adamc@1: * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR adamc@1: * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF adamc@1: * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS adamc@1: * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN adamc@1: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) adamc@1: * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE adamc@1: * POSSIBILITY OF SUCH DAMAGE. adamc@1: *) adamc@1: adamc@1: (* Lexing info for Laconic/Web programs *) adamc@1: adamc@1: type pos = int adamc@1: type svalue = Tokens.svalue adamc@1: type ('a,'b) token = ('a,'b) Tokens.token adamc@1: type lexresult = (svalue,pos) Tokens.token adamc@1: adamc@1: local adamc@1: val commentLevel = ref 0 adamc@1: val commentPos = ref 0 adamc@1: in adamc@1: fun enterComment pos = adamc@1: (if !commentLevel = 0 then adamc@1: commentPos := pos adamc@1: else adamc@1: (); adamc@1: commentLevel := !commentLevel + 1) adamc@1: adamc@1: fun exitComment () = adamc@1: (ignore (commentLevel := !commentLevel - 1); adamc@1: !commentLevel = 0) adamc@1: adamc@1: fun eof () = adamc@1: let adamc@1: val pos = ErrorMsg.lastLineStart () adamc@1: in adamc@1: if !commentLevel > 0 then adamc@1: ErrorMsg.errorAt' (!commentPos, !commentPos) "Unterminated comment" adamc@1: else adamc@1: (); adamc@1: Tokens.EOF (pos, pos) adamc@1: end adamc@1: end adamc@1: adamc@14: val str = ref ([] : char list) adamc@14: val strStart = ref 0 adamc@14: adamc@54: local adamc@54: val initSig = ref false adamc@54: val offset = ref 0 adamc@54: in adamc@54: adamc@54: fun initialSig () = initSig := true adamc@54: adamc@54: fun pos yypos = yypos - !offset adamc@54: adamc@54: fun newline yypos = adamc@54: if !initSig then adamc@54: (initSig := false; adamc@54: offset := yypos + 1) adamc@54: else adamc@54: ErrorMsg.newline (pos yypos) adamc@54: adamc@54: end adamc@54: adamc@91: val xmlTag = ref ([] : string list) adamc@91: val xmlString = ref true adamc@91: val braceLevels = ref ([] : ((unit -> unit) * int) list) adamc@91: adamc@91: fun pushLevel s = braceLevels := (s, 1) :: (!braceLevels) adamc@91: adamc@91: fun enterBrace () = adamc@91: case !braceLevels of adamc@91: (s, i) :: rest => braceLevels := (s, i+1) :: rest adamc@91: | _ => () adamc@91: adamc@91: fun exitBrace () = adamc@91: case !braceLevels of adamc@91: (s, i) :: rest => adamc@91: if i = 1 then adamc@91: (braceLevels := rest; adamc@91: s ()) adamc@91: else adamc@91: braceLevels := (s, i-1) :: rest adamc@91: | _ => () adamc@91: adamc@91: fun initialize () = (xmlTag := []; adamc@91: xmlString := false) adamc@91: adamc@54: adamc@1: %% adamc@1: %header (functor LacwebLexFn(structure Tokens : Lacweb_TOKENS)); adamc@1: %full adamc@91: %s COMMENT STRING XML XMLTAG; adamc@1: adamc@48: id = [a-z_][A-Za-z0-9_']*; adamc@48: cid = [A-Z][A-Za-z0-9_']*; adamc@1: ws = [\ \t\012]; adamc@14: intconst = [0-9]+; adamc@14: realconst = [0-9]+\.[0-9]*; adamc@91: notags = [^<{\n]+; adamc@1: adamc@1: %% adamc@1: adamc@54: \n => (newline yypos; adamc@1: continue ()); adamc@54: \n => (newline yypos; adamc@1: continue ()); adamc@91: \n => (newline yypos; adamc@91: continue ()); adamc@91: \n => (newline yypos; adamc@91: Tokens.NOTAGS (yytext, yypos, yypos + size yytext)); adamc@1: adamc@1: {ws}+ => (lex ()); adamc@1: adamc@1: "(*" => (YYBEGIN COMMENT; adamc@54: enterComment (pos yypos); adamc@1: continue ()); adamc@54: "*)" => (ErrorMsg.errorAt' (pos yypos, pos yypos) "Unbalanced comments"; adamc@1: continue ()); adamc@1: adamc@54: "(*" => (enterComment (pos yypos); adamc@1: continue ()); adamc@1: "*)" => (if exitComment () then YYBEGIN INITIAL else (); adamc@1: continue ()); adamc@1: adamc@54: "\"" => (YYBEGIN STRING; strStart := pos yypos; str := []; continue()); adamc@14: "\\\"" => (str := #"\"" :: !str; continue()); adamc@104: "\"" => (if !xmlString then adamc@104: (xmlString := false; YYBEGIN XMLTAG) adamc@104: else adamc@104: YYBEGIN INITIAL; adamc@54: Tokens.STRING (String.implode (List.rev (!str)), !strStart, pos yypos + 1)); adamc@54: "\n" => (newline yypos; adamc@14: str := #"\n" :: !str; continue()); adamc@14: . => (str := String.sub (yytext, 0) :: !str; continue()); adamc@14: adamc@91: "<" {id} ">"=> (let adamc@91: val tag = String.substring (yytext, 1, size yytext - 2) adamc@91: in adamc@91: YYBEGIN XML; adamc@91: xmlTag := tag :: (!xmlTag); adamc@91: Tokens.XML_BEGIN (tag, yypos, yypos + size yytext) adamc@91: end); adamc@91: "" => (let adamc@91: val id = String.substring (yytext, 2, size yytext - 3) adamc@91: in adamc@91: case !xmlTag of adamc@91: id' :: rest => adamc@91: if id = id' then adamc@91: (YYBEGIN INITIAL; adamc@91: xmlTag := rest; adamc@91: Tokens.XML_END (yypos, yypos + size yytext)) adamc@91: else adamc@91: Tokens.END_TAG (id, yypos, yypos + size yytext) adamc@91: | _ => adamc@91: Tokens.END_TAG (id, yypos, yypos + size yytext) adamc@91: end); adamc@91: adamc@91: "<" {id} => (YYBEGIN XMLTAG; adamc@91: Tokens.BEGIN_TAG (String.extract (yytext, 1, NONE), adamc@91: yypos, yypos + size yytext)); adamc@91: adamc@91: "/" => (Tokens.DIVIDE (yypos, yypos + size yytext)); adamc@91: ">" => (YYBEGIN XML; adamc@91: Tokens.GT (yypos, yypos + size yytext)); adamc@91: adamc@91: {ws}+ => (lex ()); adamc@91: adamc@91: {id} => (Tokens.SYMBOL (yytext, yypos, yypos + size yytext)); adamc@91: "=" => (Tokens.EQ (yypos, yypos + size yytext)); adamc@91: adamc@91: {intconst} => (case Int64.fromString yytext of adamc@91: SOME x => Tokens.INT (x, yypos, yypos + size yytext) adamc@91: | NONE => (ErrorMsg.errorAt' (yypos, yypos) adamc@91: ("Expected int, received: " ^ yytext); adamc@91: continue ())); adamc@91: {realconst} => (case Real.fromString yytext of adamc@91: SOME x => Tokens.FLOAT (x, yypos, yypos + size yytext) adamc@91: | NONE => (ErrorMsg.errorAt' (yypos, yypos) adamc@91: ("Expected float, received: " ^ yytext); adamc@91: continue ())); adamc@91: "\"" => (YYBEGIN STRING; adamc@91: xmlString := true; adamc@104: strStart := yypos; str := []; continue ()); adamc@91: adamc@91: "{" => (YYBEGIN INITIAL; adamc@91: pushLevel (fn () => YYBEGIN XMLTAG); adamc@91: Tokens.LBRACE (yypos, yypos + 1)); adamc@91: "(" => (YYBEGIN INITIAL; adamc@91: pushLevel (fn () => YYBEGIN XMLTAG); adamc@91: Tokens.LPAREN (yypos, yypos + 1)); adamc@91: adamc@91: . => (ErrorMsg.errorAt' (yypos, yypos) adamc@91: ("illegal XML tag character: \"" ^ yytext ^ "\""); adamc@91: continue ()); adamc@91: adamc@91: "{" => (YYBEGIN INITIAL; adamc@91: pushLevel (fn () => YYBEGIN XML); adamc@91: Tokens.LBRACE (yypos, yypos + 1)); adamc@91: adamc@91: {notags} => (Tokens.NOTAGS (yytext, yypos, yypos + size yytext)); adamc@91: adamc@91: . => (ErrorMsg.errorAt' (yypos, yypos) adamc@91: ("illegal XML character: \"" ^ yytext ^ "\""); adamc@91: continue ()); adamc@91: adamc@82: "()" => (Tokens.UNIT (pos yypos, pos yypos + size yytext)); adamc@54: "(" => (Tokens.LPAREN (pos yypos, pos yypos + size yytext)); adamc@54: ")" => (Tokens.RPAREN (pos yypos, pos yypos + size yytext)); adamc@54: "[" => (Tokens.LBRACK (pos yypos, pos yypos + size yytext)); adamc@54: "]" => (Tokens.RBRACK (pos yypos, pos yypos + size yytext)); adamc@110: "{" => (enterBrace (); adamc@110: Tokens.LBRACE (pos yypos, pos yypos + size yytext)); adamc@110: "}" => (exitBrace (); adamc@110: Tokens.RBRACE (pos yypos, pos yypos + size yytext)); adamc@1: adamc@54: "->" => (Tokens.ARROW (pos yypos, pos yypos + size yytext)); adamc@54: "=>" => (Tokens.DARROW (pos yypos, pos yypos + size yytext)); adamc@54: "++" => (Tokens.PLUSPLUS (pos yypos, pos yypos + size yytext)); adamc@149: "--" => (Tokens.MINUSMINUS (pos yypos, pos yypos + size yytext)); adamc@1: adamc@54: "=" => (Tokens.EQ (pos yypos, pos yypos + size yytext)); adamc@54: "," => (Tokens.COMMA (pos yypos, pos yypos + size yytext)); adamc@54: ":::" => (Tokens.TCOLON (pos yypos, pos yypos + size yytext)); adamc@54: "::" => (Tokens.DCOLON (pos yypos, pos yypos + size yytext)); adamc@54: ":" => (Tokens.COLON (pos yypos, pos yypos + size yytext)); adamc@54: "." => (Tokens.DOT (pos yypos, pos yypos + size yytext)); adamc@54: "$" => (Tokens.DOLLAR (pos yypos, pos yypos + size yytext)); adamc@54: "#" => (Tokens.HASH (pos yypos, pos yypos + size yytext)); adamc@54: "__" => (Tokens.UNDERUNDER (pos yypos, pos yypos + size yytext)); adamc@54: "_" => (Tokens.UNDER (pos yypos, pos yypos + size yytext)); adamc@84: "~" => (Tokens.TWIDDLE (pos yypos, pos yypos + size yytext)); adamc@156: "|" => (Tokens.BAR (pos yypos, pos yypos + size yytext)); adamc@1: adamc@54: "con" => (Tokens.CON (pos yypos, pos yypos + size yytext)); adamc@54: "type" => (Tokens.LTYPE (pos yypos, pos yypos + size yytext)); adamc@156: "datatype" => (Tokens.DATATYPE (pos yypos, pos yypos + size yytext)); adamc@156: "of" => (Tokens.OF (pos yypos, pos yypos + size yytext)); adamc@54: "val" => (Tokens.VAL (pos yypos, pos yypos + size yytext)); adamc@123: "rec" => (Tokens.REC (pos yypos, pos yypos + size yytext)); adamc@123: "and" => (Tokens.AND (pos yypos, pos yypos + size yytext)); adamc@54: "fn" => (Tokens.FN (pos yypos, pos yypos + size yytext)); adamc@67: "fold" => (Tokens.FOLD (pos yypos, pos yypos + size yytext)); adamc@1: adamc@54: "structure" => (Tokens.STRUCTURE (pos yypos, pos yypos + size yytext)); adamc@54: "signature" => (Tokens.SIGNATURE (pos yypos, pos yypos + size yytext)); adamc@54: "struct" => (Tokens.STRUCT (pos yypos, pos yypos + size yytext)); adamc@54: "sig" => (if yypos = 2 then initialSig () else (); Tokens.SIG (pos yypos, pos yypos + size yytext)); adamc@54: "end" => (Tokens.END (pos yypos, pos yypos + size yytext)); adamc@54: "functor" => (Tokens.FUNCTOR (pos yypos, pos yypos + size yytext)); adamc@54: "where" => (Tokens.WHERE (pos yypos, pos yypos + size yytext)); adamc@54: "extern" => (Tokens.EXTERN (pos yypos, pos yypos + size yytext)); adamc@58: "include" => (Tokens.INCLUDE (pos yypos, pos yypos + size yytext)); adamc@58: "open" => (Tokens.OPEN (pos yypos, pos yypos + size yytext)); adamc@88: "constraint"=> (Tokens.CONSTRAINT (pos yypos, pos yypos + size yytext)); adamc@88: "constraints"=> (Tokens.CONSTRAINTS (pos yypos, pos yypos + size yytext)); adamc@109: "export" => (Tokens.EXPORT (pos yypos, pos yypos + size yytext)); adamc@30: adamc@54: "Type" => (Tokens.TYPE (pos yypos, pos yypos + size yytext)); adamc@54: "Name" => (Tokens.NAME (pos yypos, pos yypos + size yytext)); adamc@82: "Unit" => (Tokens.KUNIT (pos yypos, pos yypos + size yytext)); adamc@1: adamc@54: {id} => (Tokens.SYMBOL (yytext, pos yypos, pos yypos + size yytext)); adamc@54: {cid} => (Tokens.CSYMBOL (yytext, pos yypos, pos yypos + size yytext)); adamc@1: adamc@14: {intconst} => (case Int64.fromString yytext of adamc@120: SOME x => Tokens.INT (x, pos yypos, pos yypos + size yytext) adamc@120: | NONE => (ErrorMsg.errorAt' (pos yypos, pos yypos) adamc@120: ("Expected int, received: " ^ yytext); adamc@120: continue ())); adamc@14: {realconst} => (case Real64.fromString yytext of adamc@54: SOME x => Tokens.FLOAT (x, pos yypos, pos yypos + size yytext) adamc@54: | NONE => (ErrorMsg.errorAt' (pos yypos, pos yypos) adamc@14: ("Expected float, received: " ^ yytext); adamc@14: continue ())); adamc@14: adamc@1: . => (continue()); adamc@1: adamc@54: . => (ErrorMsg.errorAt' (pos yypos, pos yypos) adamc@1: ("illegal character: \"" ^ yytext ^ "\""); adamc@1: continue ());