annotate src/sql.sml @ 1697:cb0f05bdc183

Refactor SQL parsing code from Iflow to Sql, add querydml parser.
author Edward Z. Yang <ezyang@mit.edu>
date Mon, 12 Mar 2012 12:00:23 -0700
parents
children 4d64af730e35 606af2c9b828
rev   line source
ezyang@1697 1 structure Sql = struct
ezyang@1697 2
ezyang@1697 3 open Mono
ezyang@1697 4
ezyang@1697 5 val debug = ref false
ezyang@1697 6
ezyang@1697 7 type lvar = int
ezyang@1697 8
ezyang@1697 9 datatype func =
ezyang@1697 10 DtCon0 of string
ezyang@1697 11 | DtCon1 of string
ezyang@1697 12 | UnCon of string
ezyang@1697 13 | Other of string
ezyang@1697 14
ezyang@1697 15 datatype exp =
ezyang@1697 16 Const of Prim.t
ezyang@1697 17 | Var of int
ezyang@1697 18 | Lvar of lvar
ezyang@1697 19 | Func of func * exp list
ezyang@1697 20 | Recd of (string * exp) list
ezyang@1697 21 | Proj of exp * string
ezyang@1697 22
ezyang@1697 23 datatype reln =
ezyang@1697 24 Known
ezyang@1697 25 | Sql of string
ezyang@1697 26 | PCon0 of string
ezyang@1697 27 | PCon1 of string
ezyang@1697 28 | Eq
ezyang@1697 29 | Ne
ezyang@1697 30 | Lt
ezyang@1697 31 | Le
ezyang@1697 32 | Gt
ezyang@1697 33 | Ge
ezyang@1697 34
ezyang@1697 35 datatype prop =
ezyang@1697 36 True
ezyang@1697 37 | False
ezyang@1697 38 | Unknown
ezyang@1697 39 | And of prop * prop
ezyang@1697 40 | Or of prop * prop
ezyang@1697 41 | Reln of reln * exp list
ezyang@1697 42 | Cond of exp * prop
ezyang@1697 43
ezyang@1697 44 datatype chunk =
ezyang@1697 45 String of string
ezyang@1697 46 | Exp of Mono.exp
ezyang@1697 47
ezyang@1697 48 fun chunkify e =
ezyang@1697 49 case #1 e of
ezyang@1697 50 EPrim (Prim.String s) => [String s]
ezyang@1697 51 | EStrcat (e1, e2) =>
ezyang@1697 52 let
ezyang@1697 53 val chs1 = chunkify e1
ezyang@1697 54 val chs2 = chunkify e2
ezyang@1697 55 in
ezyang@1697 56 case chs2 of
ezyang@1697 57 String s2 :: chs2' =>
ezyang@1697 58 (case List.last chs1 of
ezyang@1697 59 String s1 => List.take (chs1, length chs1 - 1) @ String (s1 ^ s2) :: chs2'
ezyang@1697 60 | _ => chs1 @ chs2)
ezyang@1697 61 | _ => chs1 @ chs2
ezyang@1697 62 end
ezyang@1697 63 | _ => [Exp e]
ezyang@1697 64
ezyang@1697 65 type 'a parser = chunk list -> ('a * chunk list) option
ezyang@1697 66
ezyang@1697 67 fun always v chs = SOME (v, chs)
ezyang@1697 68
ezyang@1697 69 fun parse p s =
ezyang@1697 70 case p (chunkify s) of
ezyang@1697 71 SOME (v, []) => SOME v
ezyang@1697 72 | _ => NONE
ezyang@1697 73
ezyang@1697 74 fun const s chs =
ezyang@1697 75 case chs of
ezyang@1697 76 String s' :: chs => if String.isPrefix s s' then
ezyang@1697 77 SOME ((), if size s = size s' then
ezyang@1697 78 chs
ezyang@1697 79 else
ezyang@1697 80 String (String.extract (s', size s, NONE)) :: chs)
ezyang@1697 81 else
ezyang@1697 82 NONE
ezyang@1697 83 | _ => NONE
ezyang@1697 84
ezyang@1697 85 fun follow p1 p2 chs =
ezyang@1697 86 case p1 chs of
ezyang@1697 87 NONE => NONE
ezyang@1697 88 | SOME (v1, chs) =>
ezyang@1697 89 case p2 chs of
ezyang@1697 90 NONE => NONE
ezyang@1697 91 | SOME (v2, chs) => SOME ((v1, v2), chs)
ezyang@1697 92
ezyang@1697 93 fun wrap p f chs =
ezyang@1697 94 case p chs of
ezyang@1697 95 NONE => NONE
ezyang@1697 96 | SOME (v, chs) => SOME (f v, chs)
ezyang@1697 97
ezyang@1697 98 fun wrapP p f chs =
ezyang@1697 99 case p chs of
ezyang@1697 100 NONE => NONE
ezyang@1697 101 | SOME (v, chs) =>
ezyang@1697 102 case f v of
ezyang@1697 103 NONE => NONE
ezyang@1697 104 | SOME r => SOME (r, chs)
ezyang@1697 105
ezyang@1697 106 fun alt p1 p2 chs =
ezyang@1697 107 case p1 chs of
ezyang@1697 108 NONE => p2 chs
ezyang@1697 109 | v => v
ezyang@1697 110
ezyang@1697 111 fun altL ps =
ezyang@1697 112 case rev ps of
ezyang@1697 113 [] => (fn _ => NONE)
ezyang@1697 114 | p :: ps =>
ezyang@1697 115 foldl (fn (p1, p2) => alt p1 p2) p ps
ezyang@1697 116
ezyang@1697 117 fun opt p chs =
ezyang@1697 118 case p chs of
ezyang@1697 119 NONE => SOME (NONE, chs)
ezyang@1697 120 | SOME (v, chs) => SOME (SOME v, chs)
ezyang@1697 121
ezyang@1697 122 fun skip cp chs =
ezyang@1697 123 case chs of
ezyang@1697 124 String "" :: chs => skip cp chs
ezyang@1697 125 | String s :: chs' => if cp (String.sub (s, 0)) then
ezyang@1697 126 skip cp (String (String.extract (s, 1, NONE)) :: chs')
ezyang@1697 127 else
ezyang@1697 128 SOME ((), chs)
ezyang@1697 129 | _ => SOME ((), chs)
ezyang@1697 130
ezyang@1697 131 fun keep cp chs =
ezyang@1697 132 case chs of
ezyang@1697 133 String "" :: chs => keep cp chs
ezyang@1697 134 | String s :: chs' =>
ezyang@1697 135 let
ezyang@1697 136 val (befor, after) = Substring.splitl cp (Substring.full s)
ezyang@1697 137 in
ezyang@1697 138 if Substring.isEmpty befor then
ezyang@1697 139 NONE
ezyang@1697 140 else
ezyang@1697 141 SOME (Substring.string befor,
ezyang@1697 142 if Substring.isEmpty after then
ezyang@1697 143 chs'
ezyang@1697 144 else
ezyang@1697 145 String (Substring.string after) :: chs')
ezyang@1697 146 end
ezyang@1697 147 | _ => NONE
ezyang@1697 148
ezyang@1697 149 fun ws p = wrap (follow (skip (fn ch => ch = #" "))
ezyang@1697 150 (follow p (skip (fn ch => ch = #" ")))) (#1 o #2)
ezyang@1697 151
ezyang@1697 152 fun log name p chs =
ezyang@1697 153 (if !debug then
ezyang@1697 154 (print (name ^ ": ");
ezyang@1697 155 app (fn String s => print s
ezyang@1697 156 | _ => print "???") chs;
ezyang@1697 157 print "\n")
ezyang@1697 158 else
ezyang@1697 159 ();
ezyang@1697 160 p chs)
ezyang@1697 161
ezyang@1697 162 fun list p chs =
ezyang@1697 163 altL [wrap (follow p (follow (ws (const ",")) (list p)))
ezyang@1697 164 (fn (v, ((), ls)) => v :: ls),
ezyang@1697 165 wrap (ws p) (fn v => [v]),
ezyang@1697 166 always []] chs
ezyang@1697 167
ezyang@1697 168 val ident = keep (fn ch => Char.isAlphaNum ch orelse ch = #"_")
ezyang@1697 169
ezyang@1697 170 val t_ident = wrapP ident (fn s => if String.isPrefix "T_" s then
ezyang@1697 171 SOME (String.extract (s, 2, NONE))
ezyang@1697 172 else
ezyang@1697 173 NONE)
ezyang@1697 174 val uw_ident = wrapP ident (fn s => if String.isPrefix "uw_" s andalso size s >= 4 then
ezyang@1697 175 SOME (str (Char.toUpper (String.sub (s, 3)))
ezyang@1697 176 ^ String.extract (s, 4, NONE))
ezyang@1697 177 else
ezyang@1697 178 NONE)
ezyang@1697 179
ezyang@1697 180 val field = wrap (follow t_ident
ezyang@1697 181 (follow (const ".")
ezyang@1697 182 uw_ident))
ezyang@1697 183 (fn (t, ((), f)) => (t, f))
ezyang@1697 184
ezyang@1697 185 datatype Rel =
ezyang@1697 186 Exps of exp * exp -> prop
ezyang@1697 187 | Props of prop * prop -> prop
ezyang@1697 188
ezyang@1697 189 datatype sqexp =
ezyang@1697 190 SqConst of Prim.t
ezyang@1697 191 | SqTrue
ezyang@1697 192 | SqFalse
ezyang@1697 193 | SqNot of sqexp
ezyang@1697 194 | Field of string * string
ezyang@1697 195 | Computed of string
ezyang@1697 196 | Binop of Rel * sqexp * sqexp
ezyang@1697 197 | SqKnown of sqexp
ezyang@1697 198 | Inj of Mono.exp
ezyang@1697 199 | SqFunc of string * sqexp
ezyang@1697 200 | Unmodeled
ezyang@1697 201 | Null
ezyang@1697 202
ezyang@1697 203 fun cmp s r = wrap (const s) (fn () => Exps (fn (e1, e2) => Reln (r, [e1, e2])))
ezyang@1697 204
ezyang@1697 205 val sqbrel = altL [cmp "=" Eq,
ezyang@1697 206 cmp "<>" Ne,
ezyang@1697 207 cmp "<=" Le,
ezyang@1697 208 cmp "<" Lt,
ezyang@1697 209 cmp ">=" Ge,
ezyang@1697 210 cmp ">" Gt,
ezyang@1697 211 wrap (const "AND") (fn () => Props And),
ezyang@1697 212 wrap (const "OR") (fn () => Props Or)]
ezyang@1697 213
ezyang@1697 214 datatype ('a, 'b) sum = inl of 'a | inr of 'b
ezyang@1697 215
ezyang@1697 216 fun string chs =
ezyang@1697 217 case chs of
ezyang@1697 218 String s :: chs =>
ezyang@1697 219 if size s >= 2 andalso String.sub (s, 0) = #"'" then
ezyang@1697 220 let
ezyang@1697 221 fun loop (cs, acc) =
ezyang@1697 222 case cs of
ezyang@1697 223 [] => NONE
ezyang@1697 224 | c :: cs =>
ezyang@1697 225 if c = #"'" then
ezyang@1697 226 SOME (String.implode (rev acc), cs)
ezyang@1697 227 else if c = #"\\" then
ezyang@1697 228 case cs of
ezyang@1697 229 c :: cs => loop (cs, c :: acc)
ezyang@1697 230 | _ => raise Fail "Iflow.string: Unmatched backslash escape"
ezyang@1697 231 else
ezyang@1697 232 loop (cs, c :: acc)
ezyang@1697 233 in
ezyang@1697 234 case loop (String.explode (String.extract (s, 1, NONE)), []) of
ezyang@1697 235 NONE => NONE
ezyang@1697 236 | SOME (s, []) => SOME (s, chs)
ezyang@1697 237 | SOME (s, cs) => SOME (s, String (String.implode cs) :: chs)
ezyang@1697 238 end
ezyang@1697 239 else
ezyang@1697 240 NONE
ezyang@1697 241 | _ => NONE
ezyang@1697 242
ezyang@1697 243 val prim =
ezyang@1697 244 altL [wrap (follow (wrapP (follow (keep Char.isDigit) (follow (const ".") (keep Char.isDigit)))
ezyang@1697 245 (fn (x, ((), y)) => Option.map Prim.Float (Real64.fromString (x ^ "." ^ y))))
ezyang@1697 246 (opt (const "::float8"))) #1,
ezyang@1697 247 wrap (follow (wrapP (keep Char.isDigit)
ezyang@1697 248 (Option.map Prim.Int o Int64.fromString))
ezyang@1697 249 (opt (const "::int8"))) #1,
ezyang@1697 250 wrap (follow (opt (const "E")) (follow string (opt (const "::text"))))
ezyang@1697 251 (Prim.String o #1 o #2)]
ezyang@1697 252
ezyang@1697 253 fun known' chs =
ezyang@1697 254 case chs of
ezyang@1697 255 Exp (EFfi ("Basis", "sql_known"), _) :: chs => SOME ((), chs)
ezyang@1697 256 | _ => NONE
ezyang@1697 257
ezyang@1697 258 fun sqlify chs =
ezyang@1697 259 case chs of
ezyang@1697 260 Exp (EFfiApp ("Basis", f, [(e, _)]), _) :: chs =>
ezyang@1697 261 if String.isPrefix "sqlify" f then
ezyang@1697 262 SOME (e, chs)
ezyang@1697 263 else
ezyang@1697 264 NONE
ezyang@1697 265 | Exp (ECase (e, [((PCon (_, PConFfi {mod = "Basis", con = "True", ...}, NONE), _),
ezyang@1697 266 (EPrim (Prim.String "TRUE"), _)),
ezyang@1697 267 ((PCon (_, PConFfi {mod = "Basis", con = "False", ...}, NONE), _),
ezyang@1697 268 (EPrim (Prim.String "FALSE"), _))], _), _) :: chs =>
ezyang@1697 269 SOME (e, chs)
ezyang@1697 270
ezyang@1697 271 | _ => NONE
ezyang@1697 272
ezyang@1697 273 fun constK s = wrap (const s) (fn () => s)
ezyang@1697 274
ezyang@1697 275 val funcName = altL [constK "COUNT",
ezyang@1697 276 constK "MIN",
ezyang@1697 277 constK "MAX",
ezyang@1697 278 constK "SUM",
ezyang@1697 279 constK "AVG"]
ezyang@1697 280
ezyang@1697 281 val unmodeled = altL [const "COUNT(*)",
ezyang@1697 282 const "CURRENT_TIMESTAMP"]
ezyang@1697 283
ezyang@1697 284 fun sqexp chs =
ezyang@1697 285 log "sqexp"
ezyang@1697 286 (altL [wrap prim SqConst,
ezyang@1697 287 wrap (const "TRUE") (fn () => SqTrue),
ezyang@1697 288 wrap (const "FALSE") (fn () => SqFalse),
ezyang@1697 289 wrap (const "NULL") (fn () => Null),
ezyang@1697 290 wrap field Field,
ezyang@1697 291 wrap uw_ident Computed,
ezyang@1697 292 wrap known SqKnown,
ezyang@1697 293 wrap func SqFunc,
ezyang@1697 294 wrap unmodeled (fn () => Unmodeled),
ezyang@1697 295 wrap sqlify Inj,
ezyang@1697 296 wrap (follow (const "COALESCE(") (follow sqexp (follow (const ",")
ezyang@1697 297 (follow (keep (fn ch => ch <> #")")) (const ")")))))
ezyang@1697 298 (fn ((), (e, _)) => e),
ezyang@1697 299 wrap (follow (const "(NOT ") (follow sqexp (const ")")))
ezyang@1697 300 (fn ((), (e, _)) => SqNot e),
ezyang@1697 301 wrap (follow (ws (const "("))
ezyang@1697 302 (follow (wrap
ezyang@1697 303 (follow sqexp
ezyang@1697 304 (alt
ezyang@1697 305 (wrap
ezyang@1697 306 (follow (ws sqbrel)
ezyang@1697 307 (ws sqexp))
ezyang@1697 308 inl)
ezyang@1697 309 (always (inr ()))))
ezyang@1697 310 (fn (e1, sm) =>
ezyang@1697 311 case sm of
ezyang@1697 312 inl (bo, e2) => Binop (bo, e1, e2)
ezyang@1697 313 | inr () => e1))
ezyang@1697 314 (const ")")))
ezyang@1697 315 (fn ((), (e, ())) => e)])
ezyang@1697 316 chs
ezyang@1697 317
ezyang@1697 318 and known chs = wrap (follow known' (follow (const "(") (follow sqexp (const ")"))))
ezyang@1697 319 (fn ((), ((), (e, ()))) => e) chs
ezyang@1697 320
ezyang@1697 321 and func chs = wrap (follow funcName (follow (const "(") (follow sqexp (const ")"))))
ezyang@1697 322 (fn (f, ((), (e, ()))) => (f, e)) chs
ezyang@1697 323
ezyang@1697 324 datatype sitem =
ezyang@1697 325 SqField of string * string
ezyang@1697 326 | SqExp of sqexp * string
ezyang@1697 327
ezyang@1697 328 val sitem = alt (wrap (follow sqexp (follow (const " AS ") uw_ident))
ezyang@1697 329 (fn (e, ((), s)) => SqExp (e, s)))
ezyang@1697 330 (wrap field SqField)
ezyang@1697 331
ezyang@1697 332 val select = log "select"
ezyang@1697 333 (wrap (follow (const "SELECT ") (list sitem))
ezyang@1697 334 (fn ((), ls) => ls))
ezyang@1697 335
ezyang@1697 336 val fitem = wrap (follow uw_ident
ezyang@1697 337 (follow (const " AS ")
ezyang@1697 338 t_ident))
ezyang@1697 339 (fn (t, ((), f)) => (t, f))
ezyang@1697 340
ezyang@1697 341 val from = log "from"
ezyang@1697 342 (wrap (follow (const "FROM ") (list fitem))
ezyang@1697 343 (fn ((), ls) => ls))
ezyang@1697 344
ezyang@1697 345 val wher = wrap (follow (ws (const "WHERE ")) sqexp)
ezyang@1697 346 (fn ((), ls) => ls)
ezyang@1697 347
ezyang@1697 348 type query1 = {Select : sitem list,
ezyang@1697 349 From : (string * string) list,
ezyang@1697 350 Where : sqexp option}
ezyang@1697 351
ezyang@1697 352 val query1 = log "query1"
ezyang@1697 353 (wrap (follow (follow select from) (opt wher))
ezyang@1697 354 (fn ((fs, ts), wher) => {Select = fs, From = ts, Where = wher}))
ezyang@1697 355
ezyang@1697 356 datatype query =
ezyang@1697 357 Query1 of query1
ezyang@1697 358 | Union of query * query
ezyang@1697 359
ezyang@1697 360 val orderby = log "orderby"
ezyang@1697 361 (wrap (follow (ws (const "ORDER BY "))
ezyang@1697 362 (follow (list sqexp)
ezyang@1697 363 (opt (ws (const "DESC")))))
ezyang@1697 364 ignore)
ezyang@1697 365
ezyang@1697 366 fun query chs = log "query"
ezyang@1697 367 (wrap
ezyang@1697 368 (follow
ezyang@1697 369 (alt (wrap (follow (const "((")
ezyang@1697 370 (follow query
ezyang@1697 371 (follow (const ") UNION (")
ezyang@1697 372 (follow query (const "))")))))
ezyang@1697 373 (fn ((), (q1, ((), (q2, ())))) => Union (q1, q2)))
ezyang@1697 374 (wrap query1 Query1))
ezyang@1697 375 (opt orderby))
ezyang@1697 376 #1)
ezyang@1697 377 chs
ezyang@1697 378
ezyang@1697 379 datatype dml =
ezyang@1697 380 Insert of string * (string * sqexp) list
ezyang@1697 381 | Delete of string * sqexp
ezyang@1697 382 | Update of string * (string * sqexp) list * sqexp
ezyang@1697 383
ezyang@1697 384 val insert = log "insert"
ezyang@1697 385 (wrapP (follow (const "INSERT INTO ")
ezyang@1697 386 (follow uw_ident
ezyang@1697 387 (follow (const " (")
ezyang@1697 388 (follow (list uw_ident)
ezyang@1697 389 (follow (const ") VALUES (")
ezyang@1697 390 (follow (list sqexp)
ezyang@1697 391 (const ")")))))))
ezyang@1697 392 (fn ((), (tab, ((), (fs, ((), (es, ())))))) =>
ezyang@1697 393 (SOME (tab, ListPair.zipEq (fs, es)))
ezyang@1697 394 handle ListPair.UnequalLengths => NONE))
ezyang@1697 395
ezyang@1697 396 val delete = log "delete"
ezyang@1697 397 (wrap (follow (const "DELETE FROM ")
ezyang@1697 398 (follow uw_ident
ezyang@1697 399 (follow (const " AS T_T WHERE ")
ezyang@1697 400 sqexp)))
ezyang@1697 401 (fn ((), (tab, ((), es))) => (tab, es)))
ezyang@1697 402
ezyang@1697 403 val setting = log "setting"
ezyang@1697 404 (wrap (follow uw_ident (follow (const " = ") sqexp))
ezyang@1697 405 (fn (f, ((), e)) => (f, e)))
ezyang@1697 406
ezyang@1697 407 val update = log "update"
ezyang@1697 408 (wrap (follow (const "UPDATE ")
ezyang@1697 409 (follow uw_ident
ezyang@1697 410 (follow (const " AS T_T SET ")
ezyang@1697 411 (follow (list setting)
ezyang@1697 412 (follow (ws (const "WHERE "))
ezyang@1697 413 sqexp)))))
ezyang@1697 414 (fn ((), (tab, ((), (fs, ((), e))))) =>
ezyang@1697 415 (tab, fs, e)))
ezyang@1697 416
ezyang@1697 417 val dml = log "dml"
ezyang@1697 418 (altL [wrap insert Insert,
ezyang@1697 419 wrap delete Delete,
ezyang@1697 420 wrap update Update])
ezyang@1697 421
ezyang@1697 422 datatype querydml =
ezyang@1697 423 Query of query
ezyang@1697 424 | Dml of dml
ezyang@1697 425
ezyang@1697 426 val querydml = log "querydml" (altL [wrap dml Dml, wrap query Query])
ezyang@1697 427
ezyang@1697 428 end