Mercurial > urweb
changeset 1777:59b07fdae1ff
Partitioning and ordering for window functions
author | Adam Chlipala <adam@chlipala.net> |
---|---|
date | Sat, 02 Jun 2012 16:47:09 -0400 |
parents | 8f28c3295148 |
children | 818d4097e2ed |
files | doc/manual.tex lib/ur/basis.urs src/monoize.sml src/mysql.sml src/postgres.sml src/settings.sig src/settings.sml src/sqlite.sml src/urweb.grm tests/window.ur |
diffstat | 10 files changed, 137 insertions(+), 29 deletions(-) [+] |
line wrap: on
line diff
--- a/doc/manual.tex Sat Jun 02 16:00:50 2012 -0400 +++ b/doc/manual.tex Sat Jun 02 16:47:09 2012 -0400 @@ -1805,7 +1805,7 @@ \mt{class} \; \mt{sql\_summable} \\ \mt{val} \; \mt{sql\_summable\_int} : \mt{sql\_summable} \; \mt{int} \\ \mt{val} \; \mt{sql\_summable\_float} : \mt{sql\_summable} \; \mt{float} \\ - \mt{val} \; \mt{sql\_avg} : \mt{t} ::: \mt{Type} \to \mt{nt} ::: \mt{Type} \to \mt{sql\_summable} \; \mt{t} \to \mt{nullify} \; \mt{t} \; \mt{nt} \to \mt{sql\_aggregate} \; \mt{t} \; \mt{nt} \\ + \mt{val} \; \mt{sql\_avg} : \mt{t} ::: \mt{Type} \to \mt{sql\_summable} \; \mt{t} \to \mt{sql\_aggregate} \; \mt{t} \; (\mt{option} \; \mt{float}) \\ \mt{val} \; \mt{sql\_sum} : \mt{t} ::: \mt{Type} \to \mt{nt} ::: \mt{Type} \to \mt{sql\_summable} \; \mt{t} \to \mt{nullify} \; \mt{t} \; \mt{nt} \to \mt{sql\_aggregate} \; \mt{t} \; \mt{nt} \end{array}$$ @@ -1819,11 +1819,33 @@ \mt{val} \; \mt{sql\_min} : \mt{t} ::: \mt{Type} \to \mt{nt} ::: \mt{Type} \to \mt{sql\_maxable} \; \mt{t} \to \mt{nullify} \; \mt{t} \; \mt{nt} \to \mt{sql\_aggregate} \; \mt{t} \; \mt{nt} \end{array}$$ +There is a fancier class of aggregates called \emph{window functions}, defined in the SQL standard but currently only supported by Postgres, among the DBMSes that Ur/Web supports. Here are the type family and associated combinator for creating a window function expression: + +$$\begin{array}{l} +\mt{con} \; \mt{sql\_window} :: \{\{\mt{Type}\}\} \to \{\{\mt{Type}\}\} \to \{\mt{Type}\} \to \mt{Type} \to \mt{Type} \\ +\mt{val} \; \mt{sql\_window} : \mt{tables} ::: \{\{\mt{Type}\}\} \to \mt{agg} ::: \{\{\mt{Type}\}\} \to \mt{exps} ::: \{\mt{Type}\} \\ +\hspace{.1in} \to \mt{t} ::: \mt{Type} \\ +\hspace{.1in} \to \mt{sql\_window} \; \mt{tables} \; \mt{agg} \; \mt{exps} \; \mt{t} \\ +\hspace{.1in} \to \mt{sql\_partition} \; \mt{tables} \; \mt{agg} \; \mt{exps} \\ +\hspace{.1in} \to \mt{sql\_order\_by} \; \mt{tables} \; \mt{exps} \\ +\hspace{.1in} \to \mt{sql\_exp} \; \mt{tables} \; \mt{agg} \; \mt{exps} \; \mt{allow\_window} \; \mt{t} +\end{array}$$ + +The function argument for an SQL \cd{PARTITION BY} clause uses the following type family and combinators: +$$\begin{array}{l} +\mt{con} \; \mt{sql\_partition} :: \{\{\mt{Type}\}\} \to \{\{\mt{Type}\}\} \to \{\mt{Type}\} \to \mt{Type} \\ +\mt{val} \; \mt{sql\_no\_partition} : \mt{tables} ::: \{\{\mt{Type}\}\} \to \mt{agg} ::: \{\{\mt{Type}\}\} \to \mt{exps} ::: \{\mt{Type}\} \\ +\hspace{.1in} \to \mt{sql\_partition} \; \mt{tables} \; \mt{agg} \; \mt{exps} \\ +\mt{val} \; \mt{sql\_partition} : \mt{tables} ::: \{\{\mt{Type}\}\} \to \mt{agg} ::: \{\{\mt{Type}\}\} \to \mt{exps} ::: \{\mt{Type}\} \to \mt{t} ::: \mt{Type} \\ +\hspace{.1in} \to \mt{sql\_exp} \; \mt{tables} \; \mt{agg} \; \mt{exps} \; \mt{disallow\_window} \; \mt{t} \\ +\hspace{.1in} \to \mt{sql\_partition} \; \mt{tables} \; \mt{agg} \; \mt{exps} +\end{array}$$ + Any SQL query that returns single columns may be turned into a subquery expression. $$\begin{array}{l} \mt{val} \; \mt{sql\_subquery} : \mt{tables} ::: \{\{\mt{Type}\}\} \to \mt{agg} ::: \{\{\mt{Type}\}\} \to \mt{exps} ::: \{\mt{Type}\} \to \mt{nm} ::: \mt{Name} \\ -\hspace{.1in} \to \mt{aw} ::: \mt{Type} \to \mt{t} ::: \mt{Type} \to \mt{nt} ::: \mt{Type} \\ +\hspace{.1in} \to \mt{aw} ::: \{\mt{Unit}\} \to \mt{t} ::: \mt{Type} \to \mt{nt} ::: \mt{Type} \\ \hspace{.1in} \to \mt{nullify} \; \mt{t} \; \mt{nt} \to \mt{sql\_query} \; \mt{tables} \; \mt{agg} \; [\mt{nm} = \mt{t}] \to \mt{sql\_exp} \; \mt{tables} \; \mt{agg} \; \mt{exps} \; \mt{aw} \; \mt{nt} \end{array}$$ @@ -2194,7 +2216,8 @@ \textrm{SQL expressions} & E &::=& t.f & \textrm{column references} \\ &&& X & \textrm{named expression references} \\ &&& \{[e]\} & \textrm{injected native Ur expressions} \\ - &&& \{e\} & \textrm{computed expressions, probably using $\mt{sql\_exp}$ directly} \\ + &&& \{e\} & \textrm{computed expressions, probably using} \\ + &&&& \hspace{.1in} \textrm{$\mt{sql\_exp}$ directly} \\ &&& \mt{TRUE} \mid \mt{FALSE} & \textrm{boolean constants} \\ &&& \ell & \textrm{primitive type literals} \\ &&& \mt{NULL} & \textrm{null value (injection of $\mt{None}$)} \\ @@ -2203,10 +2226,12 @@ &&& n & \textrm{nullary operators} \\ &&& u \; E & \textrm{unary operators} \\ &&& E \; b \; E & \textrm{binary operators} \\ - &&& \mt{COUNT}(\ast) & \textrm{count number of rows} \\ - &&& a(E) & \textrm{other aggregate function} \\ + &&& \mt{COUNT}(\ast) \; [w] & \textrm{count number of rows} \\ + &&& \mt{RANK}() \; [w] & \textrm{rank in sequence (Postgres only)} \\ + &&& a(E) \; [w] & \textrm{other aggregate function} \\ &&& \mt{IF} \; E \; \mt{THEN} \; E \; \mt{ELSE} \; E & \textrm{conditional} \\ - &&& (Q) & \textrm{subquery (must return a single expression column)} \\ + &&& (Q) & \textrm{subquery (must return a single} \\ + &&&& \hspace{.1in} \textrm{expression column)} \\ &&& (E) & \textrm{explicit precedence} \\ \textrm{Nullary operators} & n &::=& \mt{CURRENT\_TIMESTAMP} \\ \textrm{Unary operators} & u &::=& \mt{NOT} \\ @@ -2214,6 +2239,7 @@ \textrm{Aggregate functions} & a &::=& \mt{COUNT} \mid \mt{AVG} \mid \mt{SUM} \mid \mt{MIN} \mid \mt{MAX} \\ \textrm{Directions} & o &::=& \mt{ASC} \mid \mt{DESC} \mid \{e\} \\ \textrm{SQL integer} & N &::=& n \mid \{e\} \\ + \textrm{Window} & w &::=& \mt{OVER} \; ([\mt{PARTITION} \; \mt{BY} \; E] \; [\mt{ORDER} \; \mt{BY} \; O]) & \textrm{(Postgres only)} \end{array}$$ Additionally, an SQL expression may be inserted into normal Ur code with the syntax $(\mt{SQL} \; E)$ or $(\mt{WHERE} \; E)$. Similar shorthands exist for other nonterminals, with the prefix $\mt{FROM}$ for $\mt{FROM}$ items and $\mt{SELECT1}$ for pre-queries.
--- a/lib/ur/basis.urs Sat Jun 02 16:00:50 2012 -0400 +++ b/lib/ur/basis.urs Sat Jun 02 16:47:09 2012 -0400 @@ -552,7 +552,7 @@ val sql_summable_int : sql_summable int val sql_summable_float : sql_summable float val sql_summable_option : t ::: Type -> sql_summable t -> sql_summable (option t) -val sql_avg : t ::: Type -> nt ::: Type -> sql_summable t -> nullify t nt -> sql_aggregate t nt +val sql_avg : t ::: Type -> sql_summable t -> sql_aggregate t (option float) val sql_sum : t ::: Type -> nt ::: Type -> sql_summable t -> nullify t nt -> sql_aggregate t nt class sql_maxable @@ -564,16 +564,25 @@ val sql_max : t ::: Type -> nt ::: Type -> sql_maxable t -> nullify t nt -> sql_aggregate t nt val sql_min : t ::: Type -> nt ::: Type -> sql_maxable t -> nullify t nt -> sql_aggregate t nt +con sql_partition :: {{Type}} -> {{Type}} -> {Type} -> Type +val sql_no_partition : tables ::: {{Type}} -> agg ::: {{Type}} -> exps ::: {Type} + -> sql_partition tables agg exps +val sql_partition : tables ::: {{Type}} -> agg ::: {{Type}} -> exps ::: {Type} -> t ::: Type + -> sql_exp tables agg exps disallow_window t + -> sql_partition tables agg exps + con sql_window :: {{Type}} -> {{Type}} -> {Type} -> Type -> Type val sql_window : tables ::: {{Type}} -> agg ::: {{Type}} -> exps ::: {Type} -> t ::: Type -> sql_window tables agg exps t + -> sql_partition tables agg exps + -> sql_order_by tables exps -> sql_exp tables agg exps allow_window t val sql_window_aggregate : tables ::: {{Type}} -> agg ::: {{Type}} -> exps ::: {Type} -> t ::: Type -> nt ::: Type -> sql_aggregate t nt - -> sql_exp tables agg exps allow_window t + -> sql_exp tables agg exps disallow_window t -> sql_window tables agg exps nt val sql_window_count : tables ::: {{Type}} -> agg ::: {{Type}} -> exps ::: {Type} -> sql_window tables agg exps int
--- a/src/monoize.sml Sat Jun 02 16:00:50 2012 -0400 +++ b/src/monoize.sml Sat Jun 02 16:47:09 2012 -0400 @@ -299,6 +299,8 @@ (L'.TRecord [], loc) | L.CApp ((L.CFfi ("Basis", "sql_maxable"), _), _) => (L'.TRecord [], loc) + | L.CApp ((L.CApp ((L.CApp ((L.CFfi ("Basis", "sql_partition"), _), _), _), _), _), _) => + (L'.TFfi ("Basis", "string"), loc) | L.CApp ((L.CApp ((L.CApp ((L.CApp ((L.CFfi ("Basis", "sql_window"), _), _), _), _), _), _), _), _) => (L'.TFfi ("Basis", "string"), loc) | L.CApp ((L.CFfi ("Basis", "sql_arith"), _), _) => @@ -2744,10 +2746,9 @@ ((L'.EAbs ("_", (L'.TRecord [], loc), (L'.TRecord [], loc), (L'.ERecord [], loc)), loc), fm) - | L.ECApp ((L.ECApp ((L.EFfi ("Basis", "sql_avg"), _), _), _), _) => - ((L'.EAbs ("_", (L'.TRecord [], loc), (L'.TFun ((L'.TRecord [], loc), (L'.TFfi ("Basis", "string"), loc)), loc), - (L'.EAbs ("_", (L'.TRecord [], loc), (L'.TFfi ("Basis", "string"), loc), - (L'.EPrim (Prim.String "AVG"), loc)), loc)), loc), + | L.ECApp ((L.EFfi ("Basis", "sql_avg"), _), _) => + ((L'.EAbs ("_", (L'.TRecord [], loc), (L'.TFfi ("Basis", "string"), loc), + (L'.EPrim (Prim.String "AVG"), loc)), loc), fm) | L.ECApp ((L.ECApp ((L.EFfi ("Basis", "sql_sum"), _), _), _), _) => ((L'.EAbs ("_", (L'.TRecord [], loc), (L'.TFun ((L'.TRecord [], loc), (L'.TFfi ("Basis", "string"), loc)), loc), @@ -2783,6 +2784,29 @@ | L.ECApp ( (L.ECApp ( (L.ECApp ( + (L.EFfi ("Basis", "sql_no_partition"), _), + _), _), + _), _), + _) => ((L'.EPrim (Prim.String ""), loc), fm) + | L.ECApp ( + (L.ECApp ( + (L.ECApp ( + (L.ECApp ( + (L.EFfi ("Basis", "sql_partition"), _), + _), _), + _), _), + _), _), + _) => + let + val s = (L'.TFfi ("Basis", "string"), loc) + in + ((L'.EAbs ("e", s, s, strcat [(L'.EPrim (Prim.String "PARTITION BY "), loc), (L'.ERel 0, loc)]), loc), + fm) + end + + | L.ECApp ( + (L.ECApp ( + (L.ECApp ( (L.ECApp ( (L.EFfi ("Basis", "sql_window"), _), _), _), @@ -2790,13 +2814,31 @@ _), _), _) => let + val () = if #windowFunctions (Settings.currentDbms ()) then + () + else + ErrorMsg.errorAt loc "The DBMS you've selected doesn't support window functions." + val s = (L'.TFfi ("Basis", "string"), loc) fun sc s = (L'.EPrim (Prim.String s), loc) - val main = strcat [(L'.ERel 0, loc), - sc " OVER ()"] + val main = strcat [(L'.ERel 2, loc), + sc " OVER (", + (L'.ERel 1, loc), + (L'.ECase ((L'.ERel 0, loc), + [((L'.PPrim (Prim.String ""), loc), + sc ""), + ((L'.PWild, loc), + strcat [sc " ORDER BY ", + (L'.ERel 0, loc)])], + {disc = s, + result = s}), loc), + sc ")"] in - ((L'.EAbs ("w", s, s, main), loc), + ((L'.EAbs ("w", s, (L'.TFun (s, (L'.TFun (s, s), loc)), loc), + (L'.EAbs ("p", s, (L'.TFun (s, s), loc), + (L'.EAbs ("o", s, s, + main), loc)), loc)), loc), fm) end
--- a/src/mysql.sml Sat Jun 02 16:00:50 2012 -0400 +++ b/src/mysql.sml Sat Jun 02 16:47:09 2012 -0400 @@ -1554,6 +1554,7 @@ trueString = "TRUE", falseString = "FALSE", onlyUnion = true, - nestedRelops = false} + nestedRelops = false, + windowFunctions = false} end
--- a/src/postgres.sml Sat Jun 02 16:00:50 2012 -0400 +++ b/src/postgres.sml Sat Jun 02 16:47:09 2012 -0400 @@ -1063,7 +1063,8 @@ trueString = "TRUE", falseString = "FALSE", onlyUnion = false, - nestedRelops = true} + nestedRelops = true, + windowFunctions = true} val () = setDbms "postgres"
--- a/src/settings.sig Sat Jun 02 16:00:50 2012 -0400 +++ b/src/settings.sig Sat Jun 02 16:47:09 2012 -0400 @@ -199,7 +199,8 @@ trueString : string, falseString : string, onlyUnion : bool, - nestedRelops : bool + nestedRelops : bool, + windowFunctions : bool } val addDbms : dbms -> unit
--- a/src/settings.sml Sat Jun 02 16:00:50 2012 -0400 +++ b/src/settings.sml Sat Jun 02 16:47:09 2012 -0400 @@ -537,7 +537,8 @@ trueString : string, falseString : string, onlyUnion : bool, - nestedRelops : bool + nestedRelops : bool, + windowFunctions : bool } val dbmses = ref ([] : dbms list) @@ -568,7 +569,8 @@ trueString = "", falseString = "", onlyUnion = false, - nestedRelops = false} : dbms) + nestedRelops = false, + windowFunctions = false} : dbms) fun addDbms v = dbmses := v :: !dbmses fun setDbms s =
--- a/src/sqlite.sml Sat Jun 02 16:00:50 2012 -0400 +++ b/src/sqlite.sml Sat Jun 02 16:47:09 2012 -0400 @@ -846,6 +846,7 @@ trueString = "1", falseString = "0", onlyUnion = false, - nestedRelops = false} + nestedRelops = false, + windowFunctions = false} end
--- a/src/urweb.grm Sat Jun 02 16:00:50 2012 -0400 +++ b/src/urweb.grm Sat Jun 02 16:47:09 2012 -0400 @@ -303,6 +303,19 @@ foldl (fn (s, e) => (EApp ((EApp ((EVar (["Basis"], "oneProperty", Infer), pos), e), pos), parseProperty s pos), pos)) (EVar (["Basis"], "noStyle", Infer), pos) props +fun applyWindow loc e window = + let + val (pb, ob) = getOpt (window, ((EVar (["Basis"], "sql_no_partition", Infer), dummy), + (ECApp ((EVar (["Basis"], "sql_order_by_Nil", Infer), dummy), + (CWild (KRecord (KType, dummy), dummy), dummy)), + dummy))) + val e' = (EVar (["Basis"], "sql_window", Infer), loc) + val e' = (EApp (e', e), loc) + val e' = (EApp (e', pb), loc) + in + (EApp (e', ob), loc) + end + %% %header (functor UrwebLrValsFn(structure Token : TOKEN)) @@ -456,7 +469,8 @@ | selis of select_item list | select of select | sqlexp of exp - | window of unit option + | window of (exp * exp) option + | pbopt of exp | wopt of exp | groupi of group_item | groupis of group_item list @@ -2036,14 +2050,14 @@ let val e = (EVar (["Basis"], "sql_window_count", Infer), loc) in - (EApp ((EVar (["Basis"], "sql_window", Infer), loc), e), loc) + applyWindow loc e window end end) | RANK UNIT window (let val loc = s (RANKleft, windowright) val e = (EVar (["Basis"], "sql_window_rank", Infer), loc) in - (EApp ((EVar (["Basis"], "sql_window", Infer), loc), e), loc) + applyWindow loc e window end) | COUNT LPAREN sqlexp RPAREN window (let val loc = s (COUNTleft, windowright) @@ -2064,7 +2078,7 @@ e), loc) val e = (EApp (e, sqlexp), loc) in - (EApp ((EVar (["Basis"], "sql_window", Infer), loc), e), loc) + applyWindow loc e window end end) | sqlagg LPAREN sqlexp RPAREN window (let @@ -2086,7 +2100,7 @@ e), loc) val e = (EApp (e, sqlexp), loc) in - (EApp ((EVar (["Basis"], "sql_window", Infer), loc), e), loc) + applyWindow loc e window end end) | COALESCE LPAREN sqlexp COMMA sqlexp RPAREN @@ -2114,7 +2128,16 @@ end) window : (NONE) - | OVER LPAREN RPAREN (SOME ()) + | OVER LPAREN pbopt obopt RPAREN (SOME (pbopt, obopt)) + +pbopt : ((EVar (["Basis"], "sql_no_partition", Infer), dummy)) + | PARTITION BY sqlexp (let + val loc = s (PARTITIONleft, sqlexpright) + + val e = (EVar (["Basis"], "sql_partition", Infer), loc) + in + (EApp (e, sqlexp), loc) + end) fname : SYMBOL (EVar (["Basis"], "sql_" ^ SYMBOL, Infer), s (SYMBOLleft, SYMBOLright)) | LBRACE eexp RBRACE (eexp)
--- a/tests/window.ur Sat Jun 02 16:00:50 2012 -0400 +++ b/tests/window.ur Sat Jun 02 16:47:09 2012 -0400 @@ -3,9 +3,11 @@ Salary : int } fun main () : transaction page = - x <- queryX (SELECT empsalary.Depname, empsalary.Empno, empsalary.Salary, RANK() AS R + x <- queryX (SELECT empsalary.Depname, empsalary.Empno, empsalary.Salary, + RANK() OVER (PARTITION BY empsalary.Depname ORDER BY empsalary.Salary DESC) AS R, + AVG(empsalary.Salary) OVER (PARTITION BY empsalary.Depname) AS A FROM empsalary) - (fn r => <xml>{[r.Empsalary.Depname]}, {[r.Empsalary.Empno]}, {[r.Empsalary.Salary]}, {[r.R]}<br/></xml>); + (fn r => <xml>{[r.Empsalary.Depname]}, {[r.Empsalary.Empno]}, {[r.Empsalary.Salary]}, {[r.R]}, {[r.A]}<br/></xml>); return <xml><body> {x} </body></xml>