changeset 1777:59b07fdae1ff

Partitioning and ordering for window functions
author Adam Chlipala <adam@chlipala.net>
date Sat, 02 Jun 2012 16:47:09 -0400
parents 8f28c3295148
children 818d4097e2ed
files doc/manual.tex lib/ur/basis.urs src/monoize.sml src/mysql.sml src/postgres.sml src/settings.sig src/settings.sml src/sqlite.sml src/urweb.grm tests/window.ur
diffstat 10 files changed, 137 insertions(+), 29 deletions(-) [+]
line wrap: on
line diff
--- a/doc/manual.tex	Sat Jun 02 16:00:50 2012 -0400
+++ b/doc/manual.tex	Sat Jun 02 16:47:09 2012 -0400
@@ -1805,7 +1805,7 @@
   \mt{class} \; \mt{sql\_summable} \\
   \mt{val} \; \mt{sql\_summable\_int} : \mt{sql\_summable} \; \mt{int} \\
   \mt{val} \; \mt{sql\_summable\_float} : \mt{sql\_summable} \; \mt{float} \\
-  \mt{val} \; \mt{sql\_avg} : \mt{t} ::: \mt{Type} \to \mt{nt} ::: \mt{Type} \to \mt{sql\_summable} \; \mt{t} \to \mt{nullify} \; \mt{t} \; \mt{nt} \to \mt{sql\_aggregate} \; \mt{t} \; \mt{nt} \\
+  \mt{val} \; \mt{sql\_avg} : \mt{t} ::: \mt{Type} \to \mt{sql\_summable} \; \mt{t} \to \mt{sql\_aggregate} \; \mt{t} \; (\mt{option} \; \mt{float}) \\
   \mt{val} \; \mt{sql\_sum} : \mt{t} ::: \mt{Type} \to \mt{nt} ::: \mt{Type} \to \mt{sql\_summable} \; \mt{t} \to \mt{nullify} \; \mt{t} \; \mt{nt} \to \mt{sql\_aggregate} \; \mt{t} \; \mt{nt}
 \end{array}$$
 
@@ -1819,11 +1819,33 @@
   \mt{val} \; \mt{sql\_min} : \mt{t} ::: \mt{Type} \to \mt{nt} ::: \mt{Type} \to \mt{sql\_maxable} \; \mt{t} \to \mt{nullify} \; \mt{t} \; \mt{nt} \to \mt{sql\_aggregate} \; \mt{t} \; \mt{nt}
 \end{array}$$
 
+There is a fancier class of aggregates called \emph{window functions}, defined in the SQL standard but currently only supported by Postgres, among the DBMSes that Ur/Web supports.  Here are the type family and associated combinator for creating a window function expression:
+
+$$\begin{array}{l}
+\mt{con} \; \mt{sql\_window} :: \{\{\mt{Type}\}\} \to \{\{\mt{Type}\}\} \to \{\mt{Type}\} \to \mt{Type} \to \mt{Type} \\
+\mt{val} \; \mt{sql\_window} : \mt{tables} ::: \{\{\mt{Type}\}\} \to \mt{agg} ::: \{\{\mt{Type}\}\} \to \mt{exps} ::: \{\mt{Type}\} \\
+\hspace{.1in} \to \mt{t} ::: \mt{Type} \\
+\hspace{.1in} \to \mt{sql\_window} \; \mt{tables} \; \mt{agg} \; \mt{exps} \; \mt{t} \\
+\hspace{.1in} \to \mt{sql\_partition} \; \mt{tables} \; \mt{agg} \; \mt{exps} \\
+\hspace{.1in} \to \mt{sql\_order\_by} \; \mt{tables} \; \mt{exps} \\
+\hspace{.1in} \to \mt{sql\_exp} \; \mt{tables} \; \mt{agg} \; \mt{exps} \; \mt{allow\_window} \; \mt{t}
+\end{array}$$
+
+The function argument for an SQL \cd{PARTITION BY} clause uses the following type family and combinators:
+$$\begin{array}{l}
+\mt{con} \; \mt{sql\_partition} :: \{\{\mt{Type}\}\} \to \{\{\mt{Type}\}\} \to \{\mt{Type}\} \to \mt{Type} \\
+\mt{val} \; \mt{sql\_no\_partition} : \mt{tables} ::: \{\{\mt{Type}\}\} \to \mt{agg} ::: \{\{\mt{Type}\}\} \to \mt{exps} ::: \{\mt{Type}\} \\
+\hspace{.1in} \to \mt{sql\_partition} \; \mt{tables} \; \mt{agg} \; \mt{exps} \\
+\mt{val} \; \mt{sql\_partition} : \mt{tables} ::: \{\{\mt{Type}\}\} \to \mt{agg} ::: \{\{\mt{Type}\}\} \to \mt{exps} ::: \{\mt{Type}\} \to \mt{t} ::: \mt{Type} \\
+\hspace{.1in} \to \mt{sql\_exp} \; \mt{tables} \; \mt{agg} \; \mt{exps} \; \mt{disallow\_window} \; \mt{t} \\
+\hspace{.1in} \to \mt{sql\_partition} \; \mt{tables} \; \mt{agg} \; \mt{exps}
+\end{array}$$
+
 Any SQL query that returns single columns may be turned into a subquery expression.
 
 $$\begin{array}{l}
 \mt{val} \; \mt{sql\_subquery} : \mt{tables} ::: \{\{\mt{Type}\}\} \to \mt{agg} ::: \{\{\mt{Type}\}\} \to \mt{exps} ::: \{\mt{Type}\} \to \mt{nm} ::: \mt{Name} \\
-\hspace{.1in} \to \mt{aw} ::: \mt{Type} \to \mt{t} ::: \mt{Type} \to \mt{nt} ::: \mt{Type} \\
+\hspace{.1in} \to \mt{aw} ::: \{\mt{Unit}\} \to \mt{t} ::: \mt{Type} \to \mt{nt} ::: \mt{Type} \\
 \hspace{.1in} \to \mt{nullify} \; \mt{t} \; \mt{nt} \to \mt{sql\_query} \; \mt{tables} \; \mt{agg} \; [\mt{nm} = \mt{t}] \to \mt{sql\_exp} \; \mt{tables} \; \mt{agg} \; \mt{exps} \; \mt{aw} \; \mt{nt}
 \end{array}$$
 
@@ -2194,7 +2216,8 @@
   \textrm{SQL expressions} & E &::=& t.f & \textrm{column references} \\
   &&& X & \textrm{named expression references} \\
   &&& \{[e]\} & \textrm{injected native Ur expressions} \\
-  &&& \{e\} & \textrm{computed expressions, probably using $\mt{sql\_exp}$ directly} \\
+  &&& \{e\} & \textrm{computed expressions, probably using} \\
+    &&&& \hspace{.1in} \textrm{$\mt{sql\_exp}$ directly} \\
   &&& \mt{TRUE} \mid \mt{FALSE} & \textrm{boolean constants} \\
   &&& \ell & \textrm{primitive type literals} \\
   &&& \mt{NULL} & \textrm{null value (injection of $\mt{None}$)} \\
@@ -2203,10 +2226,12 @@
   &&& n & \textrm{nullary operators} \\
   &&& u \; E & \textrm{unary operators} \\
   &&& E \; b \; E & \textrm{binary operators} \\
-  &&& \mt{COUNT}(\ast) & \textrm{count number of rows} \\
-  &&& a(E) & \textrm{other aggregate function} \\
+  &&& \mt{COUNT}(\ast) \; [w] & \textrm{count number of rows} \\
+  &&& \mt{RANK}() \; [w] & \textrm{rank in sequence (Postgres only)} \\
+  &&& a(E) \; [w] & \textrm{other aggregate function} \\
   &&& \mt{IF} \; E \; \mt{THEN} \; E \; \mt{ELSE} \; E & \textrm{conditional} \\
-  &&& (Q) & \textrm{subquery (must return a single expression column)} \\
+  &&& (Q) & \textrm{subquery (must return a single} \\
+  &&&& \hspace{.1in} \textrm{expression column)} \\
   &&& (E) & \textrm{explicit precedence} \\
   \textrm{Nullary operators} & n &::=& \mt{CURRENT\_TIMESTAMP} \\
   \textrm{Unary operators} & u &::=& \mt{NOT} \\
@@ -2214,6 +2239,7 @@
   \textrm{Aggregate functions} & a &::=& \mt{COUNT} \mid \mt{AVG} \mid \mt{SUM} \mid \mt{MIN} \mid \mt{MAX} \\
   \textrm{Directions} & o &::=& \mt{ASC} \mid \mt{DESC} \mid \{e\} \\
   \textrm{SQL integer} & N &::=& n \mid \{e\} \\
+  \textrm{Window} & w &::=& \mt{OVER} \; ([\mt{PARTITION} \; \mt{BY} \; E] \; [\mt{ORDER} \; \mt{BY} \; O]) & \textrm{(Postgres only)}
 \end{array}$$
 
 Additionally, an SQL expression may be inserted into normal Ur code with the syntax $(\mt{SQL} \; E)$ or $(\mt{WHERE} \; E)$.  Similar shorthands exist for other nonterminals, with the prefix $\mt{FROM}$ for $\mt{FROM}$ items and $\mt{SELECT1}$ for pre-queries.
--- a/lib/ur/basis.urs	Sat Jun 02 16:00:50 2012 -0400
+++ b/lib/ur/basis.urs	Sat Jun 02 16:47:09 2012 -0400
@@ -552,7 +552,7 @@
 val sql_summable_int : sql_summable int
 val sql_summable_float : sql_summable float
 val sql_summable_option : t ::: Type -> sql_summable t -> sql_summable (option t)
-val sql_avg : t ::: Type -> nt ::: Type -> sql_summable t -> nullify t nt -> sql_aggregate t nt
+val sql_avg : t ::: Type -> sql_summable t -> sql_aggregate t (option float)
 val sql_sum : t ::: Type -> nt ::: Type -> sql_summable t -> nullify t nt -> sql_aggregate t nt
 
 class sql_maxable
@@ -564,16 +564,25 @@
 val sql_max : t ::: Type -> nt ::: Type -> sql_maxable t -> nullify t nt -> sql_aggregate t nt
 val sql_min : t ::: Type -> nt ::: Type -> sql_maxable t -> nullify t nt -> sql_aggregate t nt
 
+con sql_partition :: {{Type}} -> {{Type}} -> {Type} -> Type
+val sql_no_partition : tables ::: {{Type}} -> agg ::: {{Type}} -> exps ::: {Type}
+                       -> sql_partition tables agg exps
+val sql_partition : tables ::: {{Type}} -> agg ::: {{Type}} -> exps ::: {Type} -> t ::: Type
+                    -> sql_exp tables agg exps disallow_window t
+                    -> sql_partition tables agg exps
+
 con sql_window :: {{Type}} -> {{Type}} -> {Type} -> Type -> Type
 val sql_window : tables ::: {{Type}} -> agg ::: {{Type}} -> exps ::: {Type}
                  -> t ::: Type
                  -> sql_window tables agg exps t
+                 -> sql_partition tables agg exps
+                 -> sql_order_by tables exps
                  -> sql_exp tables agg exps allow_window t
 
 val sql_window_aggregate : tables ::: {{Type}} -> agg ::: {{Type}} -> exps ::: {Type}
                            -> t ::: Type -> nt ::: Type
                            -> sql_aggregate t nt
-                           -> sql_exp tables agg exps allow_window t
+                           -> sql_exp tables agg exps disallow_window t
                            -> sql_window tables agg exps nt
 val sql_window_count : tables ::: {{Type}} -> agg ::: {{Type}} -> exps ::: {Type}
                        -> sql_window tables agg exps int
--- a/src/monoize.sml	Sat Jun 02 16:00:50 2012 -0400
+++ b/src/monoize.sml	Sat Jun 02 16:47:09 2012 -0400
@@ -299,6 +299,8 @@
                     (L'.TRecord [], loc)
                   | L.CApp ((L.CFfi ("Basis", "sql_maxable"), _), _) =>
                     (L'.TRecord [], loc)
+                  | L.CApp ((L.CApp ((L.CApp ((L.CFfi ("Basis", "sql_partition"), _), _), _), _), _), _) =>
+                    (L'.TFfi ("Basis", "string"), loc)
                   | L.CApp ((L.CApp ((L.CApp ((L.CApp ((L.CFfi ("Basis", "sql_window"), _), _), _), _), _), _), _), _) =>
                     (L'.TFfi ("Basis", "string"), loc)
                   | L.CApp ((L.CFfi ("Basis", "sql_arith"), _), _) =>
@@ -2744,10 +2746,9 @@
             ((L'.EAbs ("_", (L'.TRecord [], loc), (L'.TRecord [], loc),
                        (L'.ERecord [], loc)), loc),
              fm)
-          | L.ECApp ((L.ECApp ((L.EFfi ("Basis", "sql_avg"), _), _), _), _) =>
-            ((L'.EAbs ("_", (L'.TRecord [], loc), (L'.TFun ((L'.TRecord [], loc), (L'.TFfi ("Basis", "string"), loc)), loc),
-                       (L'.EAbs ("_", (L'.TRecord [], loc), (L'.TFfi ("Basis", "string"), loc),
-                                 (L'.EPrim (Prim.String "AVG"), loc)), loc)), loc),
+          | L.ECApp ((L.EFfi ("Basis", "sql_avg"), _), _) =>
+            ((L'.EAbs ("_", (L'.TRecord [], loc), (L'.TFfi ("Basis", "string"), loc),
+                       (L'.EPrim (Prim.String "AVG"), loc)), loc),
              fm)
           | L.ECApp ((L.ECApp ((L.EFfi ("Basis", "sql_sum"), _), _), _), _) =>
             ((L'.EAbs ("_", (L'.TRecord [], loc), (L'.TFun ((L'.TRecord [], loc), (L'.TFfi ("Basis", "string"), loc)), loc),
@@ -2783,6 +2784,29 @@
           | L.ECApp (
             (L.ECApp (
              (L.ECApp (
+               (L.EFfi ("Basis", "sql_no_partition"), _),
+               _), _),
+             _), _),
+            _) => ((L'.EPrim (Prim.String ""), loc), fm)
+          | L.ECApp (
+            (L.ECApp (
+             (L.ECApp (
+              (L.ECApp (
+               (L.EFfi ("Basis", "sql_partition"), _),
+               _), _),
+              _), _),
+             _), _),
+            _) =>
+            let
+                val s = (L'.TFfi ("Basis", "string"), loc)
+            in
+                ((L'.EAbs ("e", s, s, strcat [(L'.EPrim (Prim.String "PARTITION BY "), loc), (L'.ERel 0, loc)]), loc),
+                 fm)
+            end
+
+          | L.ECApp (
+            (L.ECApp (
+             (L.ECApp (
               (L.ECApp (
                (L.EFfi ("Basis", "sql_window"), _),
                _), _),
@@ -2790,13 +2814,31 @@
              _), _),
             _) =>
             let
+                val () = if #windowFunctions (Settings.currentDbms ()) then
+                             ()
+                         else
+                             ErrorMsg.errorAt loc "The DBMS you've selected doesn't support window functions."
+
                 val s = (L'.TFfi ("Basis", "string"), loc)
                 fun sc s = (L'.EPrim (Prim.String s), loc)
 
-                val main = strcat [(L'.ERel 0, loc),
-                                   sc " OVER ()"]
+                val main = strcat [(L'.ERel 2, loc),
+                                   sc " OVER (",
+                                   (L'.ERel 1, loc),
+                                   (L'.ECase ((L'.ERel 0, loc),
+                                              [((L'.PPrim (Prim.String ""), loc),
+                                                sc ""),
+                                               ((L'.PWild, loc),
+                                                strcat [sc " ORDER BY ",
+                                                        (L'.ERel 0, loc)])],
+                                              {disc = s,
+                                               result = s}), loc),
+                                   sc ")"]
             in
-                ((L'.EAbs ("w", s, s, main), loc),
+                ((L'.EAbs ("w", s, (L'.TFun (s, (L'.TFun (s, s), loc)), loc),
+                           (L'.EAbs ("p", s, (L'.TFun (s, s), loc),
+                                     (L'.EAbs ("o", s, s,
+                                               main), loc)), loc)), loc),
                  fm)
             end
 
--- a/src/mysql.sml	Sat Jun 02 16:00:50 2012 -0400
+++ b/src/mysql.sml	Sat Jun 02 16:47:09 2012 -0400
@@ -1554,6 +1554,7 @@
                   trueString = "TRUE",
                   falseString = "FALSE",
                   onlyUnion = true,
-                  nestedRelops = false}
+                  nestedRelops = false,
+                  windowFunctions = false}
 
 end
--- a/src/postgres.sml	Sat Jun 02 16:00:50 2012 -0400
+++ b/src/postgres.sml	Sat Jun 02 16:47:09 2012 -0400
@@ -1063,7 +1063,8 @@
                   trueString = "TRUE",
                   falseString = "FALSE",
                   onlyUnion = false,
-                  nestedRelops = true}
+                  nestedRelops = true,
+                  windowFunctions = true}
 
 val () = setDbms "postgres"
 
--- a/src/settings.sig	Sat Jun 02 16:00:50 2012 -0400
+++ b/src/settings.sig	Sat Jun 02 16:47:09 2012 -0400
@@ -199,7 +199,8 @@
          trueString : string,
          falseString : string,
          onlyUnion : bool,
-         nestedRelops : bool
+         nestedRelops : bool,
+         windowFunctions : bool
     }
 
     val addDbms : dbms -> unit
--- a/src/settings.sml	Sat Jun 02 16:00:50 2012 -0400
+++ b/src/settings.sml	Sat Jun 02 16:47:09 2012 -0400
@@ -537,7 +537,8 @@
      trueString : string,
      falseString : string,
      onlyUnion : bool,
-     nestedRelops : bool
+     nestedRelops : bool,
+     windowFunctions : bool
 }
 
 val dbmses = ref ([] : dbms list)
@@ -568,7 +569,8 @@
                   trueString = "",
                   falseString = "",
                   onlyUnion = false,
-                  nestedRelops = false} : dbms)
+                  nestedRelops = false,
+                  windowFunctions = false} : dbms)
 
 fun addDbms v = dbmses := v :: !dbmses
 fun setDbms s =
--- a/src/sqlite.sml	Sat Jun 02 16:00:50 2012 -0400
+++ b/src/sqlite.sml	Sat Jun 02 16:47:09 2012 -0400
@@ -846,6 +846,7 @@
                   trueString = "1",
                   falseString = "0",
                   onlyUnion = false,
-                  nestedRelops = false}
+                  nestedRelops = false,
+                  windowFunctions = false}
 
 end
--- a/src/urweb.grm	Sat Jun 02 16:00:50 2012 -0400
+++ b/src/urweb.grm	Sat Jun 02 16:47:09 2012 -0400
@@ -303,6 +303,19 @@
         foldl (fn (s, e) => (EApp ((EApp ((EVar (["Basis"], "oneProperty", Infer), pos), e), pos), parseProperty s pos), pos))
                 (EVar (["Basis"], "noStyle", Infer), pos) props
 
+fun applyWindow loc e window =
+    let
+        val (pb, ob) = getOpt (window, ((EVar (["Basis"], "sql_no_partition", Infer), dummy),
+                                        (ECApp ((EVar (["Basis"], "sql_order_by_Nil", Infer), dummy),
+                                                (CWild (KRecord (KType, dummy), dummy), dummy)),
+                                         dummy)))
+        val e' = (EVar (["Basis"], "sql_window", Infer), loc)
+        val e' = (EApp (e', e), loc)
+        val e' = (EApp (e', pb), loc)
+    in
+        (EApp (e', ob), loc)
+    end
+
 %%
 %header (functor UrwebLrValsFn(structure Token : TOKEN))
 
@@ -456,7 +469,8 @@
  | selis of select_item list
  | select of select
  | sqlexp of exp
- | window of unit option
+ | window of (exp * exp) option
+ | pbopt of exp
  | wopt of exp
  | groupi of group_item
  | groupis of group_item list
@@ -2036,14 +2050,14 @@
                                                   let
                                                       val e = (EVar (["Basis"], "sql_window_count", Infer), loc)
                                                   in
-                                                      (EApp ((EVar (["Basis"], "sql_window", Infer), loc), e), loc)
+                                                      applyWindow loc e window
                                                   end
                                           end)
        | RANK UNIT window                (let
                                               val loc = s (RANKleft, windowright)
                                               val e = (EVar (["Basis"], "sql_window_rank", Infer), loc)
                                           in
-                                              (EApp ((EVar (["Basis"], "sql_window", Infer), loc), e), loc)
+                                              applyWindow loc e window
                                           end)
        | COUNT LPAREN sqlexp RPAREN window (let
                                                 val loc = s (COUNTleft, windowright)
@@ -2064,7 +2078,7 @@
                                                                        e), loc)
                                                         val e = (EApp (e, sqlexp), loc)
                                                     in
-                                                        (EApp ((EVar (["Basis"], "sql_window", Infer), loc), e), loc)
+                                                        applyWindow loc e window
                                                     end
                                             end)
        | sqlagg LPAREN sqlexp RPAREN window (let
@@ -2086,7 +2100,7 @@
                                                                         e), loc)
                                                          val e = (EApp (e, sqlexp), loc)
                                                      in
-                                                         (EApp ((EVar (["Basis"], "sql_window", Infer), loc), e), loc)
+                                                         applyWindow loc e window
                                                      end
                                              end)
        | COALESCE LPAREN sqlexp COMMA sqlexp RPAREN
@@ -2114,7 +2128,16 @@
                                          end)
 
 window :                                (NONE)
-       | OVER LPAREN RPAREN             (SOME ())
+       | OVER LPAREN pbopt obopt RPAREN (SOME (pbopt, obopt))
+
+pbopt  :                                ((EVar (["Basis"], "sql_no_partition", Infer), dummy))
+       | PARTITION BY sqlexp            (let
+                                             val loc = s (PARTITIONleft, sqlexpright)
+
+                                             val e = (EVar (["Basis"], "sql_partition", Infer), loc)
+                                         in
+                                             (EApp (e, sqlexp), loc)
+                                         end)
 
 fname  : SYMBOL                         (EVar (["Basis"], "sql_" ^ SYMBOL, Infer), s (SYMBOLleft, SYMBOLright))
        | LBRACE eexp RBRACE             (eexp)
--- a/tests/window.ur	Sat Jun 02 16:00:50 2012 -0400
+++ b/tests/window.ur	Sat Jun 02 16:47:09 2012 -0400
@@ -3,9 +3,11 @@
                    Salary : int }
 
 fun main () : transaction page =
-    x <- queryX (SELECT empsalary.Depname, empsalary.Empno, empsalary.Salary, RANK() AS R
+    x <- queryX (SELECT empsalary.Depname, empsalary.Empno, empsalary.Salary,
+                   RANK() OVER (PARTITION BY empsalary.Depname ORDER BY empsalary.Salary DESC) AS R,
+                   AVG(empsalary.Salary) OVER (PARTITION BY empsalary.Depname) AS A
                  FROM empsalary)
-                (fn r => <xml>{[r.Empsalary.Depname]}, {[r.Empsalary.Empno]}, {[r.Empsalary.Salary]}, {[r.R]}<br/></xml>);
+                (fn r => <xml>{[r.Empsalary.Depname]}, {[r.Empsalary.Empno]}, {[r.Empsalary.Salary]}, {[r.R]}, {[r.A]}<br/></xml>);
     return <xml><body>
       {x}
     </body></xml>