annotate doc/manual.tex @ 529:4df69124e9c5

Shorthands
author Adam Chlipala <adamc@hcoop.net>
date Thu, 27 Nov 2008 16:55:30 -0500
parents 9e2abd85529b
children c2f9f94ea709
rev   line source
adamc@524 1 \documentclass{article}
adamc@524 2 \usepackage{fullpage,amsmath,amssymb,proof}
adamc@524 3
adamc@524 4 \newcommand{\cd}[1]{\texttt{#1}}
adamc@524 5 \newcommand{\mt}[1]{\mathsf{#1}}
adamc@524 6
adamc@524 7 \newcommand{\rc}{+ \hspace{-.075in} + \;}
adamc@527 8 \newcommand{\rcut}{\; \texttt{--} \;}
adamc@527 9 \newcommand{\rcutM}{\; \texttt{---} \;}
adamc@524 10
adamc@524 11 \begin{document}
adamc@524 12
adamc@524 13 \title{The Ur/Web Manual}
adamc@524 14 \author{Adam Chlipala}
adamc@524 15
adamc@524 16 \maketitle
adamc@524 17
adamc@529 18 \section{Ur Syntax}
adamc@529 19
adamc@529 20 In this section, we describe the syntax of Ur, deferring to a later section discussion of most of the syntax specific to SQL and XML. The sole exceptions are the declaration forms for tables, sequences, and cookies.
adamc@524 21
adamc@524 22 \subsection{Lexical Conventions}
adamc@524 23
adamc@524 24 We give the Ur language definition in \LaTeX $\;$ math mode, since that is prettier than monospaced ASCII. The corresponding ASCII syntax can be read off directly. Here is the key for mapping math symbols to ASCII character sequences.
adamc@524 25
adamc@524 26 \begin{center}
adamc@524 27 \begin{tabular}{rl}
adamc@524 28 \textbf{\LaTeX} & \textbf{ASCII} \\
adamc@524 29 $\to$ & \cd{->} \\
adamc@524 30 $\times$ & \cd{*} \\
adamc@524 31 $\lambda$ & \cd{fn} \\
adamc@524 32 $\Rightarrow$ & \cd{=>} \\
adamc@529 33 $\neq$ & \cd{<>} \\
adamc@529 34 $\leq$ & \cd{<=} \\
adamc@529 35 $\geq$ & \cd{>=} \\
adamc@524 36 \\
adamc@524 37 $x$ & Normal textual identifier, not beginning with an uppercase letter \\
adamc@525 38 $X$ & Normal textual identifier, beginning with an uppercase letter \\
adamc@524 39 \end{tabular}
adamc@524 40 \end{center}
adamc@524 41
adamc@525 42 We often write syntax like $e^*$ to indicate zero or more copies of $e$, $e^+$ to indicate one or more copies, and $e,^*$ and $e,^+$ to indicate multiple copies separated by commas. Another separator may be used in place of a comma. The $e$ term may be surrounded by parentheses to indicate grouping; those parentheses should not be included in the actual ASCII.
adamc@524 43
adamc@526 44 We write $\ell$ for literals of the primitive types, for the most part following C conventions. There are $\mt{int}$, $\mt{float}$, and $\mt{string}$ literals.
adamc@526 45
adamc@527 46 This version of the manual doesn't include operator precedences; see \texttt{src/urweb.grm} for that.
adamc@527 47
adamc@524 48 \subsection{Core Syntax}
adamc@524 49
adamc@524 50 \emph{Kinds} classify types and other compile-time-only entities. Each kind in the grammar is listed with a description of the sort of data it classifies.
adamc@524 51 $$\begin{array}{rrcll}
adamc@524 52 \textrm{Kinds} & \kappa &::=& \mt{Type} & \textrm{proper types} \\
adamc@525 53 &&& \mt{Unit} & \textrm{the trivial constructor} \\
adamc@525 54 &&& \mt{Name} & \textrm{field names} \\
adamc@525 55 &&& \kappa \to \kappa & \textrm{type-level functions} \\
adamc@525 56 &&& \{\kappa\} & \textrm{type-level records} \\
adamc@525 57 &&& (\kappa\times^+) & \textrm{type-level tuples} \\
adamc@529 58 &&& \_\_ & \textrm{wildcard} \\
adamc@525 59 &&& (\kappa) & \textrm{explicit precedence} \\
adamc@524 60 \end{array}$$
adamc@524 61
adamc@524 62 Ur supports several different notions of functions that take types as arguments. These arguments can be either implicit, causing them to be inferred at use sites; or explicit, forcing them to be specified manually at use sites. There is a common explicitness annotation convention applied at the definitions of and in the types of such functions.
adamc@524 63 $$\begin{array}{rrcll}
adamc@524 64 \textrm{Explicitness} & ? &::=& :: & \textrm{explicit} \\
adamc@525 65 &&& \; ::: & \textrm{implicit}
adamc@524 66 \end{array}$$
adamc@524 67
adamc@524 68 \emph{Constructors} are the main class of compile-time-only data. They include proper types and are classified by kinds.
adamc@524 69 $$\begin{array}{rrcll}
adamc@524 70 \textrm{Constructors} & c, \tau &::=& (c) :: \kappa & \textrm{kind annotation} \\
adamc@525 71 &&& x & \textrm{constructor variable} \\
adamc@524 72 \\
adamc@525 73 &&& \tau \to \tau & \textrm{function type} \\
adamc@525 74 &&& x \; ? \; \kappa \to \tau & \textrm{polymorphic function type} \\
adamc@525 75 &&& \$ c & \textrm{record type} \\
adamc@524 76 \\
adamc@525 77 &&& c \; c & \textrm{type-level function application} \\
adamc@525 78 &&& \lambda x \; ? \; \kappa \Rightarrow c & \textrm{type-level function abstraction} \\
adamc@524 79 \\
adamc@525 80 &&& () & \textrm{type-level unit} \\
adamc@525 81 &&& \#X & \textrm{field name} \\
adamc@524 82 \\
adamc@525 83 &&& [(c = c)^*] & \textrm{known-length type-level record} \\
adamc@525 84 &&& c \rc c & \textrm{type-level record concatenation} \\
adamc@525 85 &&& \mt{fold} & \textrm{type-level record fold} \\
adamc@524 86 \\
adamc@525 87 &&& (c^+) & \textrm{type-level tuple} \\
adamc@525 88 &&& c.n & \textrm{type-level tuple projection ($n \in \mathbb N^+$)} \\
adamc@524 89 \\
adamc@525 90 &&& \lambda [c \sim c] \Rightarrow c & \textrm{guarded constructor} \\
adamc@524 91 \\
adamc@529 92 &&& \_ :: \kappa & \textrm{wildcard} \\
adamc@525 93 &&& (c) & \textrm{explicit precedence} \\
adamc@525 94 \end{array}$$
adamc@525 95
adamc@525 96 Modules of the module system are described by \emph{signatures}.
adamc@525 97 $$\begin{array}{rrcll}
adamc@525 98 \textrm{Signatures} & S &::=& \mt{sig} \; s^* \; \mt{end} & \textrm{constant} \\
adamc@525 99 &&& X & \textrm{variable} \\
adamc@525 100 &&& \mt{functor}(X : S) : S & \textrm{functor} \\
adamc@529 101 &&& S \; \mt{where} \; \mt{con} \; x = c & \textrm{concretizing an abstract constructor} \\
adamc@525 102 &&& M.X & \textrm{projection from a module} \\
adamc@525 103 \\
adamc@525 104 \textrm{Signature items} & s &::=& \mt{con} \; x :: \kappa & \textrm{abstract constructor} \\
adamc@525 105 &&& \mt{con} \; x :: \kappa = c & \textrm{concrete constructor} \\
adamc@528 106 &&& \mt{datatype} \; x \; x^* = dc\mid^+ & \textrm{algebraic datatype definition} \\
adamc@529 107 &&& \mt{datatype} \; x = \mt{datatype} \; M.x & \textrm{algebraic datatype import} \\
adamc@525 108 &&& \mt{val} \; x : \tau & \textrm{value} \\
adamc@525 109 &&& \mt{structure} \; X : S & \textrm{sub-module} \\
adamc@525 110 &&& \mt{signature} \; X = S & \textrm{sub-signature} \\
adamc@525 111 &&& \mt{include} \; S & \textrm{signature inclusion} \\
adamc@525 112 &&& \mt{constraint} \; c \sim c & \textrm{record disjointness constraint} \\
adamc@525 113 &&& \mt{class} \; x & \textrm{abstract type class} \\
adamc@525 114 &&& \mt{class} \; x = c & \textrm{concrete type class} \\
adamc@525 115 \\
adamc@525 116 \textrm{Datatype constructors} & dc &::=& X & \textrm{nullary constructor} \\
adamc@525 117 &&& X \; \mt{of} \; \tau & \textrm{unary constructor} \\
adamc@524 118 \end{array}$$
adamc@524 119
adamc@526 120 \emph{Patterns} are used to describe structural conditions on expressions, such that expressions may be tested against patterns, generating assignments to pattern variables if successful.
adamc@526 121 $$\begin{array}{rrcll}
adamc@526 122 \textrm{Patterns} & p &::=& \_ & \textrm{wildcard} \\
adamc@526 123 &&& x & \textrm{variable} \\
adamc@526 124 &&& \ell & \textrm{constant} \\
adamc@526 125 &&& \hat{X} & \textrm{nullary constructor} \\
adamc@526 126 &&& \hat{X} \; p & \textrm{unary constructor} \\
adamc@526 127 &&& \{(x = p,)^*\} & \textrm{rigid record pattern} \\
adamc@526 128 &&& \{(x = p,)^+, \ldots\} & \textrm{flexible record pattern} \\
adamc@527 129 &&& (p) & \textrm{explicit precedence} \\
adamc@526 130 \\
adamc@529 131 \textrm{Qualified capitalized variables} & \hat{X} &::=& X & \textrm{not from a module} \\
adamc@526 132 &&& M.X & \textrm{projection from a module} \\
adamc@526 133 \end{array}$$
adamc@526 134
adamc@527 135 \emph{Expressions} are the main run-time entities, corresponding to both ``expressions'' and ``statements'' in mainstream imperative languages.
adamc@527 136 $$\begin{array}{rrcll}
adamc@527 137 \textrm{Expressions} & e &::=& e : \tau & \textrm{type annotation} \\
adamc@529 138 &&& \hat{x} & \textrm{variable} \\
adamc@529 139 &&& \hat{X} & \textrm{datatype constructor} \\
adamc@527 140 &&& \ell & \textrm{constant} \\
adamc@527 141 \\
adamc@527 142 &&& e \; e & \textrm{function application} \\
adamc@527 143 &&& \lambda x : \tau \Rightarrow e & \textrm{function abstraction} \\
adamc@527 144 &&& e [c] & \textrm{polymorphic function application} \\
adamc@527 145 &&& \lambda x \; ? \; \kappa \Rightarrow e & \textrm{polymorphic function abstraction} \\
adamc@527 146 \\
adamc@527 147 &&& \{(c = e,)^*\} & \textrm{known-length record} \\
adamc@527 148 &&& e.c & \textrm{record field projection} \\
adamc@527 149 &&& e \rc e & \textrm{record concatenation} \\
adamc@527 150 &&& e \rcut c & \textrm{removal of a single record field} \\
adamc@527 151 &&& e \rcutM c & \textrm{removal of multiple record fields} \\
adamc@527 152 &&& \mt{fold} & \textrm{fold over fields of a type-level record} \\
adamc@527 153 \\
adamc@527 154 &&& \mt{let} \; ed^* \; \mt{in} \; e \; \mt{end} & \textrm{local definitions} \\
adamc@527 155 \\
adamc@527 156 &&& \mt{case} \; e \; \mt{of} \; (p \Rightarrow e|)^+ & \textrm{pattern matching} \\
adamc@527 157 \\
adamc@527 158 &&& \lambda [c \sim c] \Rightarrow e & \textrm{guarded expression} \\
adamc@527 159 \\
adamc@527 160 &&& \_ & \textrm{wildcard} \\
adamc@527 161 &&& (e) & \textrm{explicit precedence} \\
adamc@527 162 \\
adamc@527 163 \textrm{Local declarations} & ed &::=& \cd{val} \; x : \tau = e & \textrm{non-recursive value} \\
adamc@527 164 &&& \cd{val} \; \cd{rec} \; (x : \tau = e \; \cd{and})^+ & \textrm{mutually-recursive values} \\
adamc@529 165 \\
adamc@529 166 \textrm{Qualified uncapitalized variables} & \hat{x} &::=& x & \textrm{not from a module} \\
adamc@529 167 &&& M.x & \textrm{projection from a module} \\
adamc@527 168 \end{array}$$
adamc@527 169
adamc@528 170 \emph{Declarations} primarily bring new symbols into context.
adamc@528 171 $$\begin{array}{rrcll}
adamc@528 172 \textrm{Declarations} & d &::=& \mt{con} \; x :: \kappa = c & \textrm{constructor synonym} \\
adamc@528 173 &&& \mt{datatype} \; x \; x^* = dc\mid^+ & \textrm{algebraic datatype definition} \\
adamc@529 174 &&& \mt{datatype} \; x = \mt{datatype} \; M.x & \textrm{algebraic datatype import} \\
adamc@528 175 &&& \mt{val} \; x : \tau = e & \textrm{value} \\
adamc@528 176 &&& \mt{val} \; \cd{rec} \; (x : \tau = e \; \mt{and})^+ & \textrm{mutually-recursive values} \\
adamc@528 177 &&& \mt{structure} \; X : S = M & \textrm{module definition} \\
adamc@528 178 &&& \mt{signature} \; X = S & \textrm{signature definition} \\
adamc@528 179 &&& \mt{open} \; M & \textrm{module inclusion} \\
adamc@528 180 &&& \mt{constraint} \; c \sim c & \textrm{record disjointness constraint} \\
adamc@528 181 &&& \mt{open} \; \mt{constraints} \; M & \textrm{inclusion of just the constraints from a module} \\
adamc@528 182 &&& \mt{table} \; x : c & \textrm{SQL table} \\
adamc@528 183 &&& \mt{sequence} \; x & \textrm{SQL sequence} \\
adamc@528 184 &&& \mt{class} \; x = c & \textrm{concrete type class} \\
adamc@529 185 &&& \mt{cookie} \; x : \tau & \textrm{HTTP cookie} \\
adamc@528 186 \\
adamc@529 187 \textrm{Modules} & M &::=& \mt{struct} \; d^* \; \mt{end} & \textrm{constant} \\
adamc@529 188 &&& X & \textrm{variable} \\
adamc@529 189 &&& M.X & \textrm{projection} \\
adamc@529 190 &&& M(M) & \textrm{functor application} \\
adamc@529 191 &&& \mt{functor}(X : S) : S = M & \textrm{functor abstraction} \\
adamc@528 192 \end{array}$$
adamc@528 193
adamc@528 194 There are two kinds of Ur files. A file named $M\texttt{.ur}$ is an \emph{implementation file}, and it should contain a sequence of declarations $d^*$. A file named $M\texttt{.urs}$ is an \emph{interface file}; it must always have a matching $M\texttt{.ur}$ and should contain a sequence of signature items $s^*$. When both files are present, the overall effect is the same as a monolithic declaration $\mt{structure} \; M : \mt{sig} \; s^* \; \mt{end} = \mt{struct} \; d^* \; \mt{end}$. When no interface file is included, the overall effect is similar, with a signature for module $M$ being inferred rather than just checked against an interface.
adamc@527 195
adamc@529 196 \subsection{Shorthands}
adamc@529 197
adamc@529 198 There are a variety of derived syntactic forms that elaborate into the core syntax from the last subsection. We will present the additional forms roughly following the order in which we presented the constructs that they elaborate into.
adamc@529 199
adamc@529 200 In many contexts where record fields are expected, like in a projection $e.c$, a constant field may be written as simply $X$, rather than $\#X$.
adamc@529 201
adamc@529 202 A record type may be written $\{(c = c,)^*\}$, which elaborates to $\$[(c = c,)^*]$.
adamc@529 203
adamc@529 204 A tuple type $(\tau_1, \ldots, \tau_n)$ expands to a record type $\{1 = \tau_1, \ldots, n = \tau_n\}$, with natural numbers as field names. A tuple pattern $(p_1, \ldots, p_n)$ expands to a rigid record pattern $\{1 = p_1, \ldots, n = p_n\}$. Positive natural numbers may be used in most places where field names would be allowed.
adamc@529 205
adamc@529 206 In general, several adjacent $\lambda$ forms may be combined into one, and kind and type annotations may be omitted, in which case they are implicitly included as wildcards. More formally, for constructor-level abstractions, we can define a new non-terminal $b ::= x \mid (x :: \kappa) \mid [c \sim c]$ and allow composite abstractions of the form $\lambda b^+ \Rightarrow c$, elaborating into the obvious sequence of one core $\lambda$ per element of $b^+$.
adamc@529 207
adamc@529 208 For any signature item or declaration that defines some entity to be equal to $A$ with classification annotation $B$ (e.g., $\mt{val} \; x : B = A$), $B$ and the preceding colon (or similar punctuation) may be omitted, in which case it is filled in as a wildcard.
adamc@529 209
adamc@529 210 A signature item or declaration $\mt{type} \; x$ or $\mt{type} \; x = \tau$ is elaborated into $\mt{con} \; x :: \mt{Type}$ or $\mt{con} \; x :: \mt{Type} = \tau$, respectively.
adamc@529 211
adamc@529 212 A signature item or declaration $\mt{class} \; x = \lambda y :: \mt{Type} \Rightarrow c$ may be abbreviated $\mt{class} \; x \; y = c$.
adamc@529 213
adamc@529 214 Handling of implicit and explicit constructor arguments may be tweaked with some prefixes to variable references. An expression $@x$ is a version of $x$ where all implicit constructor arguments have been made explicit. An expression $@@x$ achieves the same effect, additionally halting automatic resolution of type class instances. The same syntax works for variables projected out of modules and for capitalized variables (datatype constructors).
adamc@529 215
adamc@529 216 At the expression level, an analogue is available of the composite $\lambda$ form for constructors. We define the language of binders as $b ::= x \mid (x : \tau) \mid (x \; ? \; \kappa) \mid [c \sim c]$. A lone variable $x$ as a binder stands for an expression variable of unspecified type.
adamc@529 217
adamc@529 218 A $\mt{val}$ or $\mt{val} \; \mt{rec}$ declaration may include expression binders before the equal sign, following the binder grammar from the last paragraph. Such declarations are elaborated into versions that add additional $\lambda$s to the fronts of the righthand sides, as appropriate. The keyword $\mt{fun}$ is a synonym for $\mt{val} \; \mt{rec}$.
adamc@529 219
adamc@529 220 A signature item $\mt{functor} \; X_1 \; (X_2 : S_1) : S_2$ is elaborated into $\mt{structure} \; X_1 : \mt{functor}(X_2 : S_1) : S_2$. A declaration $\mt{functor} \; X_1 \; (X_2 : S_1) : S_2 = M$ is elaborated into $\mt{structure} \; X_1 : \mt{functor}(X_2 : S_1) : S_2 = \mt{functor}(X_2 : S_1) : S_2 = M$.
adamc@529 221
adamc@529 222 A declaration $\mt{table} \; x : \{(c = c,)^*\}$ is elaborated into $\mt{table} \; x : [(c = c,)^*]$
adamc@529 223
adamc@529 224 The syntax $\mt{where} \; \mt{type}$ is an alternate form of $\mt{where} \; \mt{con}$.
adamc@529 225
adamc@529 226 The syntax $\mt{if} \; e \; \mt{then} \; e_1 \; \mt{else} \; e_2$ expands to $\mt{case} \; e \; \mt{of} \; \mt{Basis}.\mt{True} \Rightarrow e_1 \mid \mt{Basis}.\mt{False} \Rightarrow e_2$.
adamc@529 227
adamc@529 228 There are infix operator syntaxes for a number of functions defined in the $\mt{Basis}$ module. There is $=$ for $\mt{eq}$, $\neq$ for $\mt{neq}$, $-$ for $\mt{neg}$ (as a prefix operator) and $\mt{minus}$, $+$ for $\mt{plus}$, $\times$ for $\mt{times}$, $/$ for $\mt{div}$, $\%$ for $\mt{mod}$, $<$ for $\mt{lt}$, $\leq$ for $\mt{le}$, $>$ for $\mt{gt}$, and $\geq$ for $\mt{ge}$.
adamc@529 229
adamc@529 230 A signature item $\mt{table} \; x : c$ is shorthand for $\mt{val} \; x : \mt{Basis}.\mt{sql\_table} \; c$. $\mt{sequence} \; x$ is short for $\mt{val} \; x : \mt{Basis}.\mt{sql\_sequence}$, and $\mt{cookie} \; x : \tau$ is shorthand for $\mt{val} \; x : \mt{Basis}.\mt{http\_cookie} \; \tau$.
adamc@529 231
adamc@524 232 \end{document}