annotate doc/manual.tex @ 528:9e2abd85529b

Declarations and modules
author Adam Chlipala <adamc@hcoop.net>
date Thu, 27 Nov 2008 15:43:10 -0500
parents 74dd4dca9e32
children 4df69124e9c5
rev   line source
adamc@524 1 \documentclass{article}
adamc@524 2 \usepackage{fullpage,amsmath,amssymb,proof}
adamc@524 3
adamc@524 4 \newcommand{\cd}[1]{\texttt{#1}}
adamc@524 5 \newcommand{\mt}[1]{\mathsf{#1}}
adamc@524 6
adamc@524 7 \newcommand{\rc}{+ \hspace{-.075in} + \;}
adamc@527 8 \newcommand{\rcut}{\; \texttt{--} \;}
adamc@527 9 \newcommand{\rcutM}{\; \texttt{---} \;}
adamc@524 10
adamc@524 11 \begin{document}
adamc@524 12
adamc@524 13 \title{The Ur/Web Manual}
adamc@524 14 \author{Adam Chlipala}
adamc@524 15
adamc@524 16 \maketitle
adamc@524 17
adamc@524 18 \section{Syntax}
adamc@524 19
adamc@524 20 \subsection{Lexical Conventions}
adamc@524 21
adamc@524 22 We give the Ur language definition in \LaTeX $\;$ math mode, since that is prettier than monospaced ASCII. The corresponding ASCII syntax can be read off directly. Here is the key for mapping math symbols to ASCII character sequences.
adamc@524 23
adamc@524 24 \begin{center}
adamc@524 25 \begin{tabular}{rl}
adamc@524 26 \textbf{\LaTeX} & \textbf{ASCII} \\
adamc@524 27 $\to$ & \cd{->} \\
adamc@524 28 $\times$ & \cd{*} \\
adamc@524 29 $\lambda$ & \cd{fn} \\
adamc@524 30 $\Rightarrow$ & \cd{=>} \\
adamc@527 31 & \cd{---} \\
adamc@524 32 \\
adamc@524 33 $x$ & Normal textual identifier, not beginning with an uppercase letter \\
adamc@525 34 $X$ & Normal textual identifier, beginning with an uppercase letter \\
adamc@524 35 \end{tabular}
adamc@524 36 \end{center}
adamc@524 37
adamc@525 38 We often write syntax like $e^*$ to indicate zero or more copies of $e$, $e^+$ to indicate one or more copies, and $e,^*$ and $e,^+$ to indicate multiple copies separated by commas. Another separator may be used in place of a comma. The $e$ term may be surrounded by parentheses to indicate grouping; those parentheses should not be included in the actual ASCII.
adamc@524 39
adamc@526 40 We write $\ell$ for literals of the primitive types, for the most part following C conventions. There are $\mt{int}$, $\mt{float}$, and $\mt{string}$ literals.
adamc@526 41
adamc@527 42 This version of the manual doesn't include operator precedences; see \texttt{src/urweb.grm} for that.
adamc@527 43
adamc@524 44 \subsection{Core Syntax}
adamc@524 45
adamc@524 46 \emph{Kinds} classify types and other compile-time-only entities. Each kind in the grammar is listed with a description of the sort of data it classifies.
adamc@524 47 $$\begin{array}{rrcll}
adamc@524 48 \textrm{Kinds} & \kappa &::=& \mt{Type} & \textrm{proper types} \\
adamc@525 49 &&& \mt{Unit} & \textrm{the trivial constructor} \\
adamc@525 50 &&& \mt{Name} & \textrm{field names} \\
adamc@525 51 &&& \kappa \to \kappa & \textrm{type-level functions} \\
adamc@525 52 &&& \{\kappa\} & \textrm{type-level records} \\
adamc@525 53 &&& (\kappa\times^+) & \textrm{type-level tuples} \\
adamc@527 54 &&& \_ & \textrm{wildcard} \\
adamc@525 55 &&& (\kappa) & \textrm{explicit precedence} \\
adamc@524 56 \end{array}$$
adamc@524 57
adamc@524 58 Ur supports several different notions of functions that take types as arguments. These arguments can be either implicit, causing them to be inferred at use sites; or explicit, forcing them to be specified manually at use sites. There is a common explicitness annotation convention applied at the definitions of and in the types of such functions.
adamc@524 59 $$\begin{array}{rrcll}
adamc@524 60 \textrm{Explicitness} & ? &::=& :: & \textrm{explicit} \\
adamc@525 61 &&& \; ::: & \textrm{implicit}
adamc@524 62 \end{array}$$
adamc@524 63
adamc@524 64 \emph{Constructors} are the main class of compile-time-only data. They include proper types and are classified by kinds.
adamc@524 65 $$\begin{array}{rrcll}
adamc@524 66 \textrm{Constructors} & c, \tau &::=& (c) :: \kappa & \textrm{kind annotation} \\
adamc@525 67 &&& x & \textrm{constructor variable} \\
adamc@524 68 \\
adamc@525 69 &&& \tau \to \tau & \textrm{function type} \\
adamc@525 70 &&& x \; ? \; \kappa \to \tau & \textrm{polymorphic function type} \\
adamc@525 71 &&& \$ c & \textrm{record type} \\
adamc@524 72 \\
adamc@525 73 &&& c \; c & \textrm{type-level function application} \\
adamc@525 74 &&& \lambda x \; ? \; \kappa \Rightarrow c & \textrm{type-level function abstraction} \\
adamc@524 75 \\
adamc@525 76 &&& () & \textrm{type-level unit} \\
adamc@525 77 &&& \#X & \textrm{field name} \\
adamc@524 78 \\
adamc@525 79 &&& [(c = c)^*] & \textrm{known-length type-level record} \\
adamc@525 80 &&& c \rc c & \textrm{type-level record concatenation} \\
adamc@525 81 &&& \mt{fold} & \textrm{type-level record fold} \\
adamc@524 82 \\
adamc@525 83 &&& (c^+) & \textrm{type-level tuple} \\
adamc@525 84 &&& c.n & \textrm{type-level tuple projection ($n \in \mathbb N^+$)} \\
adamc@524 85 \\
adamc@525 86 &&& \lambda [c \sim c] \Rightarrow c & \textrm{guarded constructor} \\
adamc@524 87 \\
adamc@527 88 &&& \_ & \textrm{wildcard} \\
adamc@525 89 &&& (c) & \textrm{explicit precedence} \\
adamc@525 90 \end{array}$$
adamc@525 91
adamc@525 92 Modules of the module system are described by \emph{signatures}.
adamc@525 93 $$\begin{array}{rrcll}
adamc@525 94 \textrm{Signatures} & S &::=& \mt{sig} \; s^* \; \mt{end} & \textrm{constant} \\
adamc@525 95 &&& X & \textrm{variable} \\
adamc@525 96 &&& \mt{functor}(X : S) : S & \textrm{functor} \\
adamc@525 97 &&& S \; \mt{where} \; x = c & \textrm{concretizing an abstract constructor} \\
adamc@525 98 &&& M.X & \textrm{projection from a module} \\
adamc@525 99 \\
adamc@525 100 \textrm{Signature items} & s &::=& \mt{con} \; x :: \kappa & \textrm{abstract constructor} \\
adamc@525 101 &&& \mt{con} \; x :: \kappa = c & \textrm{concrete constructor} \\
adamc@528 102 &&& \mt{datatype} \; x \; x^* = dc\mid^+ & \textrm{algebraic datatype definition} \\
adamc@525 103 &&& \mt{datatype} \; x = M.x & \textrm{algebraic datatype import} \\
adamc@525 104 &&& \mt{val} \; x : \tau & \textrm{value} \\
adamc@525 105 &&& \mt{structure} \; X : S & \textrm{sub-module} \\
adamc@525 106 &&& \mt{signature} \; X = S & \textrm{sub-signature} \\
adamc@525 107 &&& \mt{include} \; S & \textrm{signature inclusion} \\
adamc@525 108 &&& \mt{constraint} \; c \sim c & \textrm{record disjointness constraint} \\
adamc@525 109 &&& \mt{class} \; x & \textrm{abstract type class} \\
adamc@525 110 &&& \mt{class} \; x = c & \textrm{concrete type class} \\
adamc@525 111 \\
adamc@525 112 \textrm{Datatype constructors} & dc &::=& X & \textrm{nullary constructor} \\
adamc@525 113 &&& X \; \mt{of} \; \tau & \textrm{unary constructor} \\
adamc@524 114 \end{array}$$
adamc@524 115
adamc@526 116 \emph{Patterns} are used to describe structural conditions on expressions, such that expressions may be tested against patterns, generating assignments to pattern variables if successful.
adamc@526 117 $$\begin{array}{rrcll}
adamc@526 118 \textrm{Patterns} & p &::=& \_ & \textrm{wildcard} \\
adamc@526 119 &&& x & \textrm{variable} \\
adamc@526 120 &&& \ell & \textrm{constant} \\
adamc@526 121 &&& \hat{X} & \textrm{nullary constructor} \\
adamc@526 122 &&& \hat{X} \; p & \textrm{unary constructor} \\
adamc@526 123 &&& \{(x = p,)^*\} & \textrm{rigid record pattern} \\
adamc@526 124 &&& \{(x = p,)^+, \ldots\} & \textrm{flexible record pattern} \\
adamc@527 125 &&& (p) & \textrm{explicit precedence} \\
adamc@526 126 \\
adamc@526 127 \textrm{Qualified capitalized variable} & \hat{X} &::=& X & \textrm{not from a module} \\
adamc@526 128 &&& M.X & \textrm{projection from a module} \\
adamc@526 129 \end{array}$$
adamc@526 130
adamc@527 131 \emph{Expressions} are the main run-time entities, corresponding to both ``expressions'' and ``statements'' in mainstream imperative languages.
adamc@527 132 $$\begin{array}{rrcll}
adamc@527 133 \textrm{Expressions} & e &::=& e : \tau & \textrm{type annotation} \\
adamc@527 134 &&& x & \textrm{variable} \\
adamc@527 135 &&& \ell & \textrm{constant} \\
adamc@527 136 \\
adamc@527 137 &&& e \; e & \textrm{function application} \\
adamc@527 138 &&& \lambda x : \tau \Rightarrow e & \textrm{function abstraction} \\
adamc@527 139 &&& e [c] & \textrm{polymorphic function application} \\
adamc@527 140 &&& \lambda x \; ? \; \kappa \Rightarrow e & \textrm{polymorphic function abstraction} \\
adamc@527 141 \\
adamc@527 142 &&& \{(c = e,)^*\} & \textrm{known-length record} \\
adamc@527 143 &&& e.c & \textrm{record field projection} \\
adamc@527 144 &&& e \rc e & \textrm{record concatenation} \\
adamc@527 145 &&& e \rcut c & \textrm{removal of a single record field} \\
adamc@527 146 &&& e \rcutM c & \textrm{removal of multiple record fields} \\
adamc@527 147 &&& \mt{fold} & \textrm{fold over fields of a type-level record} \\
adamc@527 148 \\
adamc@527 149 &&& \mt{let} \; ed^* \; \mt{in} \; e \; \mt{end} & \textrm{local definitions} \\
adamc@527 150 \\
adamc@527 151 &&& \mt{case} \; e \; \mt{of} \; (p \Rightarrow e|)^+ & \textrm{pattern matching} \\
adamc@527 152 \\
adamc@527 153 &&& \lambda [c \sim c] \Rightarrow e & \textrm{guarded expression} \\
adamc@527 154 \\
adamc@527 155 &&& \_ & \textrm{wildcard} \\
adamc@527 156 &&& (e) & \textrm{explicit precedence} \\
adamc@527 157 \\
adamc@527 158 \textrm{Local declarations} & ed &::=& \cd{val} \; x : \tau = e & \textrm{non-recursive value} \\
adamc@527 159 &&& \cd{val} \; \cd{rec} \; (x : \tau = e \; \cd{and})^+ & \textrm{mutually-recursive values} \\
adamc@527 160 \end{array}$$
adamc@527 161
adamc@528 162 \emph{Declarations} primarily bring new symbols into context.
adamc@528 163 $$\begin{array}{rrcll}
adamc@528 164 \textrm{Declarations} & d &::=& \mt{con} \; x :: \kappa = c & \textrm{constructor synonym} \\
adamc@528 165 &&& \mt{datatype} \; x \; x^* = dc\mid^+ & \textrm{algebraic datatype definition} \\
adamc@528 166 &&& \mt{datatype} \; x = M.x & \textrm{algebraic datatype import} \\
adamc@528 167 &&& \mt{val} \; x : \tau = e & \textrm{value} \\
adamc@528 168 &&& \mt{val} \; \cd{rec} \; (x : \tau = e \; \mt{and})^+ & \textrm{mutually-recursive values} \\
adamc@528 169 &&& \mt{structure} \; X : S = M & \textrm{module definition} \\
adamc@528 170 &&& \mt{signature} \; X = S & \textrm{signature definition} \\
adamc@528 171 &&& \mt{open} \; M & \textrm{module inclusion} \\
adamc@528 172 &&& \mt{constraint} \; c \sim c & \textrm{record disjointness constraint} \\
adamc@528 173 &&& \mt{open} \; \mt{constraints} \; M & \textrm{inclusion of just the constraints from a module} \\
adamc@528 174 &&& \mt{table} \; x : c & \textrm{SQL table} \\
adamc@528 175 &&& \mt{sequence} \; x & \textrm{SQL sequence} \\
adamc@528 176 &&& \mt{class} \; x = c & \textrm{concrete type class} \\
adamc@528 177 &&& \mt{cookie} \; x : c & \textrm{HTTP cookie} \\
adamc@528 178 \\
adamc@528 179 \textrm{Modules} & M &::=& \mt{struct} \; d^* \; \mt{end} & \mt{constant} \\
adamc@528 180 &&& X & \mt{variable} \\
adamc@528 181 &&& M.X & \mt{projection} \\
adamc@528 182 &&& M(M) & \mt{functor application} \\
adamc@528 183 &&& \mt{functor}(X : S) : S = M & \mt{functor abstraction} \\
adamc@528 184 \end{array}$$
adamc@528 185
adamc@528 186 There are two kinds of Ur files. A file named $M\texttt{.ur}$ is an \emph{implementation file}, and it should contain a sequence of declarations $d^*$. A file named $M\texttt{.urs}$ is an \emph{interface file}; it must always have a matching $M\texttt{.ur}$ and should contain a sequence of signature items $s^*$. When both files are present, the overall effect is the same as a monolithic declaration $\mt{structure} \; M : \mt{sig} \; s^* \; \mt{end} = \mt{struct} \; d^* \; \mt{end}$. When no interface file is included, the overall effect is similar, with a signature for module $M$ being inferred rather than just checked against an interface.
adamc@527 187
adamc@524 188 \end{document}