annotate src/ur/feed.ur @ 11:43c3fbd8527a

Add variant of children allowing specification of optional matches.
author Karn Kallio <kkallio@eka>
date Thu, 23 Jun 2011 23:40:29 -0430
parents edc2b467f818
children 7eea7ff1904c
rev   line source
(* Start-up task: evaluates FeedFfi.init when the application initializes.
 * What the FFI initializer sets up is not visible in this file. *)
adam@0 1 task initialize = fn () => FeedFfi.init
adam@0 2
(* A pattern is a small state machine driven by tag events.
 *   internal - type of the matcher's private state
 *   output   - type of the value produced by a completed match
 * Fields:
 *   Initial  - state before any tag has been seen
 *   EnterTag - step on an opening tag (tag name, attribute name/value list,
 *              optional character data); None rejects the tag
 *   ExitTag  - step on a closing tag; None rejects it
 *   Finished - Some (result, flag) when the state is a completed match;
 *              app' keeps the current state when the flag is True and
 *              restarts from Initial when it is False *)
adam@4 3 con pattern internal output = {Initial : internal,
adam@4 4 EnterTag : {Tag : string, Attrs : list (string * string), Cdata : option string} -> internal -> option internal,
adam@4 5 ExitTag : internal -> option internal,
adam@4 6 Finished : internal -> option (output * bool)}
adam@4 7
(* Pattern that accepts every opening and closing tag but never reports a
 * finished match (Finished is always None); its output type is the empty
 * variant. *)
adam@4 8 val null : pattern unit (variant []) =
adam@4 9 {Initial = (),
adam@4 10 EnterTag = fn _ () => Some (),
adam@4 11 ExitTag = fn () => Some (),
adam@4 12 Finished = fn () => None}
adam@1 13
(* State of a single-tag pattern built by tagG: None until a tag has been
 * accepted, afterwards the looked-up attribute values (one option string per
 * field of attrs) together with the tag's optional character data. *)
adam@6 14 con tagInternal (attrs :: {Unit}) = option {Attrs : $(mapU (option string) attrs), Cdata : option string}
adam@1 15
(* Generic single-tag pattern.
 *   fl     - folder over the attribute row
 *   accept - turns the captured attribute lookups and character data into the
 *            pattern's output, or None to reject the tag
 *   name   - tag name that must match exactly
 *   attrs  - for each field, the XML attribute name to look up
 * EnterTag ignores the previous state, ExitTag always rejects, and Finished
 * reports the accepted value with a False continuation flag. *)
adam@6 16 fun tagG [attrs ::: {Unit}] [t ::: Type] (fl : folder attrs) (accept : {Attrs : $(mapU (option string) attrs), Cdata : option string} -> option t)
adam@3 17 (name : string) (attrs : $(mapU string attrs))
adam@3 18 : pattern (tagInternal attrs) t =
adam@4 19 {Initial = None,
adam@4 20 EnterTag = fn tinfo _ =>
adam@4 21 if tinfo.Tag <> name then
adam@4 22 None
adam@4 23 else
adam@6 24 let
(* Look each requested attribute name up in the tag's attribute list. *)
adam@6 25 val v = {Attrs = @mp [fn _ => string] [fn _ => option string]
adam@6 26 (fn [u] aname => List.assoc aname tinfo.Attrs)
adam@6 27 fl attrs,
adam@6 28 Cdata = tinfo.Cdata}
adam@6 29 in
(* accept is only used as a filter here; the raw lookups are what gets stored. *)
adam@6 30 case accept v of
adam@6 31 None => None
adam@6 32 | Some _ => Some (Some v)
adam@6 33 end,
adam@4 34 ExitTag = fn _ => None,
adam@4 35 Finished = fn state => case state of
adam@4 36 None => None
adam@4 37 | Some state =>
(* Re-run accept on the stored lookups to produce the actual output. *)
adam@4 38 case accept state of
adam@4 39 None => None
adam@4 40 | Some v => Some (v, False)}
adam@3 41
(* Converts a record of optional strings into an optional record of strings:
 * Some only when every field is present, None as soon as one is missing. *)
adam@6 42 fun allPresent [attrs ::: {Unit}] (fl : folder attrs) (attrs : $(mapU (option string) attrs)) : option $(mapU string attrs) =
adam@6 43 @foldUR [option string] [fn attrs => option $(mapU string attrs)]
adam@6 44 (fn [nm ::_] [r ::_] [[nm] ~ r] os acc =>
adam@6 45 case (os, acc) of
adam@6 46 (Some s, Some acc) => Some ({nm = s} ++ acc)
adam@6 47 | _ => None)
adam@6 48 (Some {}) fl attrs
adam@6 49
(* Like allPresent, but with an optional expected value per field.
 *   vs    - expected values; None means any value is acceptable
 *   attrs - values actually found
 * A field passes when a value was found and it equals the expected value, if
 * one was given. The result carries the found values; any failing field makes
 * the whole result None. *)
kkallio@9 50 fun allPresentE [attrs ::: {Unit}] (fl : folder attrs) (vs : $(mapU (option string) attrs)) (attrs : $(mapU (option string) attrs))
kkallio@9 51 : option $(mapU string attrs) =
kkallio@9 52 @foldUR2 [option string] [option string] [fn attrs => option $(mapU string attrs)]
kkallio@9 53 (fn [nm ::_] [r ::_] [[nm] ~ r] os os' acc =>
kkallio@9 54 case (os, os', acc) of
kkallio@9 55 (Some s, Some s', Some acc) => if s = s' then Some ({nm = s'} ++ acc) else None
kkallio@9 56 | (None, Some s', Some acc) => Some ({nm = s'} ++ acc)
kkallio@9 57 | _ => None)
kkallio@9 58 (Some {}) fl vs attrs
kkallio@9 59
(* Matches a tag called name on which every attribute listed in attrs is
 * present. The output is the record of attribute values plus the tag's
 * optional character data. *)
adam@3 60 fun tag [attrs ::: {Unit}] (fl : folder attrs) (name : string) (attrs : $(mapU string attrs))
adam@3 61 : pattern (tagInternal attrs) {Attrs : $(mapU string attrs), Cdata : option string} =
adam@6 62 @tagG fl (fn r =>
adam@6 63 case @allPresent fl r.Attrs of
adam@6 64 None => None
(* Swap the optional-valued Attrs field for the fully-present one. *)
adam@6 65 | Some attrs => Some (r -- #Attrs ++ {Attrs = attrs}))
adam@6 66 name attrs
adam@3 67
(* Matches a tag called name on which every listed attribute is present; the
 * output is just the record of attribute values (character data is dropped). *)
adam@3 68 fun tagA [attrs ::: {Unit}] (fl : folder attrs) (name : string) (attrs : $(mapU string attrs))
adam@3 69 : pattern (tagInternal attrs) $(mapU string attrs) =
adam@6 70 @tagG fl (fn r => @allPresent fl r.Attrs) name attrs
kkallio@9 71
(* Variant of tagA where each field of attrs pairs an attribute name with an
 * optional required value. The tag matches when every attribute is present
 * and, where a required value was given, the found value equals it. *)
kkallio@9 72 fun tagAV [attrs ::: {Unit}] (fl : folder attrs) (name : string) (attrs : $(mapU (string * option string) attrs))
kkallio@9 73 : pattern (tagInternal attrs) $(mapU string attrs) =
kkallio@9 74 let
(* as: the attribute names; vs: the optional required values. *)
kkallio@9 75 val as = @mp [fn _ => (string * option string)] [fn _ => string] (fn [u] (x, _) => x) fl attrs
kkallio@9 76 val vs = @mp [fn _ => (string * option string)] [fn _ => option string] (fn [u] (_, x) => x) fl attrs
kkallio@9 77 in
kkallio@9 78 @tagG fl (fn r => @allPresentE fl vs r.Attrs) name as
kkallio@9 79 end
kkallio@9 80
(* Matches any tag called name; every listed attribute is optional and is
 * reported as an option string. *)
adam@6 81 fun tagAO [attrs ::: {Unit}] (fl : folder attrs) (name : string) (attrs : $(mapU string attrs))
adam@6 82 : pattern (tagInternal attrs) $(mapU (option string) attrs) =
adam@6 83 @tagG fl (fn r => Some (r.Attrs)) name attrs
adam@3 84
(* Matches a tag called name, looking at no attributes, and outputs its
 * character data; a tag without character data is rejected. *)
adam@3 85 fun tagC (name : string) : pattern (tagInternal []) string =
adam@3 86 tagG (fn r => r.Cdata) name {}
adam@1 87
(* Progress of one child pattern inside childrenG:
 *   Initial - not currently inside a candidate match
 *   Pending - has accepted at least one tag but is not finished yet
 *   Matched - its Finished function accepted the carried state *)
adam@4 88 datatype status a = Initial | Pending of a | Matched of a
adam@1 89
(* State of a parent-with-children pattern: None until the parent tag has been
 * entered, then the parent's state, the current nesting depth relative to the
 * parent tag, and one status per child pattern. *)
adam@1 90 con childrenInternal (parent :: Type) (children :: {Type}) = option (parent * int * $(map status children))
adam@1 91
(* Generic combinator: match a parent tag and, among the tags nested inside it,
 * a record of child patterns.
 *   ready    - receives each child's result (None when that child did not
 *              match) and decides the combined child output, or None to
 *              reject the whole match
 *   parent   - pattern for the enclosing tag
 *   children - record of child patterns
 *   fl       - folder over the children row
 * The output pairs the parent's result with whatever ready returns; the
 * continuation flag is the one reported by the parent pattern. *)
adam@6 92 fun childrenG [parentI ::: Type] [parent ::: Type] [children ::: {(Type * Type)}] [t ::: Type]
adam@6 93 (ready : $(map (fn (i, d) => option d) children) -> option t)
adam@6 94 (parent : pattern parentI parent) (children : $(map (fn (i, d) => pattern i d) children)) (fl : folder children)
adam@6 95 : pattern (childrenInternal parentI (map fst children)) (parent * t) =
adam@4 96 {Initial = None,
adam@4 97 EnterTag = fn tinfo state =>
adam@4 98 case state of
adam@4 99 None =>
(* Not inside a parent yet: only the parent pattern may accept this tag. *)
adam@4 100 (case parent.EnterTag tinfo parent.Initial of
adam@4 101 None => None
adam@4 102 | Some pstate => Some (Some (pstate, 1, @map0 [status] (fn [t ::_] => Initial)
adam@4 103 (@@Folder.mp [fst] [_] fl))))
adam@4 104 | Some (pstate, depth, cstates) =>
(* Depth 0 means the previous parent tag has already been closed, so this
 * tag is tried as a fresh parent with all children reset. *)
adam@6 105 if depth = 0 then
adam@6 106 case parent.EnterTag tinfo parent.Initial of
adam@6 107 None => None
adam@6 108 | Some pstate => Some (Some (pstate, 1, @map0 [status] (fn [t ::_] => Initial)
adam@6 109 (@@Folder.mp [fst] [_] fl)))
adam@6 110 else
(* Inside the parent: go one level deeper and offer the tag to every child
 * that has not matched yet. A rejection resets that child to Initial. *)
adam@6 111 Some (Some (pstate,
adam@6 112 depth+1,
adam@6 113 @map2 [fn (i, d) => pattern i d] [fn (i, d) => status i] [fn (i, d) => status i]
adam@6 114 (fn [p] (ch : pattern p.1 p.2) (cstate : status p.1) =>
adam@6 115 case cstate of
adam@6 116 Initial =>
adam@6 117 (case ch.EnterTag tinfo ch.Initial of
adam@6 118 None => Initial
adam@6 119 | Some v =>
adam@6 120 case ch.Finished v of
adam@6 121 None => Pending v
adam@6 122 | _ => Matched v)
adam@6 123 | Pending cstate =>
adam@6 124 (case ch.EnterTag tinfo cstate of
adam@6 125 None => Initial
adam@6 126 | Some v =>
adam@6 127 case ch.Finished v of
adam@6 128 None => Pending v
adam@6 129 | _ => Matched v)
(* Already Matched: keep the first match untouched. *)
adam@6 130 | v => v)
adam@6 131 fl children cstates)),
adam@4 132 ExitTag = fn state =>
adam@4 133 case state of
adam@4 134 None => None
(* Closing the parent itself: drop to depth 0 and leave the children as is. *)
adam@6 135 | Some (pstate, 1, cstates) => Some (Some (pstate, 0, cstates))
adam@4 136 | Some (pstate, depth, cstates) =>
(* Closing a nested tag: only Pending children are told about it. *)
adam@4 137 Some (Some (pstate, depth-1,
adam@4 138 @map2 [fn (i, d) => pattern i d] [fn (i, d) => status i] [fn (i, d) => status i]
adam@4 139 (fn [p] (ch : pattern p.1 p.2) (cstate : status p.1) =>
adam@4 140 case cstate of
adam@4 141 Pending cstate =>
adam@4 142 (case ch.ExitTag cstate of
adam@4 143 None => Initial
adam@4 144 | Some cstate' =>
adam@4 145 case ch.Finished cstate' of
adam@4 146 None => Pending cstate'
adam@4 147 | _ => Matched cstate')
adam@4 148 | _ => cstate)
adam@4 149 fl children cstates)),
adam@4 150 Finished = fn state =>
adam@4 151 case state of
(* A match can only be reported once the parent tag has been closed. *)
adam@6 152 Some (pstate, 0, cstates) =>
adam@4 153 (case parent.Finished pstate of
adam@4 154 None => None
adam@4 155 | Some (pdata, pcont) =>
(* Extract each Matched child's output (None for the others) and let
 * ready decide whether the combination is acceptable. *)
adam@6 156 case ready (@map2 [fn (i, d) => status i] [fn (i, d) => pattern i d] [fn (i, d) => option d]
adam@6 157 (fn [p] (cstate : status p.1) (ch : pattern p.1 p.2) =>
adam@6 158 case cstate of
adam@6 159 Matched v => Option.mp (fn p => p.1) (ch.Finished v)
adam@6 160 | _ => None) fl cstates children) of
adam@4 161 None => None
adam@4 162 | Some cdata => Some ((pdata, cdata), pcont))
adam@4 163 | _ => None}
adam@1 164
(* Parent-with-children pattern in which every child is required: the match
 * succeeds only when all child patterns produced a result, and the output
 * pairs the parent's result with the record of child results. *)
adam@6 165 fun children [parentI ::: Type] [parent ::: Type] [children ::: {(Type * Type)}]
adam@6 166 (parent : pattern parentI parent) (children : $(map (fn (i, d) => pattern i d) children)) (fl : folder children)
adam@6 167 : pattern (childrenInternal parentI (map fst children)) (parent * $(map snd children)) =
(* The ready function folds the optional child results into an optional
 * record, failing as soon as one child is missing. *)
adam@6 168 @childrenG (@foldR [fn (i, d) => option d] [fn cs => option $(map snd cs)]
adam@6 169 (fn [nm ::_] [p ::_] [r ::_] [[nm] ~ r] (cstate : option p.2) acc =>
adam@6 170 case (cstate, acc) of
adam@6 171 (Some cstate, Some acc) => Some ({nm = cstate} ++ acc)
adam@6 172 | _ => None)
adam@6 173 (Some {}) fl) parent children fl
adam@6 174
(* Parent-with-children pattern in which every child is optional: the record
 * of optional child results is always accepted (ready is simply Some). *)
adam@6 175 fun childrenO [parentI ::: Type] [parent ::: Type] [children ::: {(Type * Type)}]
adam@6 176 (parent : pattern parentI parent) (children : $(map (fn (i, d) => pattern i d) children)) (fl : folder children)
adam@6 177 : pattern (childrenInternal parentI (map fst children)) (parent * $(map (fn (i, d) => option d) children)) =
adam@6 178 @childrenG Some parent children fl
adam@6 179
(* Marks a value (in childrenO', a child pattern) as one that must match
 * (Required) or one that may be absent (Optional). *)
kkallio@11 180 datatype required t = Required of t | Optional of t
kkallio@11 181
(* Parent-with-children pattern where each child is individually tagged as
 * Required or Optional. The output has the same shape as childrenO's (one
 * option per child), but the match is rejected when a Required child
 * produced no result. *)
kkallio@11 182 fun childrenO' [parentI ::: Type] [parent ::: Type] [children ::: {(Type * Type)}]
kkallio@11 183 (parent : pattern parentI parent) (children : $(map (fn (i, d) => required (pattern i d)) children)) (fl : folder children)
kkallio@11 184 : pattern (childrenInternal parentI (map fst children)) (parent * $(map (fn (i, d) => option d) children)) =
kkallio@11 185 let
(* os: per child, True when the child is Optional. *)
kkallio@11 186 val os = @mp [fn (i, d) => required (pattern i d)] [fn (i, d) => bool]
kkallio@11 187 (fn [u] pat => case pat of
kkallio@11 188 Required _ => False
kkallio@11 189 | Optional _ => True) fl children
(* vs: the child patterns with the Required/Optional wrapper removed. *)
kkallio@11 190 val vs = @mp [fn (i, d) => required (pattern i d)] [fn (i, d) => pattern i d]
kkallio@11 191 (fn [u] pat => case pat of
kkallio@11 192 Required pat' => pat'
kkallio@11 193 | Optional pat' => pat') fl children
kkallio@11 194 in
(* The ready function keeps every child's optional result, but fails the
 * whole match when a non-optional child has None. *)
kkallio@11 195 @childrenG (@foldR2 [fn _ => bool] [fn (i, d) => option d] [fn r => option $(map (fn (i, d) => option d) r)]
kkallio@11 196 (fn [nm ::_] [p ::_] [r ::_] [[nm] ~ r] (isO : bool) (cstate : option p.2) acc =>
kkallio@11 197 case acc of
kkallio@11 198 None => None
kkallio@11 199 | Some acc =>
kkallio@11 200 if isO then
kkallio@11 201 Some ({nm = cstate} ++ acc)
kkallio@11 202 else
kkallio@11 203 case cstate of
kkallio@11 204 None => None
kkallio@11 205 | Some _ => Some ({nm = cstate} ++ acc))
kkallio@11 206 (Some {}) fl os) parent vs fl
kkallio@11 207 end
kkallio@11 208
(* State of a tree pattern: None until the parent tag has been entered, then
 * the parent's state, the nesting depth, and the child pattern's state (None
 * when the child has not been started or has rejected). *)
adam@4 209 con treeInternal (parent :: Type) (child :: Type) = option (parent * int * option child)
adam@4 210
(* Matches a parent tag and a single child pattern among the tags nested inside
 * it. Each completed child match is reported together with the parent's
 * result, and the continuation flag is always True. *)
adam@4 211 fun tree [parentI ::: Type] [parent ::: Type] [childI ::: Type] [child ::: Type]
adam@4 212 (parent : pattern parentI parent) (child : pattern childI child)
adam@4 213 : pattern (treeInternal parentI childI) (parent * child) =
adam@4 214 {Initial = None,
adam@4 215 EnterTag = fn tinfo state =>
adam@4 216 case state of
adam@4 217 None =>
(* Not inside a parent yet: only the parent pattern may accept this tag. *)
adam@4 218 (case parent.EnterTag tinfo parent.Initial of
adam@4 219 None => None
adam@4 220 | Some pstate => Some (Some (pstate, 1, None)))
adam@4 221 | Some (pstate, depth, cstate) =>
(* Inside the parent: feed the tag to the child, starting from the child's
 * Initial state when it has no state of its own. *)
adam@4 222 Some (Some (pstate,
adam@4 223 depth+1,
adam@4 224 child.EnterTag tinfo (Option.get child.Initial cstate))),
adam@4 225 ExitTag = fn state =>
adam@4 226 case state of
adam@4 227 None => None
(* Closing the parent tag itself ends the pattern. *)
adam@6 228 | Some (_, 1, _) => None
adam@4 229 | Some (pstate, depth, cstate) =>
adam@4 230 Some (Some (pstate, depth-1, Option.bind child.ExitTag cstate)),
adam@4 231 Finished = fn state =>
adam@4 232 case state of
adam@4 233 None => None
(* The depth is not consulted here: both the parent and the child must
 * report a finished state. *)
adam@4 234 | Some (pstate, _, cstate) =>
adam@4 235 case parent.Finished pstate of
adam@4 236 None => None
adam@4 237 | Some (pdata, _) =>
adam@4 238 case cstate of
adam@4 239 None => None
adam@4 240 | Some cstate =>
adam@4 241 case child.Finished cstate of
adam@4 242 None => None
adam@4 243 | Some (cdata, _) => Some ((pdata, cdata), True)}
adam@4 244
(* A fetched document is represented as its text. *)
adam@5 245 type document = string
(* show instance for document; the wildcard leaves it to be inferred. *)
adam@7 246 val show_document = _
adam@5 247
(* Retrieval is delegated to the FFI module (its behavior is not visible in
 * this file). *)
adam@5 248 val fetch = FeedFfi.fetch
adam@5 249
(* Runs pattern p over the text of doc and folds f over every match.
 *   p   - the pattern to run
 *   f   - called with each match's data and the current accumulator; returns
 *         the next accumulator inside the transaction monad
 *   doc - document text to scan
 *   acc - initial accumulator
 * The scanner is a hand-written, forgiving tag reader: it walks from one "<"
 * to the next, skipping processing instructions, comments and declarations,
 * and feeds opening and closing tags to the pattern. Whenever the pattern
 * rejects an event its state is reset to p.Initial. Scanning stops, returning
 * the accumulator so far, when the text ends or a construct is unterminated. *)
kkallio@10 250 fun app' [internal ::: Type] [data ::: Type] [acc ::: Type] (p : pattern internal data) (f : data -> acc -> transaction acc)
kkallio@10 251 (doc : document) (acc : acc) : transaction acc =
adam@1 252 let
kkallio@10 253 fun recur xml acc state =
adam@4 254 case String.seek xml #"<" of
kkallio@10 255 None => return acc
adam@4 256 | Some xml =>
(* Closing tag: skip to its ">" and step the pattern with ExitTag. *)
adam@1 257 if xml <> "" && String.sub xml 0 = #"/" then
adam@4 258 case String.seek xml #"\x3E" of
kkallio@10 259 None => return acc
adam@4 260 | Some xml =>
adam@1 261 case p.ExitTag state of
kkallio@10 262 None => recur xml acc p.Initial
adam@1 263 | Some state =>
adam@1 264 case p.Finished state of
kkallio@10 265 None => recur xml acc state
adam@4 266 | Some (data, cont) =>
kkallio@10 267 acc <- f data acc;
kkallio@10 268 recur xml acc (if cont then state else p.Initial)
(* Processing instruction: skip to the next ">". *)
adam@1 269 else if xml <> "" && String.sub xml 0 = #"?" then
adam@4 270 case String.seek xml #"\x3E" of
kkallio@10 271 None => return acc
kkallio@10 272 | Some xml => recur xml acc state
adam@1 273 else if xml <> "" && String.sub xml 0 = #"!" then
(* Comment: skip until the closing dash-dash-">" sequence. *)
adam@2 274 if String.lengthGe xml 3 && String.sub xml 1 = #"-" && String.sub xml 2 = #"-" then
adam@1 275 let
adam@1 276 fun skipper xml =
adam@4 277 case String.seek xml #"-" of
adam@1 278 None => xml
adam@4 279 | Some xml =>
adam@2 280 if String.lengthGe xml 2 && String.sub xml 0 = #"-" && String.sub xml 1 = #"\x3E" then
adam@1 281 String.suffix xml 2
adam@1 282 else
adam@1 283 skipper xml
adam@1 284 in
kkallio@10 285 recur (skipper (String.suffix xml 3)) acc state
adam@1 286 end
adam@1 287 else
(* Other declaration. A plain one such as a DOCTYPE without an internal
 * subset ends at the first ">" and contains no "]"; previously the scan
 * always looked for "]" first, which either stopped the whole run or
 * skipped ahead to an unrelated "]" later in the document. Only look for
 * "]" when a "[" is actually opened before the first ">". *)
(case String.msplit {Needle = ">[", Haystack = xml} of
None => return acc
| Some (_, ch, xml) =>
if ch = #"\x3E" then
recur xml acc state
else
adam@4 288 case String.seek xml #"]" of
kkallio@10 289 None => return acc
adam@4 290 | Some xml =>
adam@4 291 case String.seek xml #"\x3E" of
kkallio@10 292 None => return acc
kkallio@10 293 | Some xml => recur xml acc state
)
adam@1 294 else
(* Opening tag: the name runs up to the first space, ">" or "/". *)
adam@1 295 case String.msplit {Needle = " >/", Haystack = xml} of
kkallio@10 296 None => return acc
adam@1 297 | Some (tagName, ch, xml) =>
adam@1 298 let
(* Reads attributes starting from delimiter ch. Returns the remaining
 * text, the attributes read (consed onto acc, so in reverse order of
 * appearance) and whether the tag was self-closing. *)
adam@1 299 fun readAttrs ch xml acc =
adam@1 300 case ch of
adam@1 301 #"\x3E" => (xml, acc, False)
adam@1 302 | #"/" =>
adam@4 303 (case String.seek xml #"\x3E" of
adam@1 304 None => (xml, acc, True)
adam@4 305 | Some xml => (xml, acc, True))
adam@1 306 | _ =>
adam@2 307 if String.lengthGe xml 2 && Char.isSpace (String.sub xml 0) then
adam@1 308 readAttrs (String.sub xml 0) (String.suffix xml 1) acc
adam@1 309 else if xml <> "" && String.sub xml 0 = #"\x3E" then
adam@1 310 (String.suffix xml 1, acc, False)
adam@1 311 else if xml <> "" && String.sub xml 0 = #"/" then
adam@4 312 (case String.seek xml #"\x3E" of
adam@1 313 None => (xml, acc, True)
adam@4 314 | Some xml => (xml, acc, True))
adam@1 315 else
adam@1 316 case String.split xml #"=" of
adam@1 317 None => (xml, acc, False)
adam@1 318 | Some (aname, xml) =>
(* An unquoted or missing value is recorded as the empty string. *)
adam@6 319 if xml = "" || (String.sub xml 0 <> #"\"" && String.sub xml 0 <> #"'") then
adam@1 320 (xml, (aname, "") :: acc, False)
adam@1 321 else
(* The value runs up to the next occurrence of the opening quote character. *)
adam@6 322 case String.split (String.suffix xml 1) (String.sub xml 0) of
adam@1 323 None => (xml, (aname, "") :: acc, False)
adam@1 324 | Some (value, xml) =>
adam@1 325 if xml = "" then
adam@1 326 (xml, (aname, value) :: acc, False)
adam@1 327 else
adam@1 328 readAttrs (String.sub xml 0) (String.suffix xml 1) ((aname, value) :: acc)
adam@1 329
adam@1 330 val (xml, attrs, ended) = readAttrs ch xml []
adam@1 331
adam@1 332 fun skipSpaces xml =
adam@1 333 if xml <> "" && Char.isSpace (String.sub xml 0) then
adam@1 334 skipSpaces (String.suffix xml 1)
adam@1 335 else
adam@1 336 xml
adam@1 337
adam@1 338 val xml = skipSpaces xml
adam@1 339
(* Character data of the tag: none for a self-closing tag, the contents
 * of a CDATA section when the tag starts with one, otherwise the text
 * split off at the next "<". *)
adam@1 340 val (xml, cdata) =
adam@1 341 if ended then
adam@1 342 (xml, None)
adam@1 343 else if String.isPrefix {Prefix = "<![CDATA[", Full = xml} then
adam@1 344 let
adam@1 345 fun skipper xml acc =
adam@1 346 case String.split xml #"]" of
adam@1 347 None => (acc ^ xml, None)
adam@1 348 | Some (pre, xml) =>
adam@2 349 if String.lengthGe xml 2 && String.sub xml 0 = #"]" && String.sub xml 1 = #"\x3E" then
adam@1 350 (String.suffix xml 2, Some (acc ^ pre))
adam@1 351 else
adam@1 352 skipper xml (acc ^ "]" ^ pre)
adam@1 353 in
adam@1 354 skipper (String.suffix xml 9) ""
adam@1 355 end
adam@1 356 else
adam@4 357 case String.split' xml #"<" of
adam@1 358 None => (xml, None)
adam@4 359 | Some (cdata, xml) => (xml, Some cdata)
adam@1 360 in
adam@1 361 case p.EnterTag {Tag = tagName, Attrs = attrs, Cdata = cdata} state of
kkallio@10 362 None => recur xml acc p.Initial
adam@1 363 | Some state =>
adam@4 364 case p.Finished state of
adam@4 365 None =>
(* Not finished on entry: a self-closing tag is also given its ExitTag
 * event immediately, then Finished is checked once more. *)
adam@4 366 (case (if ended then p.ExitTag state else Some state) of
kkallio@10 367 None => recur xml acc p.Initial
adam@4 368 | Some state =>
adam@4 369 case p.Finished state of
kkallio@10 370 None => recur xml acc state
adam@4 371 | Some (data, cont) =>
kkallio@10 372 acc <- f data acc;
kkallio@10 373 recur xml acc (if cont then state else p.Initial))
adam@4 374 | Some (data, cont) =>
kkallio@10 375 acc <- f data acc;
kkallio@10 376 recur xml acc (if cont then state else p.Initial)
adam@1 377 end
adam@1 378 in
kkallio@10 379 recur doc acc p.Initial
adam@1 380 end
kkallio@10 381
(* Runs pattern p over doc and calls f on every match, purely for its effects.
 * Implemented as app' with a unit accumulator that is ignored. *)
kkallio@10 382 fun app [internal ::: Type] [data ::: Type] (p : pattern internal data) (f : data -> transaction {}) (doc : document) : transaction {} =
kkallio@10 383 app' p (fn data acc => f data) doc ()