annotate src/ur/feed.ur @ 5:2717458d8951

Splitting fetching and traversal into separate functions
author Adam Chlipala <adam@chlipala.net>
date Thu, 13 Jan 2011 10:02:37 -0500
parents af95d9d73eb5
children e0bae488825c
rev   line source
adam@0 1 task initialize = fn () => FeedFfi.init
adam@0 2
adam@4 3 con pattern internal output = {Initial : internal,
adam@4 4 EnterTag : {Tag : string, Attrs : list (string * string), Cdata : option string} -> internal -> option internal,
adam@4 5 ExitTag : internal -> option internal,
adam@4 6 Finished : internal -> option (output * bool)}
adam@4 7
adam@4 8 val null : pattern unit (variant []) =
adam@4 9 {Initial = (),
adam@4 10 EnterTag = fn _ () => Some (),
adam@4 11 ExitTag = fn () => Some (),
adam@4 12 Finished = fn () => None}
adam@1 13
adam@1 14 con tagInternal (attrs :: {Unit}) = option {Attrs : $(mapU string attrs), Cdata : option string}
adam@1 15
adam@3 16 fun tagG [attrs ::: {Unit}] [t ::: Type] (fl : folder attrs) (accept : {Attrs : $(mapU string attrs), Cdata : option string} -> option t)
adam@3 17 (name : string) (attrs : $(mapU string attrs))
adam@3 18 : pattern (tagInternal attrs) t =
adam@4 19 {Initial = None,
adam@4 20 EnterTag = fn tinfo _ =>
adam@4 21 if tinfo.Tag <> name then
adam@4 22 None
adam@4 23 else
adam@4 24 case @foldUR [string] [fn r => option $(mapU string r)]
adam@4 25 (fn [nm ::_] [r ::_] [[nm] ~ r] aname ro =>
adam@4 26 case ro of
adam@4 27 None => None
adam@4 28 | Some r =>
adam@4 29 case List.assoc aname tinfo.Attrs of
adam@4 30 None => None
adam@4 31 | Some v => Some ({nm = v} ++ r))
adam@4 32 (Some {}) fl attrs of
adam@4 33 None => None
adam@4 34 | Some vs =>
adam@4 35 let
adam@4 36 val v = {Attrs = vs, Cdata = tinfo.Cdata}
adam@4 37 in
adam@4 38 case accept v of
adam@4 39 None => None
adam@4 40 | Some _ => Some (Some v)
adam@4 41 end,
adam@4 42 ExitTag = fn _ => None,
adam@4 43 Finished = fn state => case state of
adam@4 44 None => None
adam@4 45 | Some state =>
adam@4 46 case accept state of
adam@4 47 None => None
adam@4 48 | Some v => Some (v, False)}
adam@3 49
adam@3 50 fun tag [attrs ::: {Unit}] (fl : folder attrs) (name : string) (attrs : $(mapU string attrs))
adam@3 51 : pattern (tagInternal attrs) {Attrs : $(mapU string attrs), Cdata : option string} =
adam@3 52 @tagG fl Some name attrs
adam@3 53
adam@3 54 fun tagA [attrs ::: {Unit}] (fl : folder attrs) (name : string) (attrs : $(mapU string attrs))
adam@3 55 : pattern (tagInternal attrs) $(mapU string attrs) =
adam@3 56 @tagG fl (fn r => Some r.Attrs) name attrs
adam@3 57
adam@3 58 fun tagC (name : string) : pattern (tagInternal []) string =
adam@3 59 tagG (fn r => r.Cdata) name {}
adam@1 60
adam@4 61 datatype status a = Initial | Pending of a | Matched of a
adam@1 62
adam@1 63 con childrenInternal (parent :: Type) (children :: {Type}) = option (parent * int * $(map status children))
adam@1 64
adam@1 65 fun children [parentI ::: Type] [parent ::: Type] [children ::: {(Type * Type)}]
adam@4 66 (parent : pattern parentI parent) (children : $(map (fn (i, d) => pattern i d) children)) (fl : folder children)
adam@1 67 : pattern (childrenInternal parentI (map fst children)) (parent * $(map snd children)) =
adam@4 68 {Initial = None,
adam@4 69 EnterTag = fn tinfo state =>
adam@4 70 case state of
adam@4 71 None =>
adam@4 72 (case parent.EnterTag tinfo parent.Initial of
adam@4 73 None => None
adam@4 74 | Some pstate => Some (Some (pstate, 1, @map0 [status] (fn [t ::_] => Initial)
adam@4 75 (@@Folder.mp [fst] [_] fl))))
adam@4 76 | Some (pstate, depth, cstates) =>
adam@4 77 Some (Some (pstate,
adam@4 78 depth+1,
adam@4 79 @map2 [fn (i, d) => pattern i d] [fn (i, d) => status i] [fn (i, d) => status i]
adam@4 80 (fn [p] (ch : pattern p.1 p.2) (cstate : status p.1) =>
adam@4 81 case cstate of
adam@4 82 Initial =>
adam@4 83 (case ch.EnterTag tinfo ch.Initial of
adam@4 84 None => Initial
adam@4 85 | Some v =>
adam@4 86 case ch.Finished v of
adam@4 87 None => Pending v
adam@4 88 | _ => Matched v)
adam@4 89 | Pending cstate =>
adam@4 90 (case ch.EnterTag tinfo cstate of
adam@4 91 None => Initial
adam@4 92 | Some v =>
adam@4 93 case ch.Finished v of
adam@4 94 None => Pending v
adam@4 95 | _ => Matched v)
adam@4 96 | v => v)
adam@4 97 fl children cstates)),
adam@4 98 ExitTag = fn state =>
adam@4 99 case state of
adam@4 100 None => None
adam@4 101 | Some (pstate, 1, cstates) =>
adam@4 102 (case parent.ExitTag pstate of
adam@4 103 None => None
adam@4 104 | Some pstate => Some (Some (pstate, 0, cstates)))
adam@4 105 | Some (pstate, depth, cstates) =>
adam@4 106 Some (Some (pstate, depth-1,
adam@4 107 @map2 [fn (i, d) => pattern i d] [fn (i, d) => status i] [fn (i, d) => status i]
adam@4 108 (fn [p] (ch : pattern p.1 p.2) (cstate : status p.1) =>
adam@4 109 case cstate of
adam@4 110 Pending cstate =>
adam@4 111 (case ch.ExitTag cstate of
adam@4 112 None => Initial
adam@4 113 | Some cstate' =>
adam@4 114 case ch.Finished cstate' of
adam@4 115 None => Pending cstate'
adam@4 116 | _ => Matched cstate')
adam@4 117 | _ => cstate)
adam@4 118 fl children cstates)),
adam@4 119 Finished = fn state =>
adam@4 120 case state of
adam@4 121 Some (pstate, _, cstates) =>
adam@4 122 (case parent.Finished pstate of
adam@4 123 None => None
adam@4 124 | Some (pdata, pcont) =>
adam@4 125 case @foldR2 [fn (i, d) => pattern i d] [fn (i, d) => status i] [fn cs => option $(map snd cs)]
adam@4 126 (fn [nm ::_] [p ::_] [r ::_] [[nm] ~ r] (ch : pattern p.1 p.2) (cstate : status p.1) acc =>
adam@4 127 case acc of
adam@4 128 None => None
adam@4 129 | Some acc =>
adam@4 130 case cstate of
adam@4 131 Matched cstate =>
adam@4 132 (case ch.Finished cstate of
adam@1 133 None => None
adam@4 134 | Some (cdata, _) => Some ({nm = cdata} ++ acc))
adam@4 135 | _ => None)
adam@4 136 (Some {}) fl children cstates of
adam@4 137 None => None
adam@4 138 | Some cdata => Some ((pdata, cdata), pcont))
adam@4 139 | _ => None}
adam@1 140
adam@4 141 con treeInternal (parent :: Type) (child :: Type) = option (parent * int * option child)
adam@4 142
adam@4 143 fun tree [parentI ::: Type] [parent ::: Type] [childI ::: Type] [child ::: Type]
adam@4 144 (parent : pattern parentI parent) (child : pattern childI child)
adam@4 145 : pattern (treeInternal parentI childI) (parent * child) =
adam@4 146 {Initial = None,
adam@4 147 EnterTag = fn tinfo state =>
adam@4 148 case state of
adam@4 149 None =>
adam@4 150 (case parent.EnterTag tinfo parent.Initial of
adam@4 151 None => None
adam@4 152 | Some pstate => Some (Some (pstate, 1, None)))
adam@4 153 | Some (pstate, depth, cstate) =>
adam@4 154 Some (Some (pstate,
adam@4 155 depth+1,
adam@4 156 child.EnterTag tinfo (Option.get child.Initial cstate))),
adam@4 157 ExitTag = fn state =>
adam@4 158 case state of
adam@4 159 None => None
adam@4 160 | Some (pstate, 1, cstate) =>
adam@4 161 (case parent.ExitTag pstate of
adam@4 162 None => None
adam@4 163 | Some pstate => Some (Some (pstate, 0, cstate)))
adam@4 164 | Some (pstate, depth, cstate) =>
adam@4 165 Some (Some (pstate, depth-1, Option.bind child.ExitTag cstate)),
adam@4 166 Finished = fn state =>
adam@4 167 case state of
adam@4 168 None => None
adam@4 169 | Some (pstate, _, cstate) =>
adam@4 170 case parent.Finished pstate of
adam@4 171 None => None
adam@4 172 | Some (pdata, _) =>
adam@4 173 case cstate of
adam@4 174 None => None
adam@4 175 | Some cstate =>
adam@4 176 case child.Finished cstate of
adam@4 177 None => None
adam@4 178 | Some (cdata, _) => Some ((pdata, cdata), True)}
adam@4 179
adam@5 180 type document = string
adam@5 181
adam@5 182 val fetch = FeedFfi.fetch
adam@5 183
adam@5 184 fun app [internal ::: Type] [data ::: Type] (p : pattern internal data) (f : data -> transaction {}) (doc : document) : transaction {} =
adam@1 185 let
adam@1 186 fun recur xml state =
adam@4 187 case String.seek xml #"<" of
adam@1 188 None => return ()
adam@4 189 | Some xml =>
adam@1 190 if xml <> "" && String.sub xml 0 = #"/" then
adam@4 191 case String.seek xml #"\x3E" of
adam@1 192 None => return ()
adam@4 193 | Some xml =>
adam@1 194 case p.ExitTag state of
adam@1 195 None => recur xml p.Initial
adam@1 196 | Some state =>
adam@1 197 case p.Finished state of
adam@1 198 None => recur xml state
adam@4 199 | Some (data, cont) =>
adam@1 200 f data;
adam@4 201 recur xml (if cont then state else p.Initial)
adam@1 202 else if xml <> "" && String.sub xml 0 = #"?" then
adam@4 203 case String.seek xml #"\x3E" of
adam@1 204 None => return ()
adam@4 205 | Some xml => recur xml state
adam@1 206 else if xml <> "" && String.sub xml 0 = #"!" then
adam@2 207 if String.lengthGe xml 3 && String.sub xml 1 = #"-" && String.sub xml 2 = #"-" then
adam@1 208 let
adam@1 209 fun skipper xml =
adam@4 210 case String.seek xml #"-" of
adam@1 211 None => xml
adam@4 212 | Some xml =>
adam@2 213 if String.lengthGe xml 2 && String.sub xml 0 = #"-" && String.sub xml 1 = #"\x3E" then
adam@1 214 String.suffix xml 2
adam@1 215 else
adam@1 216 skipper xml
adam@1 217 in
adam@1 218 recur (skipper (String.suffix xml 3)) state
adam@1 219 end
adam@1 220 else
adam@4 221 case String.seek xml #"]" of
adam@1 222 None => return ()
adam@4 223 | Some xml =>
adam@4 224 case String.seek xml #"\x3E" of
adam@1 225 None => return ()
adam@4 226 | Some xml => recur xml state
adam@1 227 else
adam@1 228 case String.msplit {Needle = " >/", Haystack = xml} of
adam@1 229 None => return ()
adam@1 230 | Some (tagName, ch, xml) =>
adam@1 231 let
adam@1 232 fun readAttrs ch xml acc =
adam@1 233 case ch of
adam@1 234 #"\x3E" => (xml, acc, False)
adam@1 235 | #"/" =>
adam@4 236 (case String.seek xml #"\x3E" of
adam@1 237 None => (xml, acc, True)
adam@4 238 | Some xml => (xml, acc, True))
adam@1 239 | _ =>
adam@2 240 if String.lengthGe xml 2 && Char.isSpace (String.sub xml 0) then
adam@1 241 readAttrs (String.sub xml 0) (String.suffix xml 1) acc
adam@1 242 else if xml <> "" && String.sub xml 0 = #"\x3E" then
adam@1 243 (String.suffix xml 1, acc, False)
adam@1 244 else if xml <> "" && String.sub xml 0 = #"/" then
adam@4 245 (case String.seek xml #"\x3E" of
adam@1 246 None => (xml, acc, True)
adam@4 247 | Some xml => (xml, acc, True))
adam@1 248 else
adam@1 249 case String.split xml #"=" of
adam@1 250 None => (xml, acc, False)
adam@1 251 | Some (aname, xml) =>
adam@1 252 if xml = "" || String.sub xml 0 <> #"\"" then
adam@1 253 (xml, (aname, "") :: acc, False)
adam@1 254 else
adam@1 255 case String.split (String.suffix xml 1) #"\"" of
adam@1 256 None => (xml, (aname, "") :: acc, False)
adam@1 257 | Some (value, xml) =>
adam@1 258 if xml = "" then
adam@1 259 (xml, (aname, value) :: acc, False)
adam@1 260 else
adam@1 261 readAttrs (String.sub xml 0) (String.suffix xml 1) ((aname, value) :: acc)
adam@1 262
adam@1 263 val (xml, attrs, ended) = readAttrs ch xml []
adam@1 264
adam@1 265 fun skipSpaces xml =
adam@1 266 if xml <> "" && Char.isSpace (String.sub xml 0) then
adam@1 267 skipSpaces (String.suffix xml 1)
adam@1 268 else
adam@1 269 xml
adam@1 270
adam@1 271 val xml = skipSpaces xml
adam@1 272
adam@1 273 val (xml, cdata) =
adam@1 274 if ended then
adam@1 275 (xml, None)
adam@1 276 else if String.isPrefix {Prefix = "<![CDATA[", Full = xml} then
adam@1 277 let
adam@1 278 fun skipper xml acc =
adam@1 279 case String.split xml #"]" of
adam@1 280 None => (acc ^ xml, None)
adam@1 281 | Some (pre, xml) =>
adam@2 282 if String.lengthGe xml 2 && String.sub xml 0 = #"]" && String.sub xml 1 = #"\x3E" then
adam@1 283 (String.suffix xml 2, Some (acc ^ pre))
adam@1 284 else
adam@1 285 skipper xml (acc ^ "]" ^ pre)
adam@1 286 in
adam@1 287 skipper (String.suffix xml 9) ""
adam@1 288 end
adam@1 289 else
adam@4 290 case String.split' xml #"<" of
adam@1 291 None => (xml, None)
adam@4 292 | Some (cdata, xml) => (xml, Some cdata)
adam@1 293 in
adam@1 294 case p.EnterTag {Tag = tagName, Attrs = attrs, Cdata = cdata} state of
adam@1 295 None => recur xml p.Initial
adam@1 296 | Some state =>
adam@4 297 case p.Finished state of
adam@4 298 None =>
adam@4 299 (case (if ended then p.ExitTag state else Some state) of
adam@4 300 None => recur xml p.Initial
adam@4 301 | Some state =>
adam@4 302 case p.Finished state of
adam@4 303 None => recur xml state
adam@4 304 | Some (data, cont) =>
adam@4 305 f data;
adam@4 306 recur xml (if cont then state else p.Initial))
adam@4 307 | Some (data, cont) =>
adam@4 308 f data;
adam@4 309 recur xml (if cont then state else p.Initial)
adam@1 310 end
adam@1 311 in
adam@5 312 recur doc p.Initial
adam@1 313 end