annotate src/ur/feed.ur @ 3:ea0ca570c121

Shortcut tag combinators
author Adam Chlipala <adam@chlipala.net>
date Tue, 11 Jan 2011 14:19:51 -0500
parents 2ec84d349838
children af95d9d73eb5
rev   line source
adam@0 1 task initialize = fn () => FeedFfi.init
adam@0 2
adam@1 3 datatype pattern internal output =
adam@1 4 Transducer of {Initial : internal,
adam@1 5 EnterTag : {Tag : string, Attrs : list (string * string), Cdata : option string} -> internal -> option internal,
adam@1 6 ExitTag : internal -> option internal,
adam@1 7 Finished : internal -> option output}
adam@1 8
adam@1 9 con tagInternal (attrs :: {Unit}) = option {Attrs : $(mapU string attrs), Cdata : option string}
adam@1 10
adam@3 11 fun tagG [attrs ::: {Unit}] [t ::: Type] (fl : folder attrs) (accept : {Attrs : $(mapU string attrs), Cdata : option string} -> option t)
adam@3 12 (name : string) (attrs : $(mapU string attrs))
adam@3 13 : pattern (tagInternal attrs) t =
adam@1 14 Transducer {Initial = None,
adam@1 15 EnterTag = fn tinfo state =>
adam@1 16 case state of
adam@1 17 Some _ => None
adam@1 18 | None =>
adam@1 19 if tinfo.Tag <> name then
adam@1 20 None
adam@1 21 else
adam@1 22 case @foldUR [string] [fn r => option $(mapU string r)]
adam@1 23 (fn [nm ::_] [r ::_] [[nm] ~ r] aname ro =>
adam@1 24 case ro of
adam@1 25 None => None
adam@1 26 | Some r =>
adam@1 27 case List.assoc aname tinfo.Attrs of
adam@1 28 None => None
adam@1 29 | Some v => Some ({nm = v} ++ r))
adam@1 30 (Some {}) fl attrs of
adam@1 31 None => None
adam@3 32 | Some vs =>
adam@3 33 let
adam@3 34 val v = {Attrs = vs, Cdata = tinfo.Cdata}
adam@3 35 in
adam@3 36 case accept v of
adam@3 37 None => None
adam@3 38 | Some _ => Some (Some v)
adam@3 39 end,
adam@1 40 ExitTag = Some,
adam@3 41 Finished = Option.bind accept}
adam@3 42
adam@3 43 fun tag [attrs ::: {Unit}] (fl : folder attrs) (name : string) (attrs : $(mapU string attrs))
adam@3 44 : pattern (tagInternal attrs) {Attrs : $(mapU string attrs), Cdata : option string} =
adam@3 45 @tagG fl Some name attrs
adam@3 46
adam@3 47 fun tagA [attrs ::: {Unit}] (fl : folder attrs) (name : string) (attrs : $(mapU string attrs))
adam@3 48 : pattern (tagInternal attrs) $(mapU string attrs) =
adam@3 49 @tagG fl (fn r => Some r.Attrs) name attrs
adam@3 50
adam@3 51 fun tagC (name : string) : pattern (tagInternal []) string =
adam@3 52 tagG (fn r => r.Cdata) name {}
adam@1 53
adam@1 54 datatype status a = Initial | Failed | Matched of a
adam@1 55
adam@1 56 con childrenInternal (parent :: Type) (children :: {Type}) = option (parent * int * $(map status children))
adam@1 57
adam@1 58 fun children [parentI ::: Type] [parent ::: Type] [children ::: {(Type * Type)}]
adam@1 59 ((Transducer parent) : pattern parentI parent) (children : $(map (fn (i, d) => pattern i d) children)) (fl : folder children)
adam@1 60 : pattern (childrenInternal parentI (map fst children)) (parent * $(map snd children)) =
adam@1 61 Transducer {Initial = None,
adam@1 62 EnterTag = fn tinfo state =>
adam@1 63 case state of
adam@1 64 None =>
adam@1 65 (case parent.EnterTag tinfo parent.Initial of
adam@1 66 None => None
adam@1 67 | Some pstate => Some (Some (pstate, 1, @map0 [status] (fn [t ::_] => Initial)
adam@1 68 (@@Folder.mp [fst] [_] fl))))
adam@1 69 | Some (pstate, depth, cstates) =>
adam@1 70 Some (Some (pstate,
adam@1 71 depth+1,
adam@1 72 @map2 [fn (i, d) => pattern i d] [fn (i, d) => status i] [fn (i, d) => status i]
adam@1 73 (fn [p] ((Transducer ch) : pattern p.1 p.2) (cstate : status p.1) =>
adam@1 74 case cstate of
adam@1 75 Failed => Failed
adam@1 76 | Initial =>
adam@1 77 (case ch.EnterTag tinfo ch.Initial of
adam@1 78 None => Failed
adam@1 79 | Some v => Matched v)
adam@1 80 | v => v)
adam@1 81 fl children cstates)),
adam@1 82 ExitTag = fn state =>
adam@1 83 case state of
adam@1 84 None => None
adam@1 85 | Some (pstate, depth, cstates) =>
adam@1 86 case (if depth = 1 then
adam@1 87 parent.ExitTag pstate
adam@1 88 else
adam@1 89 Some pstate) of
adam@1 90 None => None
adam@1 91 | Some pstate =>
adam@1 92 if depth = 1 then
adam@1 93 Some (Some (pstate, 0, cstates))
adam@1 94 else
adam@1 95 case @foldR2 [fn (i, d) => pattern i d] [fn (i, d) => status i]
adam@1 96 [fn cs => option $(map (fn (i, d) => status i) cs)]
adam@1 97 (fn [nm ::_] [p ::_] [r ::_] [[nm] ~ r] ((Transducer ch) : pattern p.1 p.2) (cstate : status p.1) acc =>
adam@1 98 case acc of
adam@1 99 None => None
adam@1 100 | Some acc =>
adam@1 101 case cstate of
adam@1 102 Matched cstate =>
adam@1 103 (case ch.ExitTag cstate of
adam@1 104 None => None
adam@1 105 | Some cstate' => Some ({nm = Matched cstate'} ++ acc))
adam@1 106 | _ => Some ({nm = Initial} ++ acc))
adam@1 107 (Some {}) fl children cstates of
adam@1 108 None => None
adam@1 109 | Some cstates =>
adam@1 110 Some (Some (pstate, depth-1, cstates)),
adam@1 111 Finished = fn state =>
adam@1 112 case state of
adam@1 113 Some (pstate, 0, cstates) =>
adam@1 114 (case parent.Finished pstate of
adam@1 115 None => None
adam@1 116 | Some pdata =>
adam@1 117 case @foldR2 [fn (i, d) => pattern i d] [fn (i, d) => status i] [fn cs => option $(map snd cs)]
adam@1 118 (fn [nm ::_] [p ::_] [r ::_] [[nm] ~ r] ((Transducer ch) : pattern p.1 p.2) (cstate : status p.1) acc =>
adam@1 119 case acc of
adam@1 120 None => None
adam@1 121 | Some acc =>
adam@1 122 case cstate of
adam@1 123 Initial => None
adam@1 124 | Failed => None
adam@1 125 | Matched cstate =>
adam@1 126 case ch.Finished cstate of
adam@1 127 None => None
adam@1 128 | Some cdata => Some ({nm = cdata} ++ acc))
adam@1 129 (Some {}) fl children cstates of
adam@1 130 None => None
adam@1 131 | Some cdata => Some (pdata, cdata))
adam@1 132 | _ => None}
adam@1 133
adam@1 134 fun app [internal ::: Type] [data ::: Type] ((Transducer p) : pattern internal data) (f : data -> transaction {}) (url : string) : transaction {} =
adam@1 135 let
adam@1 136 fun recur xml state =
adam@1 137 case String.split xml #"<" of
adam@1 138 None => return ()
adam@1 139 | Some (_, xml) =>
adam@1 140 if xml <> "" && String.sub xml 0 = #"/" then
adam@1 141 case String.split xml #"\x3E" of
adam@1 142 None => return ()
adam@1 143 | Some (_, xml) =>
adam@1 144 case p.ExitTag state of
adam@1 145 None => recur xml p.Initial
adam@1 146 | Some state =>
adam@1 147 case p.Finished state of
adam@1 148 None => recur xml state
adam@1 149 | Some data =>
adam@1 150 f data;
adam@1 151 recur xml p.Initial
adam@1 152 else if xml <> "" && String.sub xml 0 = #"?" then
adam@1 153 case String.split xml #"\x3E" of
adam@1 154 None => return ()
adam@1 155 | Some (_, xml) => recur xml state
adam@1 156 else if xml <> "" && String.sub xml 0 = #"!" then
adam@2 157 if String.lengthGe xml 3 && String.sub xml 1 = #"-" && String.sub xml 2 = #"-" then
adam@1 158 let
adam@1 159 fun skipper xml =
adam@1 160 case String.split xml #"-" of
adam@1 161 None => xml
adam@1 162 | Some (_, xml) =>
adam@2 163 if String.lengthGe xml 2 && String.sub xml 0 = #"-" && String.sub xml 1 = #"\x3E" then
adam@1 164 String.suffix xml 2
adam@1 165 else
adam@1 166 skipper xml
adam@1 167 in
adam@1 168 recur (skipper (String.suffix xml 3)) state
adam@1 169 end
adam@1 170 else
adam@1 171 case String.split xml #"]" of
adam@1 172 None => return ()
adam@1 173 | Some (_, xml) =>
adam@1 174 case String.split xml #"\x3E" of
adam@1 175 None => return ()
adam@1 176 | Some (_, xml) => recur xml state
adam@1 177 else
adam@1 178 case String.msplit {Needle = " >/", Haystack = xml} of
adam@1 179 None => return ()
adam@1 180 | Some (tagName, ch, xml) =>
adam@1 181 let
adam@1 182 fun readAttrs ch xml acc =
adam@1 183 case ch of
adam@1 184 #"\x3E" => (xml, acc, False)
adam@1 185 | #"/" =>
adam@1 186 (case String.split xml #"\x3E" of
adam@1 187 None => (xml, acc, True)
adam@1 188 | Some (_, xml) => (xml, acc, True))
adam@1 189 | _ =>
adam@2 190 if String.lengthGe xml 2 && Char.isSpace (String.sub xml 0) then
adam@1 191 readAttrs (String.sub xml 0) (String.suffix xml 1) acc
adam@1 192 else if xml <> "" && String.sub xml 0 = #"\x3E" then
adam@1 193 (String.suffix xml 1, acc, False)
adam@1 194 else if xml <> "" && String.sub xml 0 = #"/" then
adam@1 195 (case String.split xml #"\x3E" of
adam@1 196 None => (xml, acc, True)
adam@1 197 | Some (_, xml) => (xml, acc, True))
adam@1 198 else
adam@1 199 case String.split xml #"=" of
adam@1 200 None => (xml, acc, False)
adam@1 201 | Some (aname, xml) =>
adam@1 202 if xml = "" || String.sub xml 0 <> #"\"" then
adam@1 203 (xml, (aname, "") :: acc, False)
adam@1 204 else
adam@1 205 case String.split (String.suffix xml 1) #"\"" of
adam@1 206 None => (xml, (aname, "") :: acc, False)
adam@1 207 | Some (value, xml) =>
adam@1 208 if xml = "" then
adam@1 209 (xml, (aname, value) :: acc, False)
adam@1 210 else
adam@1 211 readAttrs (String.sub xml 0) (String.suffix xml 1) ((aname, value) :: acc)
adam@1 212
adam@1 213 val (xml, attrs, ended) = readAttrs ch xml []
adam@1 214
adam@1 215 fun skipSpaces xml =
adam@1 216 if xml <> "" && Char.isSpace (String.sub xml 0) then
adam@1 217 skipSpaces (String.suffix xml 1)
adam@1 218 else
adam@1 219 xml
adam@1 220
adam@1 221 val xml = skipSpaces xml
adam@1 222
adam@1 223 val (xml, cdata) =
adam@1 224 if ended then
adam@1 225 (xml, None)
adam@1 226 else if String.isPrefix {Prefix = "<![CDATA[", Full = xml} then
adam@1 227 let
adam@1 228 fun skipper xml acc =
adam@1 229 case String.split xml #"]" of
adam@1 230 None => (acc ^ xml, None)
adam@1 231 | Some (pre, xml) =>
adam@2 232 if String.lengthGe xml 2 && String.sub xml 0 = #"]" && String.sub xml 1 = #"\x3E" then
adam@1 233 (String.suffix xml 2, Some (acc ^ pre))
adam@1 234 else
adam@1 235 skipper xml (acc ^ "]" ^ pre)
adam@1 236 in
adam@1 237 skipper (String.suffix xml 9) ""
adam@1 238 end
adam@1 239 else
adam@1 240 case String.split xml #"<" of
adam@1 241 None => (xml, None)
adam@1 242 | Some (cdata, xml) => ("<" ^ xml, Some cdata)
adam@1 243 in
adam@1 244 case p.EnterTag {Tag = tagName, Attrs = attrs, Cdata = cdata} state of
adam@1 245 None => recur xml p.Initial
adam@1 246 | Some state =>
adam@1 247 case (if ended then p.ExitTag state else Some state) of
adam@1 248 None => recur xml p.Initial
adam@1 249 | Some state =>
adam@1 250 case p.Finished state of
adam@1 251 None => recur xml state
adam@1 252 | Some data =>
adam@1 253 f data;
adam@1 254 recur xml p.Initial
adam@1 255 end
adam@1 256 in
adam@1 257 xml <- FeedFfi.fetch url;
adam@1 258 recur xml p.Initial
adam@1 259 end