annotate src/ur/feed.ur @ 2:2ec84d349838

String processing optimization
author Adam Chlipala <adam@chlipala.net>
date Tue, 11 Jan 2011 14:05:34 -0500
parents 8de269c09617
children ea0ca570c121
rev   line source
adam@0 1 task initialize = fn () => FeedFfi.init
adam@0 2
adam@1 3 datatype pattern internal output =
adam@1 4 Transducer of {Initial : internal,
adam@1 5 EnterTag : {Tag : string, Attrs : list (string * string), Cdata : option string} -> internal -> option internal,
adam@1 6 ExitTag : internal -> option internal,
adam@1 7 Finished : internal -> option output}
adam@1 8
adam@1 9 con tagInternal (attrs :: {Unit}) = option {Attrs : $(mapU string attrs), Cdata : option string}
adam@1 10
adam@1 11 fun tag [attrs ::: {Unit}] (fl : folder attrs) (name : string) (attrs : $(mapU string attrs))
adam@1 12 : pattern (tagInternal attrs) {Attrs : $(mapU string attrs), Cdata : option string} =
adam@1 13 Transducer {Initial = None,
adam@1 14 EnterTag = fn tinfo state =>
adam@1 15 case state of
adam@1 16 Some _ => None
adam@1 17 | None =>
adam@1 18 if tinfo.Tag <> name then
adam@1 19 None
adam@1 20 else
adam@1 21 case @foldUR [string] [fn r => option $(mapU string r)]
adam@1 22 (fn [nm ::_] [r ::_] [[nm] ~ r] aname ro =>
adam@1 23 case ro of
adam@1 24 None => None
adam@1 25 | Some r =>
adam@1 26 case List.assoc aname tinfo.Attrs of
adam@1 27 None => None
adam@1 28 | Some v => Some ({nm = v} ++ r))
adam@1 29 (Some {}) fl attrs of
adam@1 30 None => None
adam@1 31 | Some vs => Some (Some {Attrs = vs, Cdata = tinfo.Cdata}),
adam@1 32 ExitTag = Some,
adam@1 33 Finished = fn x => x}
adam@1 34
adam@1 35 datatype status a = Initial | Failed | Matched of a
adam@1 36
adam@1 37 con childrenInternal (parent :: Type) (children :: {Type}) = option (parent * int * $(map status children))
adam@1 38
adam@1 39 fun children [parentI ::: Type] [parent ::: Type] [children ::: {(Type * Type)}]
adam@1 40 ((Transducer parent) : pattern parentI parent) (children : $(map (fn (i, d) => pattern i d) children)) (fl : folder children)
adam@1 41 : pattern (childrenInternal parentI (map fst children)) (parent * $(map snd children)) =
adam@1 42 Transducer {Initial = None,
adam@1 43 EnterTag = fn tinfo state =>
adam@1 44 case state of
adam@1 45 None =>
adam@1 46 (case parent.EnterTag tinfo parent.Initial of
adam@1 47 None => None
adam@1 48 | Some pstate => Some (Some (pstate, 1, @map0 [status] (fn [t ::_] => Initial)
adam@1 49 (@@Folder.mp [fst] [_] fl))))
adam@1 50 | Some (pstate, depth, cstates) =>
adam@1 51 Some (Some (pstate,
adam@1 52 depth+1,
adam@1 53 @map2 [fn (i, d) => pattern i d] [fn (i, d) => status i] [fn (i, d) => status i]
adam@1 54 (fn [p] ((Transducer ch) : pattern p.1 p.2) (cstate : status p.1) =>
adam@1 55 case cstate of
adam@1 56 Failed => Failed
adam@1 57 | Initial =>
adam@1 58 (case ch.EnterTag tinfo ch.Initial of
adam@1 59 None => Failed
adam@1 60 | Some v => Matched v)
adam@1 61 | v => v)
adam@1 62 fl children cstates)),
adam@1 63 ExitTag = fn state =>
adam@1 64 case state of
adam@1 65 None => None
adam@1 66 | Some (pstate, depth, cstates) =>
adam@1 67 case (if depth = 1 then
adam@1 68 parent.ExitTag pstate
adam@1 69 else
adam@1 70 Some pstate) of
adam@1 71 None => None
adam@1 72 | Some pstate =>
adam@1 73 if depth = 1 then
adam@1 74 Some (Some (pstate, 0, cstates))
adam@1 75 else
adam@1 76 case @foldR2 [fn (i, d) => pattern i d] [fn (i, d) => status i]
adam@1 77 [fn cs => option $(map (fn (i, d) => status i) cs)]
adam@1 78 (fn [nm ::_] [p ::_] [r ::_] [[nm] ~ r] ((Transducer ch) : pattern p.1 p.2) (cstate : status p.1) acc =>
adam@1 79 case acc of
adam@1 80 None => None
adam@1 81 | Some acc =>
adam@1 82 case cstate of
adam@1 83 Matched cstate =>
adam@1 84 (case ch.ExitTag cstate of
adam@1 85 None => None
adam@1 86 | Some cstate' => Some ({nm = Matched cstate'} ++ acc))
adam@1 87 | _ => Some ({nm = Initial} ++ acc))
adam@1 88 (Some {}) fl children cstates of
adam@1 89 None => None
adam@1 90 | Some cstates =>
adam@1 91 Some (Some (pstate, depth-1, cstates)),
adam@1 92 Finished = fn state =>
adam@1 93 case state of
adam@1 94 Some (pstate, 0, cstates) =>
adam@1 95 (case parent.Finished pstate of
adam@1 96 None => None
adam@1 97 | Some pdata =>
adam@1 98 case @foldR2 [fn (i, d) => pattern i d] [fn (i, d) => status i] [fn cs => option $(map snd cs)]
adam@1 99 (fn [nm ::_] [p ::_] [r ::_] [[nm] ~ r] ((Transducer ch) : pattern p.1 p.2) (cstate : status p.1) acc =>
adam@1 100 case acc of
adam@1 101 None => None
adam@1 102 | Some acc =>
adam@1 103 case cstate of
adam@1 104 Initial => None
adam@1 105 | Failed => None
adam@1 106 | Matched cstate =>
adam@1 107 case ch.Finished cstate of
adam@1 108 None => None
adam@1 109 | Some cdata => Some ({nm = cdata} ++ acc))
adam@1 110 (Some {}) fl children cstates of
adam@1 111 None => None
adam@1 112 | Some cdata => Some (pdata, cdata))
adam@1 113 | _ => None}
adam@1 114
adam@1 115 fun app [internal ::: Type] [data ::: Type] ((Transducer p) : pattern internal data) (f : data -> transaction {}) (url : string) : transaction {} =
adam@1 116 let
adam@1 117 fun recur xml state =
adam@1 118 case String.split xml #"<" of
adam@1 119 None => return ()
adam@1 120 | Some (_, xml) =>
adam@1 121 if xml <> "" && String.sub xml 0 = #"/" then
adam@1 122 case String.split xml #"\x3E" of
adam@1 123 None => return ()
adam@1 124 | Some (_, xml) =>
adam@1 125 case p.ExitTag state of
adam@1 126 None => recur xml p.Initial
adam@1 127 | Some state =>
adam@1 128 case p.Finished state of
adam@1 129 None => recur xml state
adam@1 130 | Some data =>
adam@1 131 f data;
adam@1 132 recur xml p.Initial
adam@1 133 else if xml <> "" && String.sub xml 0 = #"?" then
adam@1 134 case String.split xml #"\x3E" of
adam@1 135 None => return ()
adam@1 136 | Some (_, xml) => recur xml state
adam@1 137 else if xml <> "" && String.sub xml 0 = #"!" then
adam@2 138 if String.lengthGe xml 3 && String.sub xml 1 = #"-" && String.sub xml 2 = #"-" then
adam@1 139 let
adam@1 140 fun skipper xml =
adam@1 141 case String.split xml #"-" of
adam@1 142 None => xml
adam@1 143 | Some (_, xml) =>
adam@2 144 if String.lengthGe xml 2 && String.sub xml 0 = #"-" && String.sub xml 1 = #"\x3E" then
adam@1 145 String.suffix xml 2
adam@1 146 else
adam@1 147 skipper xml
adam@1 148 in
adam@1 149 recur (skipper (String.suffix xml 3)) state
adam@1 150 end
adam@1 151 else
adam@1 152 case String.split xml #"]" of
adam@1 153 None => return ()
adam@1 154 | Some (_, xml) =>
adam@1 155 case String.split xml #"\x3E" of
adam@1 156 None => return ()
adam@1 157 | Some (_, xml) => recur xml state
adam@1 158 else
adam@1 159 case String.msplit {Needle = " >/", Haystack = xml} of
adam@1 160 None => return ()
adam@1 161 | Some (tagName, ch, xml) =>
adam@1 162 let
adam@1 163 fun readAttrs ch xml acc =
adam@1 164 case ch of
adam@1 165 #"\x3E" => (xml, acc, False)
adam@1 166 | #"/" =>
adam@1 167 (case String.split xml #"\x3E" of
adam@1 168 None => (xml, acc, True)
adam@1 169 | Some (_, xml) => (xml, acc, True))
adam@1 170 | _ =>
adam@2 171 if String.lengthGe xml 2 && Char.isSpace (String.sub xml 0) then
adam@1 172 readAttrs (String.sub xml 0) (String.suffix xml 1) acc
adam@1 173 else if xml <> "" && String.sub xml 0 = #"\x3E" then
adam@1 174 (String.suffix xml 1, acc, False)
adam@1 175 else if xml <> "" && String.sub xml 0 = #"/" then
adam@1 176 (case String.split xml #"\x3E" of
adam@1 177 None => (xml, acc, True)
adam@1 178 | Some (_, xml) => (xml, acc, True))
adam@1 179 else
adam@1 180 case String.split xml #"=" of
adam@1 181 None => (xml, acc, False)
adam@1 182 | Some (aname, xml) =>
adam@1 183 if xml = "" || String.sub xml 0 <> #"\"" then
adam@1 184 (xml, (aname, "") :: acc, False)
adam@1 185 else
adam@1 186 case String.split (String.suffix xml 1) #"\"" of
adam@1 187 None => (xml, (aname, "") :: acc, False)
adam@1 188 | Some (value, xml) =>
adam@1 189 if xml = "" then
adam@1 190 (xml, (aname, value) :: acc, False)
adam@1 191 else
adam@1 192 readAttrs (String.sub xml 0) (String.suffix xml 1) ((aname, value) :: acc)
adam@1 193
adam@1 194 val (xml, attrs, ended) = readAttrs ch xml []
adam@1 195
adam@1 196 fun skipSpaces xml =
adam@1 197 if xml <> "" && Char.isSpace (String.sub xml 0) then
adam@1 198 skipSpaces (String.suffix xml 1)
adam@1 199 else
adam@1 200 xml
adam@1 201
adam@1 202 val xml = skipSpaces xml
adam@1 203
adam@1 204 val (xml, cdata) =
adam@1 205 if ended then
adam@1 206 (xml, None)
adam@1 207 else if String.isPrefix {Prefix = "<![CDATA[", Full = xml} then
adam@1 208 let
adam@1 209 fun skipper xml acc =
adam@1 210 case String.split xml #"]" of
adam@1 211 None => (acc ^ xml, None)
adam@1 212 | Some (pre, xml) =>
adam@2 213 if String.lengthGe xml 2 && String.sub xml 0 = #"]" && String.sub xml 1 = #"\x3E" then
adam@1 214 (String.suffix xml 2, Some (acc ^ pre))
adam@1 215 else
adam@1 216 skipper xml (acc ^ "]" ^ pre)
adam@1 217 in
adam@1 218 skipper (String.suffix xml 9) ""
adam@1 219 end
adam@1 220 else
adam@1 221 case String.split xml #"<" of
adam@1 222 None => (xml, None)
adam@1 223 | Some (cdata, xml) => ("<" ^ xml, Some cdata)
adam@1 224 in
adam@1 225 case p.EnterTag {Tag = tagName, Attrs = attrs, Cdata = cdata} state of
adam@1 226 None => recur xml p.Initial
adam@1 227 | Some state =>
adam@1 228 case (if ended then p.ExitTag state else Some state) of
adam@1 229 None => recur xml p.Initial
adam@1 230 | Some state =>
adam@1 231 case p.Finished state of
adam@1 232 None => recur xml state
adam@1 233 | Some data =>
adam@1 234 f data;
adam@1 235 recur xml p.Initial
adam@1 236 end
adam@1 237 in
adam@1 238 xml <- FeedFfi.fetch url;
adam@1 239 recur xml p.Initial
adam@1 240 end