comparison src/ur/feed.ur @ 4:af95d9d73eb5

Feed.tree
author Adam Chlipala <adam@chlipala.net>
date Tue, 11 Jan 2011 18:04:15 -0500
parents ea0ca570c121
children 2717458d8951
comparison
equal deleted inserted replaced
3:ea0ca570c121 4:af95d9d73eb5
1 task initialize = fn () => FeedFfi.init 1 task initialize = fn () => FeedFfi.init
2 2
3 datatype pattern internal output = 3 con pattern internal output = {Initial : internal,
4 Transducer of {Initial : internal, 4 EnterTag : {Tag : string, Attrs : list (string * string), Cdata : option string} -> internal -> option internal,
5 EnterTag : {Tag : string, Attrs : list (string * string), Cdata : option string} -> internal -> option internal, 5 ExitTag : internal -> option internal,
6 ExitTag : internal -> option internal, 6 Finished : internal -> option (output * bool)}
7 Finished : internal -> option output} 7
8 val null : pattern unit (variant []) =
9 {Initial = (),
10 EnterTag = fn _ () => Some (),
11 ExitTag = fn () => Some (),
12 Finished = fn () => None}
8 13
9 con tagInternal (attrs :: {Unit}) = option {Attrs : $(mapU string attrs), Cdata : option string} 14 con tagInternal (attrs :: {Unit}) = option {Attrs : $(mapU string attrs), Cdata : option string}
10 15
11 fun tagG [attrs ::: {Unit}] [t ::: Type] (fl : folder attrs) (accept : {Attrs : $(mapU string attrs), Cdata : option string} -> option t) 16 fun tagG [attrs ::: {Unit}] [t ::: Type] (fl : folder attrs) (accept : {Attrs : $(mapU string attrs), Cdata : option string} -> option t)
12 (name : string) (attrs : $(mapU string attrs)) 17 (name : string) (attrs : $(mapU string attrs))
13 : pattern (tagInternal attrs) t = 18 : pattern (tagInternal attrs) t =
14 Transducer {Initial = None, 19 {Initial = None,
15 EnterTag = fn tinfo state => 20 EnterTag = fn tinfo _ =>
16 case state of 21 if tinfo.Tag <> name then
17 Some _ => None 22 None
18 | None => 23 else
19 if tinfo.Tag <> name then 24 case @foldUR [string] [fn r => option $(mapU string r)]
20 None 25 (fn [nm ::_] [r ::_] [[nm] ~ r] aname ro =>
21 else 26 case ro of
22 case @foldUR [string] [fn r => option $(mapU string r)] 27 None => None
23 (fn [nm ::_] [r ::_] [[nm] ~ r] aname ro => 28 | Some r =>
24 case ro of 29 case List.assoc aname tinfo.Attrs of
25 None => None 30 None => None
26 | Some r => 31 | Some v => Some ({nm = v} ++ r))
27 case List.assoc aname tinfo.Attrs of 32 (Some {}) fl attrs of
28 None => None 33 None => None
29 | Some v => Some ({nm = v} ++ r)) 34 | Some vs =>
30 (Some {}) fl attrs of 35 let
31 None => None 36 val v = {Attrs = vs, Cdata = tinfo.Cdata}
32 | Some vs => 37 in
33 let 38 case accept v of
34 val v = {Attrs = vs, Cdata = tinfo.Cdata} 39 None => None
35 in 40 | Some _ => Some (Some v)
36 case accept v of 41 end,
37 None => None 42 ExitTag = fn _ => None,
38 | Some _ => Some (Some v) 43 Finished = fn state => case state of
39 end, 44 None => None
40 ExitTag = Some, 45 | Some state =>
41 Finished = Option.bind accept} 46 case accept state of
47 None => None
48 | Some v => Some (v, False)}
42 49
43 fun tag [attrs ::: {Unit}] (fl : folder attrs) (name : string) (attrs : $(mapU string attrs)) 50 fun tag [attrs ::: {Unit}] (fl : folder attrs) (name : string) (attrs : $(mapU string attrs))
44 : pattern (tagInternal attrs) {Attrs : $(mapU string attrs), Cdata : option string} = 51 : pattern (tagInternal attrs) {Attrs : $(mapU string attrs), Cdata : option string} =
45 @tagG fl Some name attrs 52 @tagG fl Some name attrs
46 53
49 @tagG fl (fn r => Some r.Attrs) name attrs 56 @tagG fl (fn r => Some r.Attrs) name attrs
50 57
51 fun tagC (name : string) : pattern (tagInternal []) string = 58 fun tagC (name : string) : pattern (tagInternal []) string =
52 tagG (fn r => r.Cdata) name {} 59 tagG (fn r => r.Cdata) name {}
53 60
54 datatype status a = Initial | Failed | Matched of a 61 datatype status a = Initial | Pending of a | Matched of a
55 62
56 con childrenInternal (parent :: Type) (children :: {Type}) = option (parent * int * $(map status children)) 63 con childrenInternal (parent :: Type) (children :: {Type}) = option (parent * int * $(map status children))
57 64
58 fun children [parentI ::: Type] [parent ::: Type] [children ::: {(Type * Type)}] 65 fun children [parentI ::: Type] [parent ::: Type] [children ::: {(Type * Type)}]
59 ((Transducer parent) : pattern parentI parent) (children : $(map (fn (i, d) => pattern i d) children)) (fl : folder children) 66 (parent : pattern parentI parent) (children : $(map (fn (i, d) => pattern i d) children)) (fl : folder children)
60 : pattern (childrenInternal parentI (map fst children)) (parent * $(map snd children)) = 67 : pattern (childrenInternal parentI (map fst children)) (parent * $(map snd children)) =
61 Transducer {Initial = None, 68 {Initial = None,
62 EnterTag = fn tinfo state => 69 EnterTag = fn tinfo state =>
63 case state of 70 case state of
64 None => 71 None =>
65 (case parent.EnterTag tinfo parent.Initial of 72 (case parent.EnterTag tinfo parent.Initial of
66 None => None 73 None => None
67 | Some pstate => Some (Some (pstate, 1, @map0 [status] (fn [t ::_] => Initial) 74 | Some pstate => Some (Some (pstate, 1, @map0 [status] (fn [t ::_] => Initial)
68 (@@Folder.mp [fst] [_] fl)))) 75 (@@Folder.mp [fst] [_] fl))))
69 | Some (pstate, depth, cstates) => 76 | Some (pstate, depth, cstates) =>
70 Some (Some (pstate, 77 Some (Some (pstate,
71 depth+1, 78 depth+1,
72 @map2 [fn (i, d) => pattern i d] [fn (i, d) => status i] [fn (i, d) => status i] 79 @map2 [fn (i, d) => pattern i d] [fn (i, d) => status i] [fn (i, d) => status i]
73 (fn [p] ((Transducer ch) : pattern p.1 p.2) (cstate : status p.1) => 80 (fn [p] (ch : pattern p.1 p.2) (cstate : status p.1) =>
74 case cstate of 81 case cstate of
75 Failed => Failed 82 Initial =>
76 | Initial => 83 (case ch.EnterTag tinfo ch.Initial of
77 (case ch.EnterTag tinfo ch.Initial of 84 None => Initial
78 None => Failed 85 | Some v =>
79 | Some v => Matched v) 86 case ch.Finished v of
80 | v => v) 87 None => Pending v
81 fl children cstates)), 88 | _ => Matched v)
82 ExitTag = fn state => 89 | Pending cstate =>
83 case state of 90 (case ch.EnterTag tinfo cstate of
84 None => None 91 None => Initial
85 | Some (pstate, depth, cstates) => 92 | Some v =>
86 case (if depth = 1 then 93 case ch.Finished v of
87 parent.ExitTag pstate 94 None => Pending v
88 else 95 | _ => Matched v)
89 Some pstate) of 96 | v => v)
90 None => None 97 fl children cstates)),
91 | Some pstate => 98 ExitTag = fn state =>
92 if depth = 1 then 99 case state of
93 Some (Some (pstate, 0, cstates)) 100 None => None
94 else 101 | Some (pstate, 1, cstates) =>
95 case @foldR2 [fn (i, d) => pattern i d] [fn (i, d) => status i] 102 (case parent.ExitTag pstate of
96 [fn cs => option $(map (fn (i, d) => status i) cs)] 103 None => None
97 (fn [nm ::_] [p ::_] [r ::_] [[nm] ~ r] ((Transducer ch) : pattern p.1 p.2) (cstate : status p.1) acc => 104 | Some pstate => Some (Some (pstate, 0, cstates)))
98 case acc of 105 | Some (pstate, depth, cstates) =>
99 None => None 106 Some (Some (pstate, depth-1,
100 | Some acc => 107 @map2 [fn (i, d) => pattern i d] [fn (i, d) => status i] [fn (i, d) => status i]
101 case cstate of 108 (fn [p] (ch : pattern p.1 p.2) (cstate : status p.1) =>
102 Matched cstate => 109 case cstate of
103 (case ch.ExitTag cstate of 110 Pending cstate =>
104 None => None 111 (case ch.ExitTag cstate of
105 | Some cstate' => Some ({nm = Matched cstate'} ++ acc)) 112 None => Initial
106 | _ => Some ({nm = Initial} ++ acc)) 113 | Some cstate' =>
107 (Some {}) fl children cstates of 114 case ch.Finished cstate' of
108 None => None 115 None => Pending cstate'
109 | Some cstates => 116 | _ => Matched cstate')
110 Some (Some (pstate, depth-1, cstates)), 117 | _ => cstate)
111 Finished = fn state => 118 fl children cstates)),
112 case state of 119 Finished = fn state =>
113 Some (pstate, 0, cstates) => 120 case state of
114 (case parent.Finished pstate of 121 Some (pstate, _, cstates) =>
115 None => None 122 (case parent.Finished pstate of
116 | Some pdata => 123 None => None
117 case @foldR2 [fn (i, d) => pattern i d] [fn (i, d) => status i] [fn cs => option $(map snd cs)] 124 | Some (pdata, pcont) =>
118 (fn [nm ::_] [p ::_] [r ::_] [[nm] ~ r] ((Transducer ch) : pattern p.1 p.2) (cstate : status p.1) acc => 125 case @foldR2 [fn (i, d) => pattern i d] [fn (i, d) => status i] [fn cs => option $(map snd cs)]
119 case acc of 126 (fn [nm ::_] [p ::_] [r ::_] [[nm] ~ r] (ch : pattern p.1 p.2) (cstate : status p.1) acc =>
127 case acc of
128 None => None
129 | Some acc =>
130 case cstate of
131 Matched cstate =>
132 (case ch.Finished cstate of
120 None => None 133 None => None
121 | Some acc => 134 | Some (cdata, _) => Some ({nm = cdata} ++ acc))
122 case cstate of 135 | _ => None)
123 Initial => None 136 (Some {}) fl children cstates of
124 | Failed => None 137 None => None
125 | Matched cstate => 138 | Some cdata => Some ((pdata, cdata), pcont))
126 case ch.Finished cstate of 139 | _ => None}
127 None => None 140
128 | Some cdata => Some ({nm = cdata} ++ acc)) 141 con treeInternal (parent :: Type) (child :: Type) = option (parent * int * option child)
129 (Some {}) fl children cstates of 142
130 None => None 143 fun tree [parentI ::: Type] [parent ::: Type] [childI ::: Type] [child ::: Type]
131 | Some cdata => Some (pdata, cdata)) 144 (parent : pattern parentI parent) (child : pattern childI child)
132 | _ => None} 145 : pattern (treeInternal parentI childI) (parent * child) =
133 146 {Initial = None,
134 fun app [internal ::: Type] [data ::: Type] ((Transducer p) : pattern internal data) (f : data -> transaction {}) (url : string) : transaction {} = 147 EnterTag = fn tinfo state =>
148 case state of
149 None =>
150 (case parent.EnterTag tinfo parent.Initial of
151 None => None
152 | Some pstate => Some (Some (pstate, 1, None)))
153 | Some (pstate, depth, cstate) =>
154 Some (Some (pstate,
155 depth+1,
156 child.EnterTag tinfo (Option.get child.Initial cstate))),
157 ExitTag = fn state =>
158 case state of
159 None => None
160 | Some (pstate, 1, cstate) =>
161 (case parent.ExitTag pstate of
162 None => None
163 | Some pstate => Some (Some (pstate, 0, cstate)))
164 | Some (pstate, depth, cstate) =>
165 Some (Some (pstate, depth-1, Option.bind child.ExitTag cstate)),
166 Finished = fn state =>
167 case state of
168 None => None
169 | Some (pstate, _, cstate) =>
170 case parent.Finished pstate of
171 None => None
172 | Some (pdata, _) =>
173 case cstate of
174 None => None
175 | Some cstate =>
176 case child.Finished cstate of
177 None => None
178 | Some (cdata, _) => Some ((pdata, cdata), True)}
179
180 fun app [internal ::: Type] [data ::: Type] (p : pattern internal data) (f : data -> transaction {}) (url : string) : transaction {} =
135 let 181 let
136 fun recur xml state = 182 fun recur xml state =
137 case String.split xml #"<" of 183 case String.seek xml #"<" of
138 None => return () 184 None => return ()
139 | Some (_, xml) => 185 | Some xml =>
140 if xml <> "" && String.sub xml 0 = #"/" then 186 if xml <> "" && String.sub xml 0 = #"/" then
141 case String.split xml #"\x3E" of 187 case String.seek xml #"\x3E" of
142 None => return () 188 None => return ()
143 | Some (_, xml) => 189 | Some xml =>
144 case p.ExitTag state of 190 case p.ExitTag state of
145 None => recur xml p.Initial 191 None => recur xml p.Initial
146 | Some state => 192 | Some state =>
147 case p.Finished state of 193 case p.Finished state of
148 None => recur xml state 194 None => recur xml state
149 | Some data => 195 | Some (data, cont) =>
150 f data; 196 f data;
151 recur xml p.Initial 197 recur xml (if cont then state else p.Initial)
152 else if xml <> "" && String.sub xml 0 = #"?" then 198 else if xml <> "" && String.sub xml 0 = #"?" then
153 case String.split xml #"\x3E" of 199 case String.seek xml #"\x3E" of
154 None => return () 200 None => return ()
155 | Some (_, xml) => recur xml state 201 | Some xml => recur xml state
156 else if xml <> "" && String.sub xml 0 = #"!" then 202 else if xml <> "" && String.sub xml 0 = #"!" then
157 if String.lengthGe xml 3 && String.sub xml 1 = #"-" && String.sub xml 2 = #"-" then 203 if String.lengthGe xml 3 && String.sub xml 1 = #"-" && String.sub xml 2 = #"-" then
158 let 204 let
159 fun skipper xml = 205 fun skipper xml =
160 case String.split xml #"-" of 206 case String.seek xml #"-" of
161 None => xml 207 None => xml
162 | Some (_, xml) => 208 | Some xml =>
163 if String.lengthGe xml 2 && String.sub xml 0 = #"-" && String.sub xml 1 = #"\x3E" then 209 if String.lengthGe xml 2 && String.sub xml 0 = #"-" && String.sub xml 1 = #"\x3E" then
164 String.suffix xml 2 210 String.suffix xml 2
165 else 211 else
166 skipper xml 212 skipper xml
167 in 213 in
168 recur (skipper (String.suffix xml 3)) state 214 recur (skipper (String.suffix xml 3)) state
169 end 215 end
170 else 216 else
171 case String.split xml #"]" of 217 case String.seek xml #"]" of
172 None => return () 218 None => return ()
173 | Some (_, xml) => 219 | Some xml =>
174 case String.split xml #"\x3E" of 220 case String.seek xml #"\x3E" of
175 None => return () 221 None => return ()
176 | Some (_, xml) => recur xml state 222 | Some xml => recur xml state
177 else 223 else
178 case String.msplit {Needle = " >/", Haystack = xml} of 224 case String.msplit {Needle = " >/", Haystack = xml} of
179 None => return () 225 None => return ()
180 | Some (tagName, ch, xml) => 226 | Some (tagName, ch, xml) =>
181 let 227 let
182 fun readAttrs ch xml acc = 228 fun readAttrs ch xml acc =
183 case ch of 229 case ch of
184 #"\x3E" => (xml, acc, False) 230 #"\x3E" => (xml, acc, False)
185 | #"/" => 231 | #"/" =>
186 (case String.split xml #"\x3E" of 232 (case String.seek xml #"\x3E" of
187 None => (xml, acc, True) 233 None => (xml, acc, True)
188 | Some (_, xml) => (xml, acc, True)) 234 | Some xml => (xml, acc, True))
189 | _ => 235 | _ =>
190 if String.lengthGe xml 2 && Char.isSpace (String.sub xml 0) then 236 if String.lengthGe xml 2 && Char.isSpace (String.sub xml 0) then
191 readAttrs (String.sub xml 0) (String.suffix xml 1) acc 237 readAttrs (String.sub xml 0) (String.suffix xml 1) acc
192 else if xml <> "" && String.sub xml 0 = #"\x3E" then 238 else if xml <> "" && String.sub xml 0 = #"\x3E" then
193 (String.suffix xml 1, acc, False) 239 (String.suffix xml 1, acc, False)
194 else if xml <> "" && String.sub xml 0 = #"/" then 240 else if xml <> "" && String.sub xml 0 = #"/" then
195 (case String.split xml #"\x3E" of 241 (case String.seek xml #"\x3E" of
196 None => (xml, acc, True) 242 None => (xml, acc, True)
197 | Some (_, xml) => (xml, acc, True)) 243 | Some xml => (xml, acc, True))
198 else 244 else
199 case String.split xml #"=" of 245 case String.split xml #"=" of
200 None => (xml, acc, False) 246 None => (xml, acc, False)
201 | Some (aname, xml) => 247 | Some (aname, xml) =>
202 if xml = "" || String.sub xml 0 <> #"\"" then 248 if xml = "" || String.sub xml 0 <> #"\"" then
235 skipper xml (acc ^ "]" ^ pre) 281 skipper xml (acc ^ "]" ^ pre)
236 in 282 in
237 skipper (String.suffix xml 9) "" 283 skipper (String.suffix xml 9) ""
238 end 284 end
239 else 285 else
240 case String.split xml #"<" of 286 case String.split' xml #"<" of
241 None => (xml, None) 287 None => (xml, None)
242 | Some (cdata, xml) => ("<" ^ xml, Some cdata) 288 | Some (cdata, xml) => (xml, Some cdata)
243 in 289 in
244 case p.EnterTag {Tag = tagName, Attrs = attrs, Cdata = cdata} state of 290 case p.EnterTag {Tag = tagName, Attrs = attrs, Cdata = cdata} state of
245 None => recur xml p.Initial 291 None => recur xml p.Initial
246 | Some state => 292 | Some state =>
247 case (if ended then p.ExitTag state else Some state) of 293 case p.Finished state of
248 None => recur xml p.Initial 294 None =>
249 | Some state => 295 (case (if ended then p.ExitTag state else Some state) of
250 case p.Finished state of 296 None => recur xml p.Initial
251 None => recur xml state 297 | Some state =>
252 | Some data => 298 case p.Finished state of
253 f data; 299 None => recur xml state
254 recur xml p.Initial 300 | Some (data, cont) =>
301 f data;
302 recur xml (if cont then state else p.Initial))
303 | Some (data, cont) =>
304 f data;
305 recur xml (if cont then state else p.Initial)
255 end 306 end
256 in 307 in
257 xml <- FeedFfi.fetch url; 308 xml <- FeedFfi.fetch url;
258 recur xml p.Initial 309 recur xml p.Initial
259 end 310 end