comparison src/ur/feed.ur @ 1:8de269c09617

Outputting a summary of Marginal Revolution RSS feed
author Adam Chlipala <adam@chlipala.net>
date Tue, 11 Jan 2011 13:17:44 -0500
parents ad85b8813e8a
children 2ec84d349838
comparison
equal deleted inserted replaced
0:ad85b8813e8a 1:8de269c09617
1 task initialize = fn () => FeedFfi.init 1 task initialize = fn () => FeedFfi.init
2 2
3 val fetch = FeedFfi.fetch 3 datatype pattern internal output =
4 Transducer of {Initial : internal,
5 EnterTag : {Tag : string, Attrs : list (string * string), Cdata : option string} -> internal -> option internal,
6 ExitTag : internal -> option internal,
7 Finished : internal -> option output}
8
9 con tagInternal (attrs :: {Unit}) = option {Attrs : $(mapU string attrs), Cdata : option string}
10
11 fun tag [attrs ::: {Unit}] (fl : folder attrs) (name : string) (attrs : $(mapU string attrs))
12 : pattern (tagInternal attrs) {Attrs : $(mapU string attrs), Cdata : option string} =
13 Transducer {Initial = None,
14 EnterTag = fn tinfo state =>
15 case state of
16 Some _ => None
17 | None =>
18 if tinfo.Tag <> name then
19 None
20 else
21 case @foldUR [string] [fn r => option $(mapU string r)]
22 (fn [nm ::_] [r ::_] [[nm] ~ r] aname ro =>
23 case ro of
24 None => None
25 | Some r =>
26 case List.assoc aname tinfo.Attrs of
27 None => None
28 | Some v => Some ({nm = v} ++ r))
29 (Some {}) fl attrs of
30 None => None
31 | Some vs => Some (Some {Attrs = vs, Cdata = tinfo.Cdata}),
32 ExitTag = Some,
33 Finished = fn x => x}
34
35 datatype status a = Initial | Failed | Matched of a
36
37 con childrenInternal (parent :: Type) (children :: {Type}) = option (parent * int * $(map status children))
38
39 fun children [parentI ::: Type] [parent ::: Type] [children ::: {(Type * Type)}]
40 ((Transducer parent) : pattern parentI parent) (children : $(map (fn (i, d) => pattern i d) children)) (fl : folder children)
41 : pattern (childrenInternal parentI (map fst children)) (parent * $(map snd children)) =
42 Transducer {Initial = None,
43 EnterTag = fn tinfo state =>
44 case state of
45 None =>
46 (case parent.EnterTag tinfo parent.Initial of
47 None => None
48 | Some pstate => Some (Some (pstate, 1, @map0 [status] (fn [t ::_] => Initial)
49 (@@Folder.mp [fst] [_] fl))))
50 | Some (pstate, depth, cstates) =>
51 Some (Some (pstate,
52 depth+1,
53 @map2 [fn (i, d) => pattern i d] [fn (i, d) => status i] [fn (i, d) => status i]
54 (fn [p] ((Transducer ch) : pattern p.1 p.2) (cstate : status p.1) =>
55 case cstate of
56 Failed => Failed
57 | Initial =>
58 (case ch.EnterTag tinfo ch.Initial of
59 None => Failed
60 | Some v => Matched v)
61 | v => v)
62 fl children cstates)),
63 ExitTag = fn state =>
64 case state of
65 None => None
66 | Some (pstate, depth, cstates) =>
67 case (if depth = 1 then
68 parent.ExitTag pstate
69 else
70 Some pstate) of
71 None => None
72 | Some pstate =>
73 if depth = 1 then
74 Some (Some (pstate, 0, cstates))
75 else
76 case @foldR2 [fn (i, d) => pattern i d] [fn (i, d) => status i]
77 [fn cs => option $(map (fn (i, d) => status i) cs)]
78 (fn [nm ::_] [p ::_] [r ::_] [[nm] ~ r] ((Transducer ch) : pattern p.1 p.2) (cstate : status p.1) acc =>
79 case acc of
80 None => None
81 | Some acc =>
82 case cstate of
83 Matched cstate =>
84 (case ch.ExitTag cstate of
85 None => None
86 | Some cstate' => Some ({nm = Matched cstate'} ++ acc))
87 | _ => Some ({nm = Initial} ++ acc))
88 (Some {}) fl children cstates of
89 None => None
90 | Some cstates =>
91 Some (Some (pstate, depth-1, cstates)),
92 Finished = fn state =>
93 case state of
94 Some (pstate, 0, cstates) =>
95 (case parent.Finished pstate of
96 None => None
97 | Some pdata =>
98 case @foldR2 [fn (i, d) => pattern i d] [fn (i, d) => status i] [fn cs => option $(map snd cs)]
99 (fn [nm ::_] [p ::_] [r ::_] [[nm] ~ r] ((Transducer ch) : pattern p.1 p.2) (cstate : status p.1) acc =>
100 case acc of
101 None => None
102 | Some acc =>
103 case cstate of
104 Initial => None
105 | Failed => None
106 | Matched cstate =>
107 case ch.Finished cstate of
108 None => None
109 | Some cdata => Some ({nm = cdata} ++ acc))
110 (Some {}) fl children cstates of
111 None => None
112 | Some cdata => Some (pdata, cdata))
113 | _ => None}
114
115 fun app [internal ::: Type] [data ::: Type] ((Transducer p) : pattern internal data) (f : data -> transaction {}) (url : string) : transaction {} =
116 let
117 fun recur xml state =
118 case String.split xml #"<" of
119 None => return ()
120 | Some (_, xml) =>
121 if xml <> "" && String.sub xml 0 = #"/" then
122 case String.split xml #"\x3E" of
123 None => return ()
124 | Some (_, xml) =>
125 case p.ExitTag state of
126 None => recur xml p.Initial
127 | Some state =>
128 case p.Finished state of
129 None => recur xml state
130 | Some data =>
131 f data;
132 recur xml p.Initial
133 else if xml <> "" && String.sub xml 0 = #"?" then
134 case String.split xml #"\x3E" of
135 None => return ()
136 | Some (_, xml) => recur xml state
137 else if xml <> "" && String.sub xml 0 = #"!" then
138 if String.length xml >= 3 && String.sub xml 1 = #"-" && String.sub xml 2 = #"-" then
139 let
140 fun skipper xml =
141 case String.split xml #"-" of
142 None => xml
143 | Some (_, xml) =>
144 if String.length xml >= 2 && String.sub xml 0 = #"-" && String.sub xml 1 = #"\x3E" then
145 String.suffix xml 2
146 else
147 skipper xml
148 in
149 recur (skipper (String.suffix xml 3)) state
150 end
151 else
152 case String.split xml #"]" of
153 None => return ()
154 | Some (_, xml) =>
155 case String.split xml #"\x3E" of
156 None => return ()
157 | Some (_, xml) => recur xml state
158 else
159 case String.msplit {Needle = " >/", Haystack = xml} of
160 None => return ()
161 | Some (tagName, ch, xml) =>
162 let
163 fun readAttrs ch xml acc =
164 case ch of
165 #"\x3E" => (xml, acc, False)
166 | #"/" =>
167 (case String.split xml #"\x3E" of
168 None => (xml, acc, True)
169 | Some (_, xml) => (xml, acc, True))
170 | _ =>
171 if String.length xml >= 2 && Char.isSpace (String.sub xml 0) then
172 readAttrs (String.sub xml 0) (String.suffix xml 1) acc
173 else if xml <> "" && String.sub xml 0 = #"\x3E" then
174 (String.suffix xml 1, acc, False)
175 else if xml <> "" && String.sub xml 0 = #"/" then
176 (case String.split xml #"\x3E" of
177 None => (xml, acc, True)
178 | Some (_, xml) => (xml, acc, True))
179 else
180 case String.split xml #"=" of
181 None => (xml, acc, False)
182 | Some (aname, xml) =>
183 if xml = "" || String.sub xml 0 <> #"\"" then
184 (xml, (aname, "") :: acc, False)
185 else
186 case String.split (String.suffix xml 1) #"\"" of
187 None => (xml, (aname, "") :: acc, False)
188 | Some (value, xml) =>
189 if xml = "" then
190 (xml, (aname, value) :: acc, False)
191 else
192 readAttrs (String.sub xml 0) (String.suffix xml 1) ((aname, value) :: acc)
193
194 val (xml, attrs, ended) = readAttrs ch xml []
195
196 fun skipSpaces xml =
197 if xml <> "" && Char.isSpace (String.sub xml 0) then
198 skipSpaces (String.suffix xml 1)
199 else
200 xml
201
202 val xml = skipSpaces xml
203
204 val (xml, cdata) =
205 if ended then
206 (xml, None)
207 else if String.isPrefix {Prefix = "<![CDATA[", Full = xml} then
208 let
209 fun skipper xml acc =
210 case String.split xml #"]" of
211 None => (acc ^ xml, None)
212 | Some (pre, xml) =>
213 if String.length xml >= 2 && String.sub xml 0 = #"]" && String.sub xml 1 = #"\x3E" then
214 (String.suffix xml 2, Some (acc ^ pre))
215 else
216 skipper xml (acc ^ "]" ^ pre)
217 in
218 skipper (String.suffix xml 9) ""
219 end
220 else
221 case String.split xml #"<" of
222 None => (xml, None)
223 | Some (cdata, xml) => ("<" ^ xml, Some cdata)
224 in
225 case p.EnterTag {Tag = tagName, Attrs = attrs, Cdata = cdata} state of
226 None => recur xml p.Initial
227 | Some state =>
228 case (if ended then p.ExitTag state else Some state) of
229 None => recur xml p.Initial
230 | Some state =>
231 case p.Finished state of
232 None => recur xml state
233 | Some data =>
234 f data;
235 recur xml p.Initial
236 end
237 in
238 xml <- FeedFfi.fetch url;
239 recur xml p.Initial
240 end