Mercurial > feed
comparison src/ur/feed.ur @ 1:8de269c09617
Outputting a summary of Marginal Revolution RSS feed
author | Adam Chlipala <adam@chlipala.net> |
---|---|
date | Tue, 11 Jan 2011 13:17:44 -0500 |
parents | ad85b8813e8a |
children | 2ec84d349838 |
comparison
equal
deleted
inserted
replaced
0:ad85b8813e8a | 1:8de269c09617 |
---|---|
1 task initialize = fn () => FeedFfi.init | 1 task initialize = fn () => FeedFfi.init |
2 | 2 |
3 val fetch = FeedFfi.fetch | 3 datatype pattern internal output = |
4 Transducer of {Initial : internal, | |
5 EnterTag : {Tag : string, Attrs : list (string * string), Cdata : option string} -> internal -> option internal, | |
6 ExitTag : internal -> option internal, | |
7 Finished : internal -> option output} | |
8 | |
9 con tagInternal (attrs :: {Unit}) = option {Attrs : $(mapU string attrs), Cdata : option string} | |
10 | |
11 fun tag [attrs ::: {Unit}] (fl : folder attrs) (name : string) (attrs : $(mapU string attrs)) | |
12 : pattern (tagInternal attrs) {Attrs : $(mapU string attrs), Cdata : option string} = | |
13 Transducer {Initial = None, | |
14 EnterTag = fn tinfo state => | |
15 case state of | |
16 Some _ => None | |
17 | None => | |
18 if tinfo.Tag <> name then | |
19 None | |
20 else | |
21 case @foldUR [string] [fn r => option $(mapU string r)] | |
22 (fn [nm ::_] [r ::_] [[nm] ~ r] aname ro => | |
23 case ro of | |
24 None => None | |
25 | Some r => | |
26 case List.assoc aname tinfo.Attrs of | |
27 None => None | |
28 | Some v => Some ({nm = v} ++ r)) | |
29 (Some {}) fl attrs of | |
30 None => None | |
31 | Some vs => Some (Some {Attrs = vs, Cdata = tinfo.Cdata}), | |
32 ExitTag = Some, | |
33 Finished = fn x => x} | |
34 | |
35 datatype status a = Initial | Failed | Matched of a | |
36 | |
37 con childrenInternal (parent :: Type) (children :: {Type}) = option (parent * int * $(map status children)) | |
38 | |
39 fun children [parentI ::: Type] [parent ::: Type] [children ::: {(Type * Type)}] | |
40 ((Transducer parent) : pattern parentI parent) (children : $(map (fn (i, d) => pattern i d) children)) (fl : folder children) | |
41 : pattern (childrenInternal parentI (map fst children)) (parent * $(map snd children)) = | |
42 Transducer {Initial = None, | |
43 EnterTag = fn tinfo state => | |
44 case state of | |
45 None => | |
46 (case parent.EnterTag tinfo parent.Initial of | |
47 None => None | |
48 | Some pstate => Some (Some (pstate, 1, @map0 [status] (fn [t ::_] => Initial) | |
49 (@@Folder.mp [fst] [_] fl)))) | |
50 | Some (pstate, depth, cstates) => | |
51 Some (Some (pstate, | |
52 depth+1, | |
53 @map2 [fn (i, d) => pattern i d] [fn (i, d) => status i] [fn (i, d) => status i] | |
54 (fn [p] ((Transducer ch) : pattern p.1 p.2) (cstate : status p.1) => | |
55 case cstate of | |
56 Failed => Failed | |
57 | Initial => | |
58 (case ch.EnterTag tinfo ch.Initial of | |
59 None => Failed | |
60 | Some v => Matched v) | |
61 | v => v) | |
62 fl children cstates)), | |
63 ExitTag = fn state => | |
64 case state of | |
65 None => None | |
66 | Some (pstate, depth, cstates) => | |
67 case (if depth = 1 then | |
68 parent.ExitTag pstate | |
69 else | |
70 Some pstate) of | |
71 None => None | |
72 | Some pstate => | |
73 if depth = 1 then | |
74 Some (Some (pstate, 0, cstates)) | |
75 else | |
76 case @foldR2 [fn (i, d) => pattern i d] [fn (i, d) => status i] | |
77 [fn cs => option $(map (fn (i, d) => status i) cs)] | |
78 (fn [nm ::_] [p ::_] [r ::_] [[nm] ~ r] ((Transducer ch) : pattern p.1 p.2) (cstate : status p.1) acc => | |
79 case acc of | |
80 None => None | |
81 | Some acc => | |
82 case cstate of | |
83 Matched cstate => | |
84 (case ch.ExitTag cstate of | |
85 None => None | |
86 | Some cstate' => Some ({nm = Matched cstate'} ++ acc)) | |
87 | _ => Some ({nm = Initial} ++ acc)) | |
88 (Some {}) fl children cstates of | |
89 None => None | |
90 | Some cstates => | |
91 Some (Some (pstate, depth-1, cstates)), | |
92 Finished = fn state => | |
93 case state of | |
94 Some (pstate, 0, cstates) => | |
95 (case parent.Finished pstate of | |
96 None => None | |
97 | Some pdata => | |
98 case @foldR2 [fn (i, d) => pattern i d] [fn (i, d) => status i] [fn cs => option $(map snd cs)] | |
99 (fn [nm ::_] [p ::_] [r ::_] [[nm] ~ r] ((Transducer ch) : pattern p.1 p.2) (cstate : status p.1) acc => | |
100 case acc of | |
101 None => None | |
102 | Some acc => | |
103 case cstate of | |
104 Initial => None | |
105 | Failed => None | |
106 | Matched cstate => | |
107 case ch.Finished cstate of | |
108 None => None | |
109 | Some cdata => Some ({nm = cdata} ++ acc)) | |
110 (Some {}) fl children cstates of | |
111 None => None | |
112 | Some cdata => Some (pdata, cdata)) | |
113 | _ => None} | |
114 | |
115 fun app [internal ::: Type] [data ::: Type] ((Transducer p) : pattern internal data) (f : data -> transaction {}) (url : string) : transaction {} = | |
116 let | |
117 fun recur xml state = | |
118 case String.split xml #"<" of | |
119 None => return () | |
120 | Some (_, xml) => | |
121 if xml <> "" && String.sub xml 0 = #"/" then | |
122 case String.split xml #"\x3E" of | |
123 None => return () | |
124 | Some (_, xml) => | |
125 case p.ExitTag state of | |
126 None => recur xml p.Initial | |
127 | Some state => | |
128 case p.Finished state of | |
129 None => recur xml state | |
130 | Some data => | |
131 f data; | |
132 recur xml p.Initial | |
133 else if xml <> "" && String.sub xml 0 = #"?" then | |
134 case String.split xml #"\x3E" of | |
135 None => return () | |
136 | Some (_, xml) => recur xml state | |
137 else if xml <> "" && String.sub xml 0 = #"!" then | |
138 if String.length xml >= 3 && String.sub xml 1 = #"-" && String.sub xml 2 = #"-" then | |
139 let | |
140 fun skipper xml = | |
141 case String.split xml #"-" of | |
142 None => xml | |
143 | Some (_, xml) => | |
144 if String.length xml >= 2 && String.sub xml 0 = #"-" && String.sub xml 1 = #"\x3E" then | |
145 String.suffix xml 2 | |
146 else | |
147 skipper xml | |
148 in | |
149 recur (skipper (String.suffix xml 3)) state | |
150 end | |
151 else | |
152 case String.split xml #"]" of | |
153 None => return () | |
154 | Some (_, xml) => | |
155 case String.split xml #"\x3E" of | |
156 None => return () | |
157 | Some (_, xml) => recur xml state | |
158 else | |
159 case String.msplit {Needle = " >/", Haystack = xml} of | |
160 None => return () | |
161 | Some (tagName, ch, xml) => | |
162 let | |
163 fun readAttrs ch xml acc = | |
164 case ch of | |
165 #"\x3E" => (xml, acc, False) | |
166 | #"/" => | |
167 (case String.split xml #"\x3E" of | |
168 None => (xml, acc, True) | |
169 | Some (_, xml) => (xml, acc, True)) | |
170 | _ => | |
171 if String.length xml >= 2 && Char.isSpace (String.sub xml 0) then | |
172 readAttrs (String.sub xml 0) (String.suffix xml 1) acc | |
173 else if xml <> "" && String.sub xml 0 = #"\x3E" then | |
174 (String.suffix xml 1, acc, False) | |
175 else if xml <> "" && String.sub xml 0 = #"/" then | |
176 (case String.split xml #"\x3E" of | |
177 None => (xml, acc, True) | |
178 | Some (_, xml) => (xml, acc, True)) | |
179 else | |
180 case String.split xml #"=" of | |
181 None => (xml, acc, False) | |
182 | Some (aname, xml) => | |
183 if xml = "" || String.sub xml 0 <> #"\"" then | |
184 (xml, (aname, "") :: acc, False) | |
185 else | |
186 case String.split (String.suffix xml 1) #"\"" of | |
187 None => (xml, (aname, "") :: acc, False) | |
188 | Some (value, xml) => | |
189 if xml = "" then | |
190 (xml, (aname, value) :: acc, False) | |
191 else | |
192 readAttrs (String.sub xml 0) (String.suffix xml 1) ((aname, value) :: acc) | |
193 | |
194 val (xml, attrs, ended) = readAttrs ch xml [] | |
195 | |
196 fun skipSpaces xml = | |
197 if xml <> "" && Char.isSpace (String.sub xml 0) then | |
198 skipSpaces (String.suffix xml 1) | |
199 else | |
200 xml | |
201 | |
202 val xml = skipSpaces xml | |
203 | |
204 val (xml, cdata) = | |
205 if ended then | |
206 (xml, None) | |
207 else if String.isPrefix {Prefix = "<![CDATA[", Full = xml} then | |
208 let | |
209 fun skipper xml acc = | |
210 case String.split xml #"]" of | |
211 None => (acc ^ xml, None) | |
212 | Some (pre, xml) => | |
213 if String.length xml >= 2 && String.sub xml 0 = #"]" && String.sub xml 1 = #"\x3E" then | |
214 (String.suffix xml 2, Some (acc ^ pre)) | |
215 else | |
216 skipper xml (acc ^ "]" ^ pre) | |
217 in | |
218 skipper (String.suffix xml 9) "" | |
219 end | |
220 else | |
221 case String.split xml #"<" of | |
222 None => (xml, None) | |
223 | Some (cdata, xml) => ("<" ^ xml, Some cdata) | |
224 in | |
225 case p.EnterTag {Tag = tagName, Attrs = attrs, Cdata = cdata} state of | |
226 None => recur xml p.Initial | |
227 | Some state => | |
228 case (if ended then p.ExitTag state else Some state) of | |
229 None => recur xml p.Initial | |
230 | Some state => | |
231 case p.Finished state of | |
232 None => recur xml state | |
233 | Some data => | |
234 f data; | |
235 recur xml p.Initial | |
236 end | |
237 in | |
238 xml <- FeedFfi.fetch url; | |
239 recur xml p.Initial | |
240 end |