adam@0
|
1 task initialize = fn () => FeedFfi.init
|
adam@0
|
2
|
adam@1
|
3 datatype pattern internal output =
|
adam@1
|
4 Transducer of {Initial : internal,
|
adam@1
|
5 EnterTag : {Tag : string, Attrs : list (string * string), Cdata : option string} -> internal -> option internal,
|
adam@1
|
6 ExitTag : internal -> option internal,
|
adam@1
|
7 Finished : internal -> option output}
|
adam@1
|
8
|
adam@1
|
9 con tagInternal (attrs :: {Unit}) = option {Attrs : $(mapU string attrs), Cdata : option string}
|
adam@1
|
10
|
adam@3
|
11 fun tagG [attrs ::: {Unit}] [t ::: Type] (fl : folder attrs) (accept : {Attrs : $(mapU string attrs), Cdata : option string} -> option t)
|
adam@3
|
12 (name : string) (attrs : $(mapU string attrs))
|
adam@3
|
13 : pattern (tagInternal attrs) t =
|
adam@1
|
14 Transducer {Initial = None,
|
adam@1
|
15 EnterTag = fn tinfo state =>
|
adam@1
|
16 case state of
|
adam@1
|
17 Some _ => None
|
adam@1
|
18 | None =>
|
adam@1
|
19 if tinfo.Tag <> name then
|
adam@1
|
20 None
|
adam@1
|
21 else
|
adam@1
|
22 case @foldUR [string] [fn r => option $(mapU string r)]
|
adam@1
|
23 (fn [nm ::_] [r ::_] [[nm] ~ r] aname ro =>
|
adam@1
|
24 case ro of
|
adam@1
|
25 None => None
|
adam@1
|
26 | Some r =>
|
adam@1
|
27 case List.assoc aname tinfo.Attrs of
|
adam@1
|
28 None => None
|
adam@1
|
29 | Some v => Some ({nm = v} ++ r))
|
adam@1
|
30 (Some {}) fl attrs of
|
adam@1
|
31 None => None
|
adam@3
|
32 | Some vs =>
|
adam@3
|
33 let
|
adam@3
|
34 val v = {Attrs = vs, Cdata = tinfo.Cdata}
|
adam@3
|
35 in
|
adam@3
|
36 case accept v of
|
adam@3
|
37 None => None
|
adam@3
|
38 | Some _ => Some (Some v)
|
adam@3
|
39 end,
|
adam@1
|
40 ExitTag = Some,
|
adam@3
|
41 Finished = Option.bind accept}
|
adam@3
|
42
|
adam@3
|
43 fun tag [attrs ::: {Unit}] (fl : folder attrs) (name : string) (attrs : $(mapU string attrs))
|
adam@3
|
44 : pattern (tagInternal attrs) {Attrs : $(mapU string attrs), Cdata : option string} =
|
adam@3
|
45 @tagG fl Some name attrs
|
adam@3
|
46
|
adam@3
|
47 fun tagA [attrs ::: {Unit}] (fl : folder attrs) (name : string) (attrs : $(mapU string attrs))
|
adam@3
|
48 : pattern (tagInternal attrs) $(mapU string attrs) =
|
adam@3
|
49 @tagG fl (fn r => Some r.Attrs) name attrs
|
adam@3
|
50
|
adam@3
|
51 fun tagC (name : string) : pattern (tagInternal []) string =
|
adam@3
|
52 tagG (fn r => r.Cdata) name {}
|
adam@1
|
53
|
adam@1
|
54 datatype status a = Initial | Failed | Matched of a
|
adam@1
|
55
|
adam@1
|
56 con childrenInternal (parent :: Type) (children :: {Type}) = option (parent * int * $(map status children))
|
adam@1
|
57
|
adam@1
|
58 fun children [parentI ::: Type] [parent ::: Type] [children ::: {(Type * Type)}]
|
adam@1
|
59 ((Transducer parent) : pattern parentI parent) (children : $(map (fn (i, d) => pattern i d) children)) (fl : folder children)
|
adam@1
|
60 : pattern (childrenInternal parentI (map fst children)) (parent * $(map snd children)) =
|
adam@1
|
61 Transducer {Initial = None,
|
adam@1
|
62 EnterTag = fn tinfo state =>
|
adam@1
|
63 case state of
|
adam@1
|
64 None =>
|
adam@1
|
65 (case parent.EnterTag tinfo parent.Initial of
|
adam@1
|
66 None => None
|
adam@1
|
67 | Some pstate => Some (Some (pstate, 1, @map0 [status] (fn [t ::_] => Initial)
|
adam@1
|
68 (@@Folder.mp [fst] [_] fl))))
|
adam@1
|
69 | Some (pstate, depth, cstates) =>
|
adam@1
|
70 Some (Some (pstate,
|
adam@1
|
71 depth+1,
|
adam@1
|
72 @map2 [fn (i, d) => pattern i d] [fn (i, d) => status i] [fn (i, d) => status i]
|
adam@1
|
73 (fn [p] ((Transducer ch) : pattern p.1 p.2) (cstate : status p.1) =>
|
adam@1
|
74 case cstate of
|
adam@1
|
75 Failed => Failed
|
adam@1
|
76 | Initial =>
|
adam@1
|
77 (case ch.EnterTag tinfo ch.Initial of
|
adam@1
|
78 None => Failed
|
adam@1
|
79 | Some v => Matched v)
|
adam@1
|
80 | v => v)
|
adam@1
|
81 fl children cstates)),
|
adam@1
|
82 ExitTag = fn state =>
|
adam@1
|
83 case state of
|
adam@1
|
84 None => None
|
adam@1
|
85 | Some (pstate, depth, cstates) =>
|
adam@1
|
86 case (if depth = 1 then
|
adam@1
|
87 parent.ExitTag pstate
|
adam@1
|
88 else
|
adam@1
|
89 Some pstate) of
|
adam@1
|
90 None => None
|
adam@1
|
91 | Some pstate =>
|
adam@1
|
92 if depth = 1 then
|
adam@1
|
93 Some (Some (pstate, 0, cstates))
|
adam@1
|
94 else
|
adam@1
|
95 case @foldR2 [fn (i, d) => pattern i d] [fn (i, d) => status i]
|
adam@1
|
96 [fn cs => option $(map (fn (i, d) => status i) cs)]
|
adam@1
|
97 (fn [nm ::_] [p ::_] [r ::_] [[nm] ~ r] ((Transducer ch) : pattern p.1 p.2) (cstate : status p.1) acc =>
|
adam@1
|
98 case acc of
|
adam@1
|
99 None => None
|
adam@1
|
100 | Some acc =>
|
adam@1
|
101 case cstate of
|
adam@1
|
102 Matched cstate =>
|
adam@1
|
103 (case ch.ExitTag cstate of
|
adam@1
|
104 None => None
|
adam@1
|
105 | Some cstate' => Some ({nm = Matched cstate'} ++ acc))
|
adam@1
|
106 | _ => Some ({nm = Initial} ++ acc))
|
adam@1
|
107 (Some {}) fl children cstates of
|
adam@1
|
108 None => None
|
adam@1
|
109 | Some cstates =>
|
adam@1
|
110 Some (Some (pstate, depth-1, cstates)),
|
adam@1
|
111 Finished = fn state =>
|
adam@1
|
112 case state of
|
adam@1
|
113 Some (pstate, 0, cstates) =>
|
adam@1
|
114 (case parent.Finished pstate of
|
adam@1
|
115 None => None
|
adam@1
|
116 | Some pdata =>
|
adam@1
|
117 case @foldR2 [fn (i, d) => pattern i d] [fn (i, d) => status i] [fn cs => option $(map snd cs)]
|
adam@1
|
118 (fn [nm ::_] [p ::_] [r ::_] [[nm] ~ r] ((Transducer ch) : pattern p.1 p.2) (cstate : status p.1) acc =>
|
adam@1
|
119 case acc of
|
adam@1
|
120 None => None
|
adam@1
|
121 | Some acc =>
|
adam@1
|
122 case cstate of
|
adam@1
|
123 Initial => None
|
adam@1
|
124 | Failed => None
|
adam@1
|
125 | Matched cstate =>
|
adam@1
|
126 case ch.Finished cstate of
|
adam@1
|
127 None => None
|
adam@1
|
128 | Some cdata => Some ({nm = cdata} ++ acc))
|
adam@1
|
129 (Some {}) fl children cstates of
|
adam@1
|
130 None => None
|
adam@1
|
131 | Some cdata => Some (pdata, cdata))
|
adam@1
|
132 | _ => None}
|
adam@1
|
133
|
adam@1
|
134 fun app [internal ::: Type] [data ::: Type] ((Transducer p) : pattern internal data) (f : data -> transaction {}) (url : string) : transaction {} =
|
adam@1
|
135 let
|
adam@1
|
136 fun recur xml state =
|
adam@1
|
137 case String.split xml #"<" of
|
adam@1
|
138 None => return ()
|
adam@1
|
139 | Some (_, xml) =>
|
adam@1
|
140 if xml <> "" && String.sub xml 0 = #"/" then
|
adam@1
|
141 case String.split xml #"\x3E" of
|
adam@1
|
142 None => return ()
|
adam@1
|
143 | Some (_, xml) =>
|
adam@1
|
144 case p.ExitTag state of
|
adam@1
|
145 None => recur xml p.Initial
|
adam@1
|
146 | Some state =>
|
adam@1
|
147 case p.Finished state of
|
adam@1
|
148 None => recur xml state
|
adam@1
|
149 | Some data =>
|
adam@1
|
150 f data;
|
adam@1
|
151 recur xml p.Initial
|
adam@1
|
152 else if xml <> "" && String.sub xml 0 = #"?" then
|
adam@1
|
153 case String.split xml #"\x3E" of
|
adam@1
|
154 None => return ()
|
adam@1
|
155 | Some (_, xml) => recur xml state
|
adam@1
|
156 else if xml <> "" && String.sub xml 0 = #"!" then
|
adam@2
|
157 if String.lengthGe xml 3 && String.sub xml 1 = #"-" && String.sub xml 2 = #"-" then
|
adam@1
|
158 let
|
adam@1
|
159 fun skipper xml =
|
adam@1
|
160 case String.split xml #"-" of
|
adam@1
|
161 None => xml
|
adam@1
|
162 | Some (_, xml) =>
|
adam@2
|
163 if String.lengthGe xml 2 && String.sub xml 0 = #"-" && String.sub xml 1 = #"\x3E" then
|
adam@1
|
164 String.suffix xml 2
|
adam@1
|
165 else
|
adam@1
|
166 skipper xml
|
adam@1
|
167 in
|
adam@1
|
168 recur (skipper (String.suffix xml 3)) state
|
adam@1
|
169 end
|
adam@1
|
170 else
|
adam@1
|
171 case String.split xml #"]" of
|
adam@1
|
172 None => return ()
|
adam@1
|
173 | Some (_, xml) =>
|
adam@1
|
174 case String.split xml #"\x3E" of
|
adam@1
|
175 None => return ()
|
adam@1
|
176 | Some (_, xml) => recur xml state
|
adam@1
|
177 else
|
adam@1
|
178 case String.msplit {Needle = " >/", Haystack = xml} of
|
adam@1
|
179 None => return ()
|
adam@1
|
180 | Some (tagName, ch, xml) =>
|
adam@1
|
181 let
|
adam@1
|
182 fun readAttrs ch xml acc =
|
adam@1
|
183 case ch of
|
adam@1
|
184 #"\x3E" => (xml, acc, False)
|
adam@1
|
185 | #"/" =>
|
adam@1
|
186 (case String.split xml #"\x3E" of
|
adam@1
|
187 None => (xml, acc, True)
|
adam@1
|
188 | Some (_, xml) => (xml, acc, True))
|
adam@1
|
189 | _ =>
|
adam@2
|
190 if String.lengthGe xml 2 && Char.isSpace (String.sub xml 0) then
|
adam@1
|
191 readAttrs (String.sub xml 0) (String.suffix xml 1) acc
|
adam@1
|
192 else if xml <> "" && String.sub xml 0 = #"\x3E" then
|
adam@1
|
193 (String.suffix xml 1, acc, False)
|
adam@1
|
194 else if xml <> "" && String.sub xml 0 = #"/" then
|
adam@1
|
195 (case String.split xml #"\x3E" of
|
adam@1
|
196 None => (xml, acc, True)
|
adam@1
|
197 | Some (_, xml) => (xml, acc, True))
|
adam@1
|
198 else
|
adam@1
|
199 case String.split xml #"=" of
|
adam@1
|
200 None => (xml, acc, False)
|
adam@1
|
201 | Some (aname, xml) =>
|
adam@1
|
202 if xml = "" || String.sub xml 0 <> #"\"" then
|
adam@1
|
203 (xml, (aname, "") :: acc, False)
|
adam@1
|
204 else
|
adam@1
|
205 case String.split (String.suffix xml 1) #"\"" of
|
adam@1
|
206 None => (xml, (aname, "") :: acc, False)
|
adam@1
|
207 | Some (value, xml) =>
|
adam@1
|
208 if xml = "" then
|
adam@1
|
209 (xml, (aname, value) :: acc, False)
|
adam@1
|
210 else
|
adam@1
|
211 readAttrs (String.sub xml 0) (String.suffix xml 1) ((aname, value) :: acc)
|
adam@1
|
212
|
adam@1
|
213 val (xml, attrs, ended) = readAttrs ch xml []
|
adam@1
|
214
|
adam@1
|
215 fun skipSpaces xml =
|
adam@1
|
216 if xml <> "" && Char.isSpace (String.sub xml 0) then
|
adam@1
|
217 skipSpaces (String.suffix xml 1)
|
adam@1
|
218 else
|
adam@1
|
219 xml
|
adam@1
|
220
|
adam@1
|
221 val xml = skipSpaces xml
|
adam@1
|
222
|
adam@1
|
223 val (xml, cdata) =
|
adam@1
|
224 if ended then
|
adam@1
|
225 (xml, None)
|
adam@1
|
226 else if String.isPrefix {Prefix = "<![CDATA[", Full = xml} then
|
adam@1
|
227 let
|
adam@1
|
228 fun skipper xml acc =
|
adam@1
|
229 case String.split xml #"]" of
|
adam@1
|
230 None => (acc ^ xml, None)
|
adam@1
|
231 | Some (pre, xml) =>
|
adam@2
|
232 if String.lengthGe xml 2 && String.sub xml 0 = #"]" && String.sub xml 1 = #"\x3E" then
|
adam@1
|
233 (String.suffix xml 2, Some (acc ^ pre))
|
adam@1
|
234 else
|
adam@1
|
235 skipper xml (acc ^ "]" ^ pre)
|
adam@1
|
236 in
|
adam@1
|
237 skipper (String.suffix xml 9) ""
|
adam@1
|
238 end
|
adam@1
|
239 else
|
adam@1
|
240 case String.split xml #"<" of
|
adam@1
|
241 None => (xml, None)
|
adam@1
|
242 | Some (cdata, xml) => ("<" ^ xml, Some cdata)
|
adam@1
|
243 in
|
adam@1
|
244 case p.EnterTag {Tag = tagName, Attrs = attrs, Cdata = cdata} state of
|
adam@1
|
245 None => recur xml p.Initial
|
adam@1
|
246 | Some state =>
|
adam@1
|
247 case (if ended then p.ExitTag state else Some state) of
|
adam@1
|
248 None => recur xml p.Initial
|
adam@1
|
249 | Some state =>
|
adam@1
|
250 case p.Finished state of
|
adam@1
|
251 None => recur xml state
|
adam@1
|
252 | Some data =>
|
adam@1
|
253 f data;
|
adam@1
|
254 recur xml p.Initial
|
adam@1
|
255 end
|
adam@1
|
256 in
|
adam@1
|
257 xml <- FeedFfi.fetch url;
|
adam@1
|
258 recur xml p.Initial
|
adam@1
|
259 end
|