adam@0
|
1 task initialize = fn () => FeedFfi.init
|
adam@0
|
2
|
adam@1
|
3 datatype pattern internal output =
|
adam@1
|
4 Transducer of {Initial : internal,
|
adam@1
|
5 EnterTag : {Tag : string, Attrs : list (string * string), Cdata : option string} -> internal -> option internal,
|
adam@1
|
6 ExitTag : internal -> option internal,
|
adam@1
|
7 Finished : internal -> option output}
|
adam@1
|
8
|
adam@1
|
9 con tagInternal (attrs :: {Unit}) = option {Attrs : $(mapU string attrs), Cdata : option string}
|
adam@1
|
10
|
adam@1
|
11 fun tag [attrs ::: {Unit}] (fl : folder attrs) (name : string) (attrs : $(mapU string attrs))
|
adam@1
|
12 : pattern (tagInternal attrs) {Attrs : $(mapU string attrs), Cdata : option string} =
|
adam@1
|
13 Transducer {Initial = None,
|
adam@1
|
14 EnterTag = fn tinfo state =>
|
adam@1
|
15 case state of
|
adam@1
|
16 Some _ => None
|
adam@1
|
17 | None =>
|
adam@1
|
18 if tinfo.Tag <> name then
|
adam@1
|
19 None
|
adam@1
|
20 else
|
adam@1
|
21 case @foldUR [string] [fn r => option $(mapU string r)]
|
adam@1
|
22 (fn [nm ::_] [r ::_] [[nm] ~ r] aname ro =>
|
adam@1
|
23 case ro of
|
adam@1
|
24 None => None
|
adam@1
|
25 | Some r =>
|
adam@1
|
26 case List.assoc aname tinfo.Attrs of
|
adam@1
|
27 None => None
|
adam@1
|
28 | Some v => Some ({nm = v} ++ r))
|
adam@1
|
29 (Some {}) fl attrs of
|
adam@1
|
30 None => None
|
adam@1
|
31 | Some vs => Some (Some {Attrs = vs, Cdata = tinfo.Cdata}),
|
adam@1
|
32 ExitTag = Some,
|
adam@1
|
33 Finished = fn x => x}
|
adam@1
|
34
|
adam@1
|
35 datatype status a = Initial | Failed | Matched of a
|
adam@1
|
36
|
adam@1
|
37 con childrenInternal (parent :: Type) (children :: {Type}) = option (parent * int * $(map status children))
|
adam@1
|
38
|
adam@1
|
39 fun children [parentI ::: Type] [parent ::: Type] [children ::: {(Type * Type)}]
|
adam@1
|
40 ((Transducer parent) : pattern parentI parent) (children : $(map (fn (i, d) => pattern i d) children)) (fl : folder children)
|
adam@1
|
41 : pattern (childrenInternal parentI (map fst children)) (parent * $(map snd children)) =
|
adam@1
|
42 Transducer {Initial = None,
|
adam@1
|
43 EnterTag = fn tinfo state =>
|
adam@1
|
44 case state of
|
adam@1
|
45 None =>
|
adam@1
|
46 (case parent.EnterTag tinfo parent.Initial of
|
adam@1
|
47 None => None
|
adam@1
|
48 | Some pstate => Some (Some (pstate, 1, @map0 [status] (fn [t ::_] => Initial)
|
adam@1
|
49 (@@Folder.mp [fst] [_] fl))))
|
adam@1
|
50 | Some (pstate, depth, cstates) =>
|
adam@1
|
51 Some (Some (pstate,
|
adam@1
|
52 depth+1,
|
adam@1
|
53 @map2 [fn (i, d) => pattern i d] [fn (i, d) => status i] [fn (i, d) => status i]
|
adam@1
|
54 (fn [p] ((Transducer ch) : pattern p.1 p.2) (cstate : status p.1) =>
|
adam@1
|
55 case cstate of
|
adam@1
|
56 Failed => Failed
|
adam@1
|
57 | Initial =>
|
adam@1
|
58 (case ch.EnterTag tinfo ch.Initial of
|
adam@1
|
59 None => Failed
|
adam@1
|
60 | Some v => Matched v)
|
adam@1
|
61 | v => v)
|
adam@1
|
62 fl children cstates)),
|
adam@1
|
63 ExitTag = fn state =>
|
adam@1
|
64 case state of
|
adam@1
|
65 None => None
|
adam@1
|
66 | Some (pstate, depth, cstates) =>
|
adam@1
|
67 case (if depth = 1 then
|
adam@1
|
68 parent.ExitTag pstate
|
adam@1
|
69 else
|
adam@1
|
70 Some pstate) of
|
adam@1
|
71 None => None
|
adam@1
|
72 | Some pstate =>
|
adam@1
|
73 if depth = 1 then
|
adam@1
|
74 Some (Some (pstate, 0, cstates))
|
adam@1
|
75 else
|
adam@1
|
76 case @foldR2 [fn (i, d) => pattern i d] [fn (i, d) => status i]
|
adam@1
|
77 [fn cs => option $(map (fn (i, d) => status i) cs)]
|
adam@1
|
78 (fn [nm ::_] [p ::_] [r ::_] [[nm] ~ r] ((Transducer ch) : pattern p.1 p.2) (cstate : status p.1) acc =>
|
adam@1
|
79 case acc of
|
adam@1
|
80 None => None
|
adam@1
|
81 | Some acc =>
|
adam@1
|
82 case cstate of
|
adam@1
|
83 Matched cstate =>
|
adam@1
|
84 (case ch.ExitTag cstate of
|
adam@1
|
85 None => None
|
adam@1
|
86 | Some cstate' => Some ({nm = Matched cstate'} ++ acc))
|
adam@1
|
87 | _ => Some ({nm = Initial} ++ acc))
|
adam@1
|
88 (Some {}) fl children cstates of
|
adam@1
|
89 None => None
|
adam@1
|
90 | Some cstates =>
|
adam@1
|
91 Some (Some (pstate, depth-1, cstates)),
|
adam@1
|
92 Finished = fn state =>
|
adam@1
|
93 case state of
|
adam@1
|
94 Some (pstate, 0, cstates) =>
|
adam@1
|
95 (case parent.Finished pstate of
|
adam@1
|
96 None => None
|
adam@1
|
97 | Some pdata =>
|
adam@1
|
98 case @foldR2 [fn (i, d) => pattern i d] [fn (i, d) => status i] [fn cs => option $(map snd cs)]
|
adam@1
|
99 (fn [nm ::_] [p ::_] [r ::_] [[nm] ~ r] ((Transducer ch) : pattern p.1 p.2) (cstate : status p.1) acc =>
|
adam@1
|
100 case acc of
|
adam@1
|
101 None => None
|
adam@1
|
102 | Some acc =>
|
adam@1
|
103 case cstate of
|
adam@1
|
104 Initial => None
|
adam@1
|
105 | Failed => None
|
adam@1
|
106 | Matched cstate =>
|
adam@1
|
107 case ch.Finished cstate of
|
adam@1
|
108 None => None
|
adam@1
|
109 | Some cdata => Some ({nm = cdata} ++ acc))
|
adam@1
|
110 (Some {}) fl children cstates of
|
adam@1
|
111 None => None
|
adam@1
|
112 | Some cdata => Some (pdata, cdata))
|
adam@1
|
113 | _ => None}
|
adam@1
|
114
|
adam@1
|
115 fun app [internal ::: Type] [data ::: Type] ((Transducer p) : pattern internal data) (f : data -> transaction {}) (url : string) : transaction {} =
|
adam@1
|
116 let
|
adam@1
|
117 fun recur xml state =
|
adam@1
|
118 case String.split xml #"<" of
|
adam@1
|
119 None => return ()
|
adam@1
|
120 | Some (_, xml) =>
|
adam@1
|
121 if xml <> "" && String.sub xml 0 = #"/" then
|
adam@1
|
122 case String.split xml #"\x3E" of
|
adam@1
|
123 None => return ()
|
adam@1
|
124 | Some (_, xml) =>
|
adam@1
|
125 case p.ExitTag state of
|
adam@1
|
126 None => recur xml p.Initial
|
adam@1
|
127 | Some state =>
|
adam@1
|
128 case p.Finished state of
|
adam@1
|
129 None => recur xml state
|
adam@1
|
130 | Some data =>
|
adam@1
|
131 f data;
|
adam@1
|
132 recur xml p.Initial
|
adam@1
|
133 else if xml <> "" && String.sub xml 0 = #"?" then
|
adam@1
|
134 case String.split xml #"\x3E" of
|
adam@1
|
135 None => return ()
|
adam@1
|
136 | Some (_, xml) => recur xml state
|
adam@1
|
137 else if xml <> "" && String.sub xml 0 = #"!" then
|
adam@1
|
138 if String.length xml >= 3 && String.sub xml 1 = #"-" && String.sub xml 2 = #"-" then
|
adam@1
|
139 let
|
adam@1
|
140 fun skipper xml =
|
adam@1
|
141 case String.split xml #"-" of
|
adam@1
|
142 None => xml
|
adam@1
|
143 | Some (_, xml) =>
|
adam@1
|
144 if String.length xml >= 2 && String.sub xml 0 = #"-" && String.sub xml 1 = #"\x3E" then
|
adam@1
|
145 String.suffix xml 2
|
adam@1
|
146 else
|
adam@1
|
147 skipper xml
|
adam@1
|
148 in
|
adam@1
|
149 recur (skipper (String.suffix xml 3)) state
|
adam@1
|
150 end
|
adam@1
|
151 else
|
adam@1
|
152 case String.split xml #"]" of
|
adam@1
|
153 None => return ()
|
adam@1
|
154 | Some (_, xml) =>
|
adam@1
|
155 case String.split xml #"\x3E" of
|
adam@1
|
156 None => return ()
|
adam@1
|
157 | Some (_, xml) => recur xml state
|
adam@1
|
158 else
|
adam@1
|
159 case String.msplit {Needle = " >/", Haystack = xml} of
|
adam@1
|
160 None => return ()
|
adam@1
|
161 | Some (tagName, ch, xml) =>
|
adam@1
|
162 let
|
adam@1
|
163 fun readAttrs ch xml acc =
|
adam@1
|
164 case ch of
|
adam@1
|
165 #"\x3E" => (xml, acc, False)
|
adam@1
|
166 | #"/" =>
|
adam@1
|
167 (case String.split xml #"\x3E" of
|
adam@1
|
168 None => (xml, acc, True)
|
adam@1
|
169 | Some (_, xml) => (xml, acc, True))
|
adam@1
|
170 | _ =>
|
adam@1
|
171 if String.length xml >= 2 && Char.isSpace (String.sub xml 0) then
|
adam@1
|
172 readAttrs (String.sub xml 0) (String.suffix xml 1) acc
|
adam@1
|
173 else if xml <> "" && String.sub xml 0 = #"\x3E" then
|
adam@1
|
174 (String.suffix xml 1, acc, False)
|
adam@1
|
175 else if xml <> "" && String.sub xml 0 = #"/" then
|
adam@1
|
176 (case String.split xml #"\x3E" of
|
adam@1
|
177 None => (xml, acc, True)
|
adam@1
|
178 | Some (_, xml) => (xml, acc, True))
|
adam@1
|
179 else
|
adam@1
|
180 case String.split xml #"=" of
|
adam@1
|
181 None => (xml, acc, False)
|
adam@1
|
182 | Some (aname, xml) =>
|
adam@1
|
183 if xml = "" || String.sub xml 0 <> #"\"" then
|
adam@1
|
184 (xml, (aname, "") :: acc, False)
|
adam@1
|
185 else
|
adam@1
|
186 case String.split (String.suffix xml 1) #"\"" of
|
adam@1
|
187 None => (xml, (aname, "") :: acc, False)
|
adam@1
|
188 | Some (value, xml) =>
|
adam@1
|
189 if xml = "" then
|
adam@1
|
190 (xml, (aname, value) :: acc, False)
|
adam@1
|
191 else
|
adam@1
|
192 readAttrs (String.sub xml 0) (String.suffix xml 1) ((aname, value) :: acc)
|
adam@1
|
193
|
adam@1
|
194 val (xml, attrs, ended) = readAttrs ch xml []
|
adam@1
|
195
|
adam@1
|
196 fun skipSpaces xml =
|
adam@1
|
197 if xml <> "" && Char.isSpace (String.sub xml 0) then
|
adam@1
|
198 skipSpaces (String.suffix xml 1)
|
adam@1
|
199 else
|
adam@1
|
200 xml
|
adam@1
|
201
|
adam@1
|
202 val xml = skipSpaces xml
|
adam@1
|
203
|
adam@1
|
204 val (xml, cdata) =
|
adam@1
|
205 if ended then
|
adam@1
|
206 (xml, None)
|
adam@1
|
207 else if String.isPrefix {Prefix = "<![CDATA[", Full = xml} then
|
adam@1
|
208 let
|
adam@1
|
209 fun skipper xml acc =
|
adam@1
|
210 case String.split xml #"]" of
|
adam@1
|
211 None => (acc ^ xml, None)
|
adam@1
|
212 | Some (pre, xml) =>
|
adam@1
|
213 if String.length xml >= 2 && String.sub xml 0 = #"]" && String.sub xml 1 = #"\x3E" then
|
adam@1
|
214 (String.suffix xml 2, Some (acc ^ pre))
|
adam@1
|
215 else
|
adam@1
|
216 skipper xml (acc ^ "]" ^ pre)
|
adam@1
|
217 in
|
adam@1
|
218 skipper (String.suffix xml 9) ""
|
adam@1
|
219 end
|
adam@1
|
220 else
|
adam@1
|
221 case String.split xml #"<" of
|
adam@1
|
222 None => (xml, None)
|
adam@1
|
223 | Some (cdata, xml) => ("<" ^ xml, Some cdata)
|
adam@1
|
224 in
|
adam@1
|
225 case p.EnterTag {Tag = tagName, Attrs = attrs, Cdata = cdata} state of
|
adam@1
|
226 None => recur xml p.Initial
|
adam@1
|
227 | Some state =>
|
adam@1
|
228 case (if ended then p.ExitTag state else Some state) of
|
adam@1
|
229 None => recur xml p.Initial
|
adam@1
|
230 | Some state =>
|
adam@1
|
231 case p.Finished state of
|
adam@1
|
232 None => recur xml state
|
adam@1
|
233 | Some data =>
|
adam@1
|
234 f data;
|
adam@1
|
235 recur xml p.Initial
|
adam@1
|
236 end
|
adam@1
|
237 in
|
adam@1
|
238 xml <- FeedFfi.fetch url;
|
adam@1
|
239 recur xml p.Initial
|
adam@1
|
240 end
|