adam@0: task initialize = fn () => FeedFfi.init adam@0: adam@1: datatype pattern internal output = adam@1: Transducer of {Initial : internal, adam@1: EnterTag : {Tag : string, Attrs : list (string * string), Cdata : option string} -> internal -> option internal, adam@1: ExitTag : internal -> option internal, adam@1: Finished : internal -> option output} adam@1: adam@1: con tagInternal (attrs :: {Unit}) = option {Attrs : $(mapU string attrs), Cdata : option string} adam@1: adam@1: fun tag [attrs ::: {Unit}] (fl : folder attrs) (name : string) (attrs : $(mapU string attrs)) adam@1: : pattern (tagInternal attrs) {Attrs : $(mapU string attrs), Cdata : option string} = adam@1: Transducer {Initial = None, adam@1: EnterTag = fn tinfo state => adam@1: case state of adam@1: Some _ => None adam@1: | None => adam@1: if tinfo.Tag <> name then adam@1: None adam@1: else adam@1: case @foldUR [string] [fn r => option $(mapU string r)] adam@1: (fn [nm ::_] [r ::_] [[nm] ~ r] aname ro => adam@1: case ro of adam@1: None => None adam@1: | Some r => adam@1: case List.assoc aname tinfo.Attrs of adam@1: None => None adam@1: | Some v => Some ({nm = v} ++ r)) adam@1: (Some {}) fl attrs of adam@1: None => None adam@1: | Some vs => Some (Some {Attrs = vs, Cdata = tinfo.Cdata}), adam@1: ExitTag = Some, adam@1: Finished = fn x => x} adam@1: adam@1: datatype status a = Initial | Failed | Matched of a adam@1: adam@1: con childrenInternal (parent :: Type) (children :: {Type}) = option (parent * int * $(map status children)) adam@1: adam@1: fun children [parentI ::: Type] [parent ::: Type] [children ::: {(Type * Type)}] adam@1: ((Transducer parent) : pattern parentI parent) (children : $(map (fn (i, d) => pattern i d) children)) (fl : folder children) adam@1: : pattern (childrenInternal parentI (map fst children)) (parent * $(map snd children)) = adam@1: Transducer {Initial = None, adam@1: EnterTag = fn tinfo state => adam@1: case state of adam@1: None => adam@1: (case parent.EnterTag tinfo parent.Initial of adam@1: None => None adam@1: | Some pstate => Some (Some (pstate, 1, @map0 [status] (fn [t ::_] => Initial) adam@1: (@@Folder.mp [fst] [_] fl)))) adam@1: | Some (pstate, depth, cstates) => adam@1: Some (Some (pstate, adam@1: depth+1, adam@1: @map2 [fn (i, d) => pattern i d] [fn (i, d) => status i] [fn (i, d) => status i] adam@1: (fn [p] ((Transducer ch) : pattern p.1 p.2) (cstate : status p.1) => adam@1: case cstate of adam@1: Failed => Failed adam@1: | Initial => adam@1: (case ch.EnterTag tinfo ch.Initial of adam@1: None => Failed adam@1: | Some v => Matched v) adam@1: | v => v) adam@1: fl children cstates)), adam@1: ExitTag = fn state => adam@1: case state of adam@1: None => None adam@1: | Some (pstate, depth, cstates) => adam@1: case (if depth = 1 then adam@1: parent.ExitTag pstate adam@1: else adam@1: Some pstate) of adam@1: None => None adam@1: | Some pstate => adam@1: if depth = 1 then adam@1: Some (Some (pstate, 0, cstates)) adam@1: else adam@1: case @foldR2 [fn (i, d) => pattern i d] [fn (i, d) => status i] adam@1: [fn cs => option $(map (fn (i, d) => status i) cs)] adam@1: (fn [nm ::_] [p ::_] [r ::_] [[nm] ~ r] ((Transducer ch) : pattern p.1 p.2) (cstate : status p.1) acc => adam@1: case acc of adam@1: None => None adam@1: | Some acc => adam@1: case cstate of adam@1: Matched cstate => adam@1: (case ch.ExitTag cstate of adam@1: None => None adam@1: | Some cstate' => Some ({nm = Matched cstate'} ++ acc)) adam@1: | _ => Some ({nm = Initial} ++ acc)) adam@1: (Some {}) fl children cstates of adam@1: None => None adam@1: | Some cstates => adam@1: Some (Some (pstate, depth-1, cstates)), adam@1: Finished = fn state => adam@1: case state of adam@1: Some (pstate, 0, cstates) => adam@1: (case parent.Finished pstate of adam@1: None => None adam@1: | Some pdata => adam@1: case @foldR2 [fn (i, d) => pattern i d] [fn (i, d) => status i] [fn cs => option $(map snd cs)] adam@1: (fn [nm ::_] [p ::_] [r ::_] [[nm] ~ r] ((Transducer ch) : pattern p.1 p.2) (cstate : status p.1) acc => adam@1: case acc of adam@1: None => None adam@1: | Some acc => adam@1: case cstate of adam@1: Initial => None adam@1: | Failed => None adam@1: | Matched cstate => adam@1: case ch.Finished cstate of adam@1: None => None adam@1: | Some cdata => Some ({nm = cdata} ++ acc)) adam@1: (Some {}) fl children cstates of adam@1: None => None adam@1: | Some cdata => Some (pdata, cdata)) adam@1: | _ => None} adam@1: adam@1: fun app [internal ::: Type] [data ::: Type] ((Transducer p) : pattern internal data) (f : data -> transaction {}) (url : string) : transaction {} = adam@1: let adam@1: fun recur xml state = adam@1: case String.split xml #"<" of adam@1: None => return () adam@1: | Some (_, xml) => adam@1: if xml <> "" && String.sub xml 0 = #"/" then adam@1: case String.split xml #"\x3E" of adam@1: None => return () adam@1: | Some (_, xml) => adam@1: case p.ExitTag state of adam@1: None => recur xml p.Initial adam@1: | Some state => adam@1: case p.Finished state of adam@1: None => recur xml state adam@1: | Some data => adam@1: f data; adam@1: recur xml p.Initial adam@1: else if xml <> "" && String.sub xml 0 = #"?" then adam@1: case String.split xml #"\x3E" of adam@1: None => return () adam@1: | Some (_, xml) => recur xml state adam@1: else if xml <> "" && String.sub xml 0 = #"!" then adam@2: if String.lengthGe xml 3 && String.sub xml 1 = #"-" && String.sub xml 2 = #"-" then adam@1: let adam@1: fun skipper xml = adam@1: case String.split xml #"-" of adam@1: None => xml adam@1: | Some (_, xml) => adam@2: if String.lengthGe xml 2 && String.sub xml 0 = #"-" && String.sub xml 1 = #"\x3E" then adam@1: String.suffix xml 2 adam@1: else adam@1: skipper xml adam@1: in adam@1: recur (skipper (String.suffix xml 3)) state adam@1: end adam@1: else adam@1: case String.split xml #"]" of adam@1: None => return () adam@1: | Some (_, xml) => adam@1: case String.split xml #"\x3E" of adam@1: None => return () adam@1: | Some (_, xml) => recur xml state adam@1: else adam@1: case String.msplit {Needle = " >/", Haystack = xml} of adam@1: None => return () adam@1: | Some (tagName, ch, xml) => adam@1: let adam@1: fun readAttrs ch xml acc = adam@1: case ch of adam@1: #"\x3E" => (xml, acc, False) adam@1: | #"/" => adam@1: (case String.split xml #"\x3E" of adam@1: None => (xml, acc, True) adam@1: | Some (_, xml) => (xml, acc, True)) adam@1: | _ => adam@2: if String.lengthGe xml 2 && Char.isSpace (String.sub xml 0) then adam@1: readAttrs (String.sub xml 0) (String.suffix xml 1) acc adam@1: else if xml <> "" && String.sub xml 0 = #"\x3E" then adam@1: (String.suffix xml 1, acc, False) adam@1: else if xml <> "" && String.sub xml 0 = #"/" then adam@1: (case String.split xml #"\x3E" of adam@1: None => (xml, acc, True) adam@1: | Some (_, xml) => (xml, acc, True)) adam@1: else adam@1: case String.split xml #"=" of adam@1: None => (xml, acc, False) adam@1: | Some (aname, xml) => adam@1: if xml = "" || String.sub xml 0 <> #"\"" then adam@1: (xml, (aname, "") :: acc, False) adam@1: else adam@1: case String.split (String.suffix xml 1) #"\"" of adam@1: None => (xml, (aname, "") :: acc, False) adam@1: | Some (value, xml) => adam@1: if xml = "" then adam@1: (xml, (aname, value) :: acc, False) adam@1: else adam@1: readAttrs (String.sub xml 0) (String.suffix xml 1) ((aname, value) :: acc) adam@1: adam@1: val (xml, attrs, ended) = readAttrs ch xml [] adam@1: adam@1: fun skipSpaces xml = adam@1: if xml <> "" && Char.isSpace (String.sub xml 0) then adam@1: skipSpaces (String.suffix xml 1) adam@1: else adam@1: xml adam@1: adam@1: val xml = skipSpaces xml adam@1: adam@1: val (xml, cdata) = adam@1: if ended then adam@1: (xml, None) adam@1: else if String.isPrefix {Prefix = " (acc ^ xml, None) adam@1: | Some (pre, xml) => adam@2: if String.lengthGe xml 2 && String.sub xml 0 = #"]" && String.sub xml 1 = #"\x3E" then adam@1: (String.suffix xml 2, Some (acc ^ pre)) adam@1: else adam@1: skipper xml (acc ^ "]" ^ pre) adam@1: in adam@1: skipper (String.suffix xml 9) "" adam@1: end adam@1: else adam@1: case String.split xml #"<" of adam@1: None => (xml, None) adam@1: | Some (cdata, xml) => ("<" ^ xml, Some cdata) adam@1: in adam@1: case p.EnterTag {Tag = tagName, Attrs = attrs, Cdata = cdata} state of adam@1: None => recur xml p.Initial adam@1: | Some state => adam@1: case (if ended then p.ExitTag state else Some state) of adam@1: None => recur xml p.Initial adam@1: | Some state => adam@1: case p.Finished state of adam@1: None => recur xml state adam@1: | Some data => adam@1: f data; adam@1: recur xml p.Initial adam@1: end adam@1: in adam@1: xml <- FeedFfi.fetch url; adam@1: recur xml p.Initial adam@1: end