diff src/ur/feed.ur @ 1:8de269c09617

Outputting a summary of Marginal Revolution RSS feed
author Adam Chlipala <adam@chlipala.net>
date Tue, 11 Jan 2011 13:17:44 -0500
parents ad85b8813e8a
children 2ec84d349838
line wrap: on
line diff
--- a/src/ur/feed.ur	Tue Jan 11 10:31:48 2011 -0500
+++ b/src/ur/feed.ur	Tue Jan 11 13:17:44 2011 -0500
@@ -1,3 +1,240 @@
 task initialize = fn () => FeedFfi.init
 
-val fetch = FeedFfi.fetch
+datatype pattern internal output =
+         Transducer of {Initial : internal,
+                        EnterTag : {Tag : string, Attrs : list (string * string), Cdata : option string} -> internal -> option internal,
+                        ExitTag : internal -> option internal,
+                        Finished : internal -> option output}
+
+con tagInternal (attrs :: {Unit}) = option {Attrs : $(mapU string attrs), Cdata : option string}
+
+fun tag [attrs ::: {Unit}] (fl : folder attrs) (name : string) (attrs : $(mapU string attrs))
+    : pattern (tagInternal attrs) {Attrs : $(mapU string attrs), Cdata : option string} =
+    Transducer {Initial = None,
+                EnterTag = fn tinfo state =>
+                              case state of
+                                  Some _ => None
+                                | None =>
+                                  if tinfo.Tag <> name then
+                                      None
+                                  else
+                                      case @foldUR [string] [fn r => option $(mapU string r)]
+                                            (fn [nm ::_] [r ::_] [[nm] ~ r] aname ro =>
+                                                case ro of
+                                                    None => None
+                                                  | Some r =>
+                                                    case List.assoc aname tinfo.Attrs of
+                                                        None => None
+                                                      | Some v => Some ({nm = v} ++ r))
+                                            (Some {}) fl attrs of
+                                          None => None
+                                        | Some vs => Some (Some {Attrs = vs, Cdata = tinfo.Cdata}),
+                ExitTag = Some,
+                Finished = fn x => x}
+
+datatype status a = Initial | Failed | Matched of a
+
+con childrenInternal (parent :: Type) (children :: {Type}) = option (parent * int * $(map status children))
+
+fun children [parentI ::: Type] [parent ::: Type] [children ::: {(Type * Type)}]
+             ((Transducer parent) : pattern parentI parent) (children : $(map (fn (i, d) => pattern i d) children)) (fl : folder children)
+    : pattern (childrenInternal parentI (map fst children)) (parent * $(map snd children)) =
+    Transducer {Initial = None,
+                EnterTag = fn tinfo state =>
+                              case state of
+                                  None =>
+                                  (case parent.EnterTag tinfo parent.Initial of
+                                       None => None
+                                     | Some pstate => Some (Some (pstate, 1, @map0 [status] (fn [t ::_] => Initial)
+                                                                              (@@Folder.mp [fst] [_] fl))))
+                                | Some (pstate, depth, cstates) =>
+                                  Some (Some (pstate,
+                                              depth+1,
+                                              @map2 [fn (i, d) => pattern i d] [fn (i, d) => status i] [fn (i, d) => status i]
+                                               (fn [p] ((Transducer ch) : pattern p.1 p.2) (cstate : status p.1) =>
+                                                   case cstate of
+                                                       Failed => Failed
+                                                     | Initial =>
+                                                       (case ch.EnterTag tinfo ch.Initial of
+                                                            None => Failed
+                                                          | Some v => Matched v)
+                                                     | v => v)
+                                               fl children cstates)),
+                ExitTag = fn state =>
+                             case state of
+                                 None => None
+                               | Some (pstate, depth, cstates) =>
+                                 case (if depth = 1 then
+                                           parent.ExitTag pstate
+                                       else
+                                           Some pstate) of
+                                     None => None
+                                   | Some pstate =>
+                                     if depth = 1 then
+                                         Some (Some (pstate, 0, cstates))
+                                     else
+                                         case @foldR2 [fn (i, d) => pattern i d] [fn (i, d) => status i]
+                                               [fn cs => option $(map (fn (i, d) => status i) cs)]
+                                               (fn [nm ::_] [p ::_] [r ::_] [[nm] ~ r] ((Transducer ch) : pattern p.1 p.2) (cstate : status p.1) acc =>
+                                                   case acc of
+                                                       None => None
+                                                     | Some acc =>
+                                                       case cstate of
+                                                           Matched cstate =>
+                                                           (case ch.ExitTag cstate of
+                                                                None => None
+                                                              | Some cstate' => Some ({nm = Matched cstate'} ++ acc))
+                                                         | _ => Some ({nm = Initial} ++ acc))
+                                               (Some {}) fl children cstates of
+                                             None => None
+                                           | Some cstates =>
+                                             Some (Some (pstate, depth-1, cstates)),
+                Finished = fn state =>
+                              case state of
+                                  Some (pstate, 0, cstates) =>
+                                  (case parent.Finished pstate of
+                                       None => None
+                                     | Some pdata =>
+                                       case @foldR2 [fn (i, d) => pattern i d] [fn (i, d) => status i] [fn cs => option $(map snd cs)]
+                                             (fn [nm ::_] [p ::_] [r ::_] [[nm] ~ r] ((Transducer ch) : pattern p.1 p.2) (cstate : status p.1) acc =>
+                                                 case acc of
+                                                     None => None
+                                                   | Some acc =>
+                                                     case cstate of
+                                                         Initial => None
+                                                       | Failed => None
+                                                       | Matched cstate =>
+                                                         case ch.Finished cstate of
+                                                             None => None
+                                                           | Some cdata => Some ({nm = cdata} ++ acc))
+                                             (Some {}) fl children cstates of
+                                           None => None
+                                         | Some cdata => Some (pdata, cdata))
+                                | _ => None}
+
+fun app [internal ::: Type] [data ::: Type] ((Transducer p) : pattern internal data) (f : data -> transaction {}) (url : string) : transaction {} =
+    let
+        fun recur xml state =
+            case String.split xml #"<" of
+                None => return ()
+              | Some (_, xml) =>
+                if xml <> "" && String.sub xml 0 = #"/" then
+                    case String.split xml #"\x3E" of
+                        None => return ()
+                      | Some (_, xml) =>
+                        case p.ExitTag state of
+                            None => recur xml p.Initial
+                          | Some state =>
+                            case p.Finished state of
+                                 None => recur xml state
+                               | Some data =>
+                                 f data;
+                                 recur xml p.Initial
+                else if xml <> "" && String.sub xml 0 = #"?" then
+                    case String.split xml #"\x3E" of
+                        None => return ()
+                      | Some (_, xml) => recur xml state
+                else if xml <> "" && String.sub xml 0 = #"!" then
+                    if String.length xml >= 3 && String.sub xml 1 = #"-" && String.sub xml 2 = #"-" then
+                        let
+                            fun skipper xml =
+                                case String.split xml #"-" of
+                                    None => xml
+                                  | Some (_, xml) =>
+                                    if String.length xml >= 2 && String.sub xml 0 = #"-" && String.sub xml 1 = #"\x3E" then
+                                        String.suffix xml 2
+                                    else
+                                        skipper xml
+                        in
+                            recur (skipper (String.suffix xml 3)) state
+                        end
+                    else
+                        case String.split xml #"]" of
+                            None => return ()
+                          | Some (_, xml) =>
+                            case String.split xml #"\x3E" of
+                                None => return ()
+                              | Some (_, xml) => recur xml state
+                else
+                    case String.msplit {Needle = " >/", Haystack = xml} of
+                        None => return ()
+                      | Some (tagName, ch, xml) =>
+                        let
+                            fun readAttrs ch xml acc =
+                                case ch of
+                                    #"\x3E" => (xml, acc, False)
+                                  | #"/" =>
+                                    (case String.split xml #"\x3E" of
+                                         None => (xml, acc, True)
+                                       | Some (_, xml) => (xml, acc, True))
+                                  | _ =>
+                                    if String.length xml >= 2 && Char.isSpace (String.sub xml 0) then
+                                        readAttrs (String.sub xml 0) (String.suffix xml 1) acc
+                                    else if xml <> "" && String.sub xml 0 = #"\x3E" then
+                                        (String.suffix xml 1, acc, False)
+                                    else if xml <> "" && String.sub xml 0 = #"/" then
+                                        (case String.split xml #"\x3E" of
+                                             None => (xml, acc, True)
+                                           | Some (_, xml) => (xml, acc, True))
+                                    else
+                                        case String.split xml #"=" of
+                                            None => (xml, acc, False)
+                                          | Some (aname, xml) =>
+                                            if xml = "" || String.sub xml 0 <> #"\"" then
+                                                (xml, (aname, "") :: acc, False)
+                                            else
+                                                case String.split (String.suffix xml 1) #"\"" of
+                                                    None => (xml, (aname, "") :: acc, False)
+                                                  | Some (value, xml) =>
+                                                    if xml = "" then
+                                                        (xml, (aname, value) :: acc, False)
+                                                    else
+                                                        readAttrs (String.sub xml 0) (String.suffix xml 1) ((aname, value) :: acc)
+
+                            val (xml, attrs, ended) = readAttrs ch xml []
+
+                            fun skipSpaces xml =
+                                if xml <> "" && Char.isSpace (String.sub xml 0) then
+                                    skipSpaces (String.suffix xml 1)
+                                else
+                                    xml
+
+                            val xml = skipSpaces xml
+
+                            val (xml, cdata) =
+                                if ended then
+                                    (xml, None)
+                                else if String.isPrefix {Prefix = "<![CDATA[", Full = xml} then
+                                    let
+                                        fun skipper xml acc =
+                                            case String.split xml #"]" of
+                                                None => (acc ^ xml, None)
+                                              | Some (pre, xml) =>
+                                                if String.length xml >= 2 && String.sub xml 0 = #"]" && String.sub xml 1 = #"\x3E" then
+                                                    (String.suffix xml 2, Some (acc ^ pre))
+                                                else
+                                                    skipper xml (acc ^ "]" ^ pre)
+                                    in
+                                        skipper (String.suffix xml 9) ""
+                                    end
+                                else
+                                    case String.split xml #"<" of
+                                        None => (xml, None)
+                                      | Some (cdata, xml) => ("<" ^ xml, Some cdata)
+                        in
+                            case p.EnterTag {Tag = tagName, Attrs = attrs, Cdata = cdata} state of
+                                None => recur xml p.Initial
+                              | Some state =>
+                                case (if ended then p.ExitTag state else Some state) of
+                                    None => recur xml p.Initial
+                                  | Some state =>
+                                    case p.Finished state of
+                                         None => recur xml state
+                                       | Some data =>
+                                         f data;
+                                         recur xml p.Initial
+                        end
+    in
+        xml <- FeedFfi.fetch url;
+        recur xml p.Initial
+    end