"""
A demo that reads in an RSS XML document and emits an HTML file containing
a list of the individual items in the feed.
"""

import sys
import codecs

from xml.sax import make_parser, handler
from xml.sax.saxutils import escape

# --- Templates

# Page header: doctype, <head> and the opening of <body>.  It contains two
# %s slots, the first for the page <title> and the second for the <h1>
# heading; RSSHandler fills both with the channel title.
top = """\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<html>
<head>
<title>%s</title>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
</head>

<body>
<h1>%s</h1>
"""

# Page footer.  It starts by closing the <ul> that holds the feed items,
# then adds a horizontal rule and an attribution before closing the page.
bottom = """
</ul>

<hr>
<address>
Converted to HTML by rss2html.py.
</address>

</body>
</html>
"""

# --- The ContentHandler

class RSSHandler(handler.ContentHandler):
    """SAX content handler that writes an RSS feed out as an HTML page.

    The channel title produces the page header (the ``top`` template), the
    channel description becomes a paragraph, every ``item`` becomes one
    ``<li>`` of a bulleted list, and the closing ``rss`` element emits the
    ``bottom`` template.

    All text taken from the feed is HTML-escaped before it is written, so
    characters such as ``&``, ``<`` and ``"`` cannot break or inject markup.
    As a consequence, markup embedded in a description is shown literally.

    Note that ``_parent`` records the most recently *opened* ``channel``,
    ``image`` or ``item`` element and is not restored when that element
    closes.
    """

    def __init__(self, out=sys.stdout):
        """Create a handler that writes UTF-8 encoded HTML to *out*.

        *out* may be a byte stream, or a text stream that exposes its
        underlying byte stream as ``buffer`` (such as ``sys.stdout`` on
        Python 3).
        """
        handler.ContentHandler.__init__(self)
        # The UTF-8 writer emits bytes, which a text stream refuses to
        # accept; target its binary buffer when it has one.  Streams
        # without a ``buffer`` attribute are used as given.
        self._out = codecs.getwriter('utf-8')(getattr(out, 'buffer', out))

        self._text = ""             # character data of the current element
        self._parent = None         # last opened channel/image/item
        self._list_started = False  # True once "<ul>" has been written
        self._title = None          # fields collected for the current item
        self._link = None
        self._descr = ""

    # ContentHandler methods

    def startElement(self, name, attrs):
        """Note which container element is open and reset the text buffer."""
        if name in ("channel", "image", "item"):
            self._parent = name

        self._text = ""

    def endElement(self, name):
        """Emit or store the text collected for the element that just closed."""
        text = self._text

        if self._parent == "channel":
            if name == "title":
                heading = escape(text)
                self._out.write(top % (heading, heading))
            elif name == "description":
                self._out.write("<p>%s</p>\n" % escape(text))

        elif self._parent == "item":
            if name == "title":
                self._title = text
            elif name == "link":
                self._link = text
            elif name == "description":
                self._descr = text
            elif name == "item":
                self._write_item()

        if name == "rss":
            self._out.write(bottom)

    def characters(self, content):
        """Accumulate character data; the parser may deliver it in pieces."""
        self._text += content

    # Internal helpers

    def _write_item(self):
        """Write the collected item as a list entry and clear its fields."""
        if not self._list_started:
            # The list is opened lazily, just before the first item.
            self._out.write("<ul>\n")
            self._list_started = True

        link = self._link
        # An item without a title falls back to showing its link, if any.
        title = self._title if self._title is not None else (link or "")
        descr = escape(self._descr)

        if link is None:
            # No link available: write the entry without an anchor.
            self._out.write(' <li>%s %s\n' % (escape(title), descr))
        else:
            # The link lands inside a double-quoted attribute, so quotes
            # need escaping in addition to &, < and >.
            href = escape(link, {'"': "&quot;"})
            self._out.write(' <li><a href="%s">%s</a> %s\n' %
                            (href, escape(title), descr))

        self._title = None
        self._link = None
        self._descr = ""

# --- Main program

if __name__ == '__main__':
    # Exit with a usage message instead of an IndexError traceback when
    # the feed argument is missing.  Extra arguments are still ignored.
    if len(sys.argv) < 2:
        sys.exit("usage: %s RSS_SOURCE" % sys.argv[0])

    # Parse the feed named on the command line; the handler writes the
    # resulting HTML to standard output.
    parser = make_parser()
    parser.setContentHandler(RSSHandler())
    parser.parse(sys.argv[1])