[391] | 1 | """
|
---|
| 2 | A demo that reads in an RSS XML document and emits an HTML file containing
|
---|
| 3 | a list of the individual items in the feed.
|
---|
| 4 | """
|
---|
| 5 |
|
---|
[2] | 6 | import sys
|
---|
[391] | 7 | import codecs
|
---|
[2] | 8 |
|
---|
| 9 | from xml.sax import make_parser, handler
|
---|
| 10 |
|
---|
| 11 | # --- Templates
|
---|
| 12 |
|
---|
# Page header template: the DOCTYPE, the <head> section and the opening of
# <body>.  It has two %s slots -- the <title> text and the <h1> heading --
# and RSSHandler.endElement fills both with the channel title.
# The backslash right after the opening quotes keeps the output from
# starting with an empty line.
top = """\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<html>
<head>
<title>%s</title>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
</head>

<body>
<h1>%s</h1>
"""
| 24 |
|
---|
# Page footer template: closes the <ul> item list, adds a horizontal rule and
# an attribution, then closes <body> and <html>.  It takes no substitutions
# and always contains the closing </ul> tag.
bottom = """
</ul>

<hr>
<address>
Converted to HTML by rss2html.py.
</address>

</body>
</html>
"""
| 36 |
|
---|
| 37 | # --- The ContentHandler
|
---|
| 38 |
|
---|
class RSSHandler(handler.ContentHandler):
    """SAX content handler that renders an RSS feed as an HTML page.

    Character data is collected per element.  When an element closes, its
    text is interpreted according to the most recently opened container
    element (``channel``, ``image`` or ``item``):

    * channel ``title``       -> page header (the ``top`` template)
    * channel ``description`` -> an introductory ``<p>`` paragraph
    * item ``title`` / ``link`` / ``description`` -> one ``<li>`` entry,
      written when the ``item`` element itself closes
    * closing ``rss``         -> page footer (the ``bottom`` template)

    Titles and links are HTML-escaped before being written.  Descriptions
    are written as-is (see the note in ``endElement``).

    Parameters:
        out: stream the HTML is written to, encoded as UTF-8.  Either a
            byte stream, or a text stream that exposes its byte stream as
            ``.buffer`` (such as ``sys.stdout`` on Python 3).
    """

    def __init__(self, out=sys.stdout):
        handler.ContentHandler.__init__(self)
        # The UTF-8 writer hands encoded bytes to the wrapped stream.  A text
        # stream would reject those, so write to its underlying byte stream
        # when it has one; byte streams are used unchanged.
        self._out = codecs.getwriter('utf-8')(getattr(out, "buffer", out))

        self._text = ""             # character data of the current element
        self._parent = None         # last opened channel/image/item element
        self._list_started = False  # True once "<ul>" has been written
        self._title = None          # title of the item being collected
        self._link = None           # link of the item being collected
        self._descr = ""            # description of the item being collected

    # Helpers

    @staticmethod
    def _escape_html(text, quote=False):
        """Return *text* with ``&``, ``<`` and ``>`` replaced by entities.

        With ``quote=True`` double quotes are replaced as well, which makes
        the result safe inside a double-quoted attribute value.
        """
        from xml.sax.saxutils import escape

        if quote:
            return escape(text, {'"': "&quot;"})
        return escape(text)

    def _open_list(self):
        """Write the opening ``<ul>`` tag unless it was already written."""
        if not self._list_started:
            self._out.write("<ul>\n")
            self._list_started = True

    def _write_item(self):
        """Write the collected item as a list entry and reset the item state."""
        self._open_list()

        # An item may lack a title or a link; never print the text "None".
        label = self._escape_html(self._title or self._link or "")
        if self._link:
            self._out.write(' <li><a href="%s">%s</a> %s\n' %
                            (self._escape_html(self._link, quote=True),
                             label, self._descr))
        else:
            # Without a link there is nothing to point the anchor at.
            self._out.write(' <li>%s %s\n' % (label, self._descr))

        self._title = None
        self._link = None
        self._descr = ""

    # ContentHandler methods

    def startElement(self, name, attrs):
        """Remember container elements and start collecting fresh text."""
        # NOTE(review): _parent is never restored when a container closes, so
        # channel-level elements that follow an <image> or <item> are not
        # treated as channel metadata.
        if name in ("channel", "image", "item"):
            self._parent = name

        self._text = ""

    def endElement(self, name):
        """Act on the text collected for the element that just closed."""
        if self._parent == "channel":
            if name == "title":
                # The channel title is used for both <title> and <h1>.
                heading = self._escape_html(self._text)
                self._out.write(top % (heading, heading))
            elif name == "description":
                # NOTE(review): descriptions are written unescaped because RSS
                # allows them to carry HTML markup.  Feed content is untrusted
                # input; sanitize here if the output is served to browsers.
                self._out.write("<p>%s</p>\n" % self._text)

        elif self._parent == "item":
            if name == "title":
                self._title = self._text
            elif name == "link":
                self._link = self._text
            elif name == "description":
                self._descr = self._text
            elif name == "item":
                self._write_item()

        if name == "rss":
            # The footer closes the list, so make sure it was opened even
            # when the feed contained no items.
            self._open_list()
            self._out.write(bottom)

    def characters(self, content):
        """Append a chunk of character data to the current element's text."""
        self._text = self._text + content
| 91 |
|
---|
| 92 | # --- Main program
|
---|
| 93 |
|
---|
if __name__ == '__main__':
    # Usage: rss2html.py <rss-source>
    # The single argument is handed to the SAX parser as the document to
    # read; the generated HTML goes to standard output.
    if len(sys.argv) < 2:
        # Report a missing argument clearly instead of failing with IndexError.
        sys.stderr.write("Usage: %s <rss-file-or-url>\n" % sys.argv[0])
        sys.exit(2)

    parser = make_parser()
    parser.setContentHandler(RSSHandler())
    parser.parse(sys.argv[1])