| 1 | """
|
|---|
| 2 | A demo that reads in an RSS XML document and emits an HTML file containing
|
|---|
| 3 | a list of the individual items in the feed.
|
|---|
| 4 | """
|
|---|
| 5 |
|
|---|
import codecs
import io
import sys

from xml.sax import make_parser, handler
from xml.sax.saxutils import escape
|
|---|
| 10 |
|
|---|
| 11 | # --- Templates
|
|---|
| 12 |
|
|---|
# Page header: everything from the doctype through the <h1> heading.
# RSSHandler.endElement fills both %s placeholders with the channel title.
top = (
    '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">\n'
    "<html>\n"
    "<head>\n"
    "<title>%s</title>\n"
    '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n'
    "</head>\n"
    "\n"
    "<body>\n"
    "<h1>%s</h1>\n"
)
|
|---|
| 24 |
|
|---|
# Page footer: closes the <ul> item list and the document.  RSSHandler
# writes it once, when the closing </rss> tag is reached.
bottom = "\n".join([
    "",
    "</ul>",
    "",
    "<hr>",
    "<address>",
    "Converted to HTML by rss2html.py.",
    "</address>",
    "",
    "</body>",
    "</html>",
    "",
])
|
|---|
| 36 |
|
|---|
| 37 | # --- The ContentHandler
|
|---|
| 38 |
|
|---|
class RSSHandler(handler.ContentHandler):
    """SAX content handler that renders an RSS feed as a simple HTML page.

    The channel title becomes the page title and heading, the channel
    description becomes an introductory paragraph, and every ``item``
    becomes one entry of a bulleted list.  Output is written incrementally
    while the document is being parsed; the page footer is written when the
    closing ``rss`` tag is seen.
    """

    def __init__(self, out=sys.stdout):
        """Create a handler that writes the generated HTML to *out*.

        *out* may be a binary stream (it receives UTF-8 encoded bytes) or a
        text stream.
        """
        handler.ContentHandler.__init__(self)
        self._out = self._open_writer(out)

        self._text = ""              # character data of the current element
        self._path = []              # names of the currently open elements
        self._parent = None          # name of the directly enclosing element
        self._list_started = False   # True once "<ul>" has been written
        self._title = None
        self._link = None
        self._descr = ""

    @staticmethod
    def _open_writer(out):
        """Return an object whose ``write`` accepts text and targets *out*."""
        if not isinstance(out, io.TextIOBase):
            # Binary (or legacy byte-oriented) stream: encode on the way out.
            return codecs.getwriter("utf-8")(out)

        binary = getattr(out, "buffer", None)
        if binary is None:
            # Pure text stream (e.g. io.StringIO): there are no bytes to
            # produce, so write the text as-is.
            return out

        # Text wrapper around a byte stream (e.g. sys.stdout): write UTF-8
        # to the underlying bytes so the output always matches the charset
        # declared in the page header, whatever the wrapper's own encoding.
        out.flush()
        return codecs.getwriter("utf-8")(binary)

    # ContentHandler methods

    def startElement(self, name, attrs):
        """Record the newly opened element and start collecting its text."""
        self._parent = self._path[-1] if self._path else None
        self._path.append(name)
        self._text = ""

    def endElement(self, name):
        """Emit or store the content of the element that just closed."""
        if self._path:
            self._path.pop()
        # Only *direct* children of <channel>/<item> are interpreted, so the
        # <title> of an <image> or <textInput> is never mistaken for the
        # channel's, wherever those elements appear in the feed.
        self._parent = self._path[-1] if self._path else None

        if self._parent == "channel":
            if name == "title":
                heading = escape(self._text)
                self._out.write(top % (heading, heading))
            elif name == "description":
                # NOTE(review): descriptions are passed through unescaped,
                # presumably because feeds commonly embed HTML markup in
                # them.  Sanitize them if the feed is not trusted.
                self._out.write("<p>%s</p>\n" % self._text)

        elif self._parent == "item":
            if name == "title":
                self._title = self._text
            elif name == "link":
                self._link = self._text
            elif name == "description":
                self._descr = self._text

        if name == "item":
            self._write_item()
        elif name == "rss":
            # The footer closes the list, so make sure it has been opened
            # even when the feed contained no items.
            self._start_list()
            self._out.write(bottom)

    def characters(self, content):
        """Accumulate character data for the element currently being read."""
        self._text = self._text + content

    # Internal helpers

    def _start_list(self):
        """Write the opening list tag the first time it is needed."""
        if not self._list_started:
            self._out.write("<ul>\n")
            self._list_started = True

    def _write_item(self):
        """Write the collected item as one list entry and reset its fields."""
        self._start_list()

        link = self._link
        # Fall back to the link text (or nothing) for an item without title.
        label = self._title if self._title is not None else (link or "")

        if link is None:
            # No link: emit a plain entry instead of an anchor to "None".
            self._out.write(" <li>%s %s\n" % (escape(label), self._descr))
        else:
            href = escape(link, {'"': "&quot;"})
            self._out.write(' <li><a href="%s">%s</a> %s\n' %
                            (href, escape(label), self._descr))

        self._title = None
        self._link = None
        self._descr = ""
|
|---|
| 91 |
|
|---|
| 92 | # --- Main program
|
|---|
| 93 |
|
|---|
if __name__ == '__main__':
    # Convert the RSS document named on the command line and write the
    # resulting HTML page to standard output.
    if len(sys.argv) < 2:
        sys.exit("usage: %s RSS_DOCUMENT" % sys.argv[0])

    # The handler encodes its output as UTF-8 itself, so give it the binary
    # side of stdout when there is one (Python 3); a text stream would
    # reject the encoded bytes.
    out = getattr(sys.stdout, "buffer", sys.stdout)

    parser = make_parser()
    parser.setContentHandler(RSSHandler(out))
    parser.parse(sys.argv[1])
|
|---|