| 1 | """ | 
|---|
| 2 | A demo that reads in an RSS XML document and emits an HTML file containing | 
|---|
| 3 | a list of the individual items in the feed. | 
|---|
| 4 | """ | 
|---|
| 5 |  | 
|---|
import codecs
import io
import sys
from xml.sax import handler, make_parser
from xml.sax.saxutils import escape
|---|
| 10 |  | 
|---|
| 11 | # --- Templates | 
|---|
| 12 |  | 
|---|
# Page header template. It carries two ``%s`` placeholders, filled with the
# ``%`` operator: the first becomes the <title> text, the second the <h1>
# heading. Newlines are spelled out so the emitted whitespace is explicit.
top = (
    '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">\n'
    '<html>\n'
    '<head>\n'
    '<title>%s</title>\n'
    '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">\n'
    '</head>\n'
    '\n'
    '<body>\n'
    '<h1>%s</h1>\n'
)
|---|
| 24 |  | 
|---|
# Page footer template. It starts with a blank line, closes the <ul> element,
# adds a rule and a short attribution, and closes <body> and <html>. It has
# no placeholders and is written verbatim.
bottom = (
    '\n'
    '</ul>\n'
    '\n'
    '<hr>\n'
    '<address>\n'
    'Converted to HTML by rss2html.py.\n'
    '</address>\n'
    '\n'
    '</body>\n'
    '</html>\n'
)
|---|
| 36 |  | 
|---|
| 37 | # --- The ContentHandler | 
|---|
| 38 |  | 
|---|
class RSSHandler(handler.ContentHandler):
    """SAX content handler that renders an RSS feed as an HTML page.

    The page is streamed to *out* while the feed is parsed:

    * the ``title`` child of ``channel`` writes the ``top`` template,
    * the ``description`` child of ``channel`` writes a paragraph,
    * every ``item`` writes one list entry built from its ``title``,
      ``link`` and ``description`` children,
    * the closing ``rss`` tag writes the ``bottom`` template.

    Only *direct* children of ``channel`` and ``item`` are interpreted, so
    same-named elements nested elsewhere (for example the ``title`` inside
    ``image``) are ignored.

    All text taken from the feed is HTML-escaped before it is written, so
    markup embedded in titles or descriptions is shown literally instead of
    being interpreted by the browser.
    """

    def __init__(self, out=sys.stdout):
        """Create a handler writing UTF-8 encoded HTML to *out*.

        *out* may be a binary stream or a text stream; see ``_utf8_writer``
        for how each kind is handled.
        """
        handler.ContentHandler.__init__(self)
        self._out = self._utf8_writer(out)

        self._text = ""            # character data of the current element
        self._open_elements = []   # names of all currently open elements
        self._list_started = False
        self._title = None
        self._link = None
        self._descr = ""

    @staticmethod
    def _utf8_writer(out):
        """Return an object whose ``write(str)`` sends UTF-8 to *out*.

        Text streams cannot accept the bytes produced by a codecs writer, so
        for them the underlying binary buffer is wrapped when there is one
        (this keeps the output UTF-8, matching the charset declared in the
        page header); a text stream without a buffer, such as
        ``io.StringIO``, is written to directly.  Any other object is
        treated as a binary stream and wrapped in a UTF-8 writer.
        """
        if isinstance(out, io.TextIOBase):
            raw = getattr(out, "buffer", None)
            if raw is None:
                return out
            # Keep anything already written through the text layer ahead of
            # what is about to be written to the buffer underneath it.
            out.flush()
            return codecs.getwriter('utf-8')(raw)
        return codecs.getwriter('utf-8')(out)

    # ContentHandler methods

    def startElement(self, name, attrs):
        """Record the newly opened element and start collecting its text."""
        self._open_elements.append(name)
        self._text = ""

    def endElement(self, name):
        """Emit or store the content of the element that just closed."""
        if self._open_elements:
            self._open_elements.pop()
        # After the pop, the innermost open element is the direct parent.
        parent = self._open_elements[-1] if self._open_elements else None

        if parent == "channel":
            if name == "title":
                heading = escape(self._text)
                self._out.write(top % (heading, heading))
            elif name == "description":
                self._out.write("<p>%s</p>\n" % escape(self._text))

        elif parent == "item":
            if name == "title":
                self._title = self._text
            elif name == "link":
                # Whitespace around a URL is never significant.
                self._link = self._text.strip()
            elif name == "description":
                self._descr = self._text

        if name == "item":
            self._write_item()

        if name == "rss":
            # ``bottom`` closes the list, so make sure one was opened even
            # when the feed contained no items.
            self._start_list()
            self._out.write(bottom)

    def characters(self, content):
        """Accumulate character data; the parser may deliver it in pieces."""
        self._text = self._text + content

    # Internal helpers

    def _start_list(self):
        """Write the opening ``<ul>`` tag the first time it is needed."""
        if not self._list_started:
            self._out.write("<ul>\n")
            self._list_started = True

    def _write_item(self):
        """Write the collected item as a list entry and reset item state."""
        self._start_list()

        link = self._link
        # Fall back to the URL, then to nothing, when the item has no title.
        label = escape(self._title or link or "")
        if link:
            # The extra entity keeps a '"' in the URL from ending the
            # attribute value.
            href = escape(link, {'"': "&quot;"})
            label = '<a href="%s">%s</a>' % (href, label)

        self._out.write('  <li>%s %s\n' % (label, escape(self._descr)))

        self._title = None
        self._link = None
        self._descr = ""
|---|
| 91 |  | 
|---|
| 92 | # --- Main program | 
|---|
| 93 |  | 
|---|
if __name__ == '__main__':
    # The feed to convert is given as the first command-line argument; it is
    # handed to the parser unchanged.  Exit with a usage message instead of
    # an IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit("usage: %s <rss-document>" % sys.argv[0])

    parser = make_parser()
    # With no stream given, RSSHandler writes the HTML to standard output.
    parser.setContentHandler(RSSHandler())
    parser.parse(sys.argv[1])
|---|