[2] | 1 | #! /usr/bin/env python
|
---|
| 2 |
|
---|
| 3 | """GUI interface to webchecker.
|
---|
| 4 |
|
---|
| 5 | This works as a Grail applet too! E.g.
|
---|
| 6 |
|
---|
| 7 | <APPLET CODE=wcgui.py NAME=CheckerWindow></APPLET>
|
---|
| 8 |
|
---|
| 9 | Checkpoints are not (yet??? ever???) supported.
|
---|
| 10 |
|
---|
| 11 | User interface:
|
---|
| 12 |
|
---|
| 13 | Enter a root to check in the text entry box. To enter more than one root,
|
---|
| 14 | enter them one at a time and press <Return> for each one.
|
---|
| 15 |
|
---|
| 16 | Command buttons Run, Stop and "Check one" govern the checking process in
|
---|
| 17 | the obvious way. Run and "Check one" also enter the root from the text
|
---|
| 18 | entry box if one is present. There's also a check box (enabled by default)
|
---|
| 19 | to decide whether actually to follow external links (since this can slow
|
---|
| 20 | the checking down considerably). Finally there's a Quit button.
|
---|
| 21 |
|
---|
| 22 | A series of checkbuttons determines whether the corresponding output panel
|
---|
| 23 | is shown. List panels are also automatically shown or hidden when their
|
---|
| 24 | status changes between empty and non-empty. There are six panels:
|
---|
| 25 |
|
---|
| 26 | Log -- raw output from the checker (-v, -q affect this)
|
---|
| 27 | To check -- links discovered but not yet checked
|
---|
| 28 | Checked -- links that have been checked
|
---|
| 29 | Bad links -- links that failed upon checking
|
---|
| 30 | Errors -- pages containing at least one bad link
|
---|
| 31 | Details -- details about one URL; double click on a URL in any of
|
---|
| 32 | the above list panels (not in Log) will show details
|
---|
| 33 | for that URL
|
---|
| 34 |
|
---|
| 35 | Use your window manager's Close command to quit.
|
---|
| 36 |
|
---|
| 37 | Command line options:
|
---|
| 38 |
|
---|
| 39 | -m bytes -- skip HTML pages larger than this size (default %(MAXPAGE)d)
|
---|
| 40 | -q -- quiet operation (also suppresses external links report)
|
---|
| 41 | -v -- verbose operation; repeating -v will increase verbosity
|
---|
| 42 | -t root -- specify root dir which should be treated as internal (can repeat)
|
---|
| 43 | -a -- don't check name anchors
|
---|
| 44 |
|
---|
| 45 | Command line arguments:
|
---|
| 46 |
|
---|
| 47 | rooturl -- URL to start checking
|
---|
| 48 | (default %(DEFROOT)s)
|
---|
| 49 |
|
---|
| 50 | XXX The command line options (-m, -q, -v) should be GUI accessible.
|
---|
| 51 |
|
---|
| 52 | XXX The roots should be visible as a list (?).
|
---|
| 53 |
|
---|
| 54 | XXX The multipanel user interface is clumsy.
|
---|
| 55 |
|
---|
| 56 | """
|
---|
| 57 |
|
---|
| 58 | # ' Emacs bait
|
---|
| 59 |
|
---|
| 60 |
|
---|
| 61 | import sys
|
---|
| 62 | import getopt
|
---|
| 63 | from Tkinter import *
|
---|
| 64 | import tktools
|
---|
| 65 | import webchecker
|
---|
| 66 |
|
---|
def main():
    """Parse the command line, build the checker window and run Tk.

    Options (see the module docstring for the user-level description):

      -m bytes   set webchecker.maxpage
      -q         set webchecker.verbose to 0
      -v         increase webchecker.verbose by one (may be repeated)
      -t root    extra root added with add_to_do=0 (may be repeated)
      -a         toggle webchecker.nonames

    All positional arguments except the last are added as roots right
    away; the last one is only suggested in the root entry box.

    A malformed command line (unknown option, or a -m value that is not
    an integer) prints the problem and the usage text on stderr and
    exits with status 2.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], 't:m:qva')
    except getopt.error as msg:
        _usage(msg)

    # Start from the defaults published by the webchecker module.
    webchecker.verbose = webchecker.VERBOSE
    webchecker.nonames = webchecker.NONAMES
    webchecker.maxpage = webchecker.MAXPAGE
    extra_roots = []
    for o, a in opts:
        if o == '-m':
            try:
                webchecker.maxpage = int(a)
            except ValueError:
                _usage("option -m requires an integer, not %r" % (a,))
        elif o == '-q':
            webchecker.verbose = 0
        elif o == '-v':
            webchecker.verbose = webchecker.verbose + 1
        elif o == '-t':
            extra_roots.append(a)
        elif o == '-a':
            webchecker.nonames = not webchecker.nonames

    root = Tk(className='Webchecker')
    # Closing the window from the window manager ends the main loop.
    root.protocol("WM_DELETE_WINDOW", root.quit)
    c = CheckerWindow(root)
    c.setflags(verbose=webchecker.verbose, maxpage=webchecker.maxpage,
               nonames=webchecker.nonames)
    if args:
        for arg in args[:-1]:
            c.addroot(arg)
        c.suggestroot(args[-1])
    # Usually conditioned on whether external links
    # will be checked, but since that's not a command
    # line option, just toss them in.
    for url_root in extra_roots:
        if not url_root:
            # An empty -t argument names no root at all (and used to
            # crash on url_root[-1]); ignore it.
            continue
        # Make sure it's terminated by a slash,
        # so that addroot doesn't discard the last
        # directory component.
        if not url_root.endswith("/"):
            url_root = url_root + "/"
        c.addroot(url_root, add_to_do = 0)
    root.mainloop()


def _usage(msg):
    """Write msg and the module usage text to stderr, then exit with status 2."""
    # Write to stderr directly rather than rebinding sys.stdout, so the
    # process-wide stream is left alone if the SystemExit is ever caught.
    sys.stderr.write("%s\n" % (msg,))
    sys.stderr.write("%s\n" % (__doc__ % vars(webchecker),))
    sys.exit(2)
|
---|
| 110 |
|
---|
| 111 |
|
---|
class CheckerWindow(webchecker.Checker):
    """Tk window that drives a webchecker.Checker and displays its state.

    The window consists of a root-URL entry, a row of command buttons, two
    status labels and a MultiPanel holding the "Log", "To check",
    "Checked", "Bad links", "Pages w/ bad links" and "Details" panels.

    The Checker methods setbad, setgood, newlink, markdone and seterror
    are overridden: each one first calls the base implementation and then
    mirrors the change in the matching list panel and the status line.
    """

    # Re-entrancy guard for dosomething().  Defined at class level so the
    # attribute exists before the first call.
    __busy = 0

    def __init__(self, parent, root=webchecker.DEFROOT):
        """Build all widgets inside parent and suggest root as first root.

        parent -- the Tk widget (normally the toplevel) to populate
        root   -- URL pre-filled in the entry box; skipped when empty
        """
        self.__parent = parent

        # Top row: label plus entry for the root URL.
        self.__topcontrols = Frame(parent)
        self.__topcontrols.pack(side=TOP, fill=X)
        self.__label = Label(self.__topcontrols, text="Root URL:")
        self.__label.pack(side=LEFT)
        self.__rootentry = Entry(self.__topcontrols, width=60)
        self.__rootentry.pack(side=LEFT)
        self.__rootentry.bind('<Return>', self.enterroot)
        self.__rootentry.focus_set()

        # Second row: command buttons.
        self.__controls = Frame(parent)
        self.__controls.pack(side=TOP, fill=X)
        self.__running = 0
        self.__start = Button(self.__controls, text="Run", command=self.start)
        self.__start.pack(side=LEFT)
        self.__stop = Button(self.__controls, text="Stop", command=self.stop,
                             state=DISABLED)
        self.__stop.pack(side=LEFT)
        self.__step = Button(self.__controls, text="Check one",
                             command=self.step)
        self.__step.pack(side=LEFT)
        self.__cv = BooleanVar(parent)
        # NOTE(review): checkext is read before Checker.__init__ runs, so
        # this presumably relies on a class-level default in
        # webchecker.Checker -- confirm.
        self.__cv.set(self.checkext)
        self.__checkext = Checkbutton(self.__controls, variable=self.__cv,
                                      command=self.update_checkext,
                                      text="Check nonlocal links",)
        self.__checkext.pack(side=LEFT)
        self.__reset = Button(self.__controls, text="Start over",
                              command=self.reset)
        self.__reset.pack(side=LEFT)
        if __name__ == '__main__': # No Quit button under Grail!
            self.__quit = Button(self.__controls, text="Quit",
                                 command=self.__parent.quit)
            self.__quit.pack(side=RIGHT)

        # Status lines and the output panels.
        self.__status = Label(parent, text="Status: initial", anchor=W)
        self.__status.pack(side=TOP, fill=X)
        self.__checking = Label(parent, text="Idle", anchor=W)
        self.__checking.pack(side=TOP, fill=X)
        self.__mp = mp = MultiPanel(parent)
        # From here on everything printed to sys.stdout lands in the Log
        # panel.
        sys.stdout = self.__log = LogPanel(mp, "Log")
        self.__todo = ListPanel(mp, "To check", self, self.showinfo)
        self.__done = ListPanel(mp, "Checked", self, self.showinfo)
        self.__bad = ListPanel(mp, "Bad links", self, self.showinfo)
        self.__errors = ListPanel(mp, "Pages w/ bad links", self,
                                  self.showinfo)
        self.__details = LogPanel(mp, "Details")
        self.root_seed = None
        webchecker.Checker.__init__(self)
        if root:
            root = str(root).strip()
            if root:
                self.suggestroot(root)
        self.newstatus()

    def reset(self):
        """Reset the checker, empty the list panels, re-suggest the root."""
        webchecker.Checker.reset(self)
        for p in self.__todo, self.__done, self.__bad, self.__errors:
            p.clear()
        if self.root_seed:
            self.suggestroot(self.root_seed)

    def suggestroot(self, root):
        """Put root in the entry box, selected, and remember it as seed."""
        self.__rootentry.delete(0, END)
        self.__rootentry.insert(END, root)
        self.__rootentry.select_range(0, END)
        self.root_seed = root

    def enterroot(self, event=None):
        """Add the URL typed in the entry box as a root, then clear the box.

        Also bound to <Return> in the entry, hence the unused event
        argument.  When the new root shows up in the "To check" panel it
        becomes that panel's selection.
        """
        root = self.__rootentry.get()
        root = root.strip()
        if root:
            self.__checking.config(text="Adding root "+root)
            self.__checking.update_idletasks()
            self.addroot(root)
            self.__checking.config(text="Idle")
            items = self.__todo.items
            # The list panels hold (url, fragment) pairs (seterror inserts
            # (url, '') and showinfo indexes url[0] / url[1]), so the bare
            # string never matched and the new root was never selected.
            # Presumably addroot() queues the root as (root, "") --
            # confirm against webchecker; the bare string is still tried
            # as a fallback.
            for key in ((root, ""), root):
                if key in items:
                    i = items.index(key)
                    self.__todo.list.select_clear(0, END)
                    self.__todo.list.select_set(i)
                    self.__todo.list.yview(i)
                    break
        self.__rootentry.delete(0, END)

    def start(self):
        """Handler for the Run button: check URLs until stopped."""
        self.__start.config(state=DISABLED, relief=SUNKEN)
        self.__stop.config(state=NORMAL)
        self.__step.config(state=DISABLED)
        self.enterroot()
        self.__running = 1
        self.go()

    def stop(self):
        """Handler for the Stop button: clear the running flag.

        The buttons are put back in their idle state by go(), which
        dosomething() calls when it finishes.
        """
        self.__stop.config(state=DISABLED, relief=SUNKEN)
        self.__running = 0

    def step(self):
        """Handler for the "Check one" button: check a single URL."""
        self.__start.config(state=DISABLED)
        self.__step.config(state=DISABLED, relief=SUNKEN)
        self.enterroot()
        self.__running = 0
        self.dosomething()

    def go(self):
        """Schedule the next check if running, else restore the idle UI."""
        if self.__running:
            self.__parent.after_idle(self.dosomething)
        else:
            self.__checking.config(text="Idle")
            self.__start.config(state=NORMAL, relief=RAISED)
            self.__stop.config(state=DISABLED, relief=RAISED)
            self.__step.config(state=NORMAL, relief=RAISED)

    def dosomething(self):
        """Check one URL from the "To check" panel, then call go().

        The selected entry is checked, or the first one when nothing is
        selected.  With nothing left to check the run is stopped.  Calls
        made while a check is already in progress return immediately
        (parent.update() below can dispatch further button clicks).
        """
        if self.__busy:
            return
        self.__busy = 1
        completed = 0
        try:
            if self.todo:
                chosen = self.__todo.selectedindices()
                if chosen:
                    i = chosen[0]
                else:
                    i = 0
                    self.__todo.list.select_set(i)
                self.__todo.list.yview(i)
                url = self.__todo.items[i]
                self.__checking.config(
                    text="Checking "+self.format_url(url))
                self.__parent.update()
                self.dopage(url)
            else:
                self.stop()
            completed = 1
        finally:
            # Always release the guard and refresh the buttons.  If the
            # check raised, also drop out of run mode so the same URL is
            # not retried in a loop; without this the window stayed
            # wedged with __busy set and the buttons disabled.
            if not completed:
                self.__running = 0
            self.__busy = 0
            self.go()

    def showinfo(self, url):
        """Fill the Details panel with what is known about url, and show it.

        url is an entry taken from one of the list panels; it is indexed
        as url[0] / url[1] below.
        """
        d = self.__details
        d.clear()
        d.put("URL: %s\n" % self.format_url(url))
        if url in self.bad:
            d.put("Error: %s\n" % str(self.bad[url]))
        if url in self.roots:
            d.put("Note: This is a root URL\n")
        if url in self.done:
            d.put("Status: checked\n")
            origins = self.done[url]
        elif url in self.todo:
            d.put("Status: to check\n")
            origins = self.todo[url]
        else:
            d.put("Status: unknown (!)\n")
            origins = []
        if (not url[1]) and url[0] in self.errors:
            d.put("Bad links from this page:\n")
            for link, rawlink, msg in self.errors[url[0]]:
                d.put(" HREF %s" % self.format_url(link))
                if self.format_url(link) != rawlink: d.put(" (%s)" %rawlink)
                d.put("\n")
                d.put(" error %s\n" % str(msg))
        self.__mp.showpanel("Details")
        for source, rawlink in origins:
            d.put("Origin: %s" % source)
            if rawlink != self.format_url(url):
                d.put(" (%s)" % rawlink)
            d.put("\n")
        # Scroll back to the top of the freshly written text.
        d.text.yview("1.0")

    def setbad(self, url, msg):
        """Record url as bad and list it in the "Bad links" panel."""
        webchecker.Checker.setbad(self, url, msg)
        self.__bad.insert(url)
        self.newstatus()

    def setgood(self, url):
        """Record url as good and drop it from the "Bad links" panel."""
        webchecker.Checker.setgood(self, url)
        self.__bad.remove(url)
        self.newstatus()

    def newlink(self, url, origin):
        """Register a link and list it under "Checked" or "To check"."""
        webchecker.Checker.newlink(self, url, origin)
        if url in self.done:
            self.__done.insert(url)
        elif url in self.todo:
            self.__todo.insert(url)
        self.newstatus()

    def markdone(self, url):
        """Move url from the "To check" panel to the "Checked" panel."""
        webchecker.Checker.markdone(self, url)
        self.__done.insert(url)
        self.__todo.remove(url)
        self.newstatus()

    def seterror(self, url, triple):
        """Record a bad link found on page url and list that page."""
        webchecker.Checker.seterror(self, url, triple)
        # Entries in the list panels are (url, fragment) pairs.
        self.__errors.insert((url, ''))
        self.newstatus()

    def newstatus(self):
        """Refresh the status line from status() and let Tk redraw."""
        self.__status.config(text="Status: "+self.status())
        self.__parent.update()

    def update_checkext(self):
        """Copy the "Check nonlocal links" check box into self.checkext."""
        self.checkext = self.__cv.get()
|
---|
| 318 |
|
---|
| 319 |
|
---|
class ListPanel:
    """A MultiPanel panel showing a list box of URLs.

    self.items holds the URL objects in the same order as the rows of the
    list box; each row displays checker.format_url(url).  The panel is
    shown when its first item arrives and hidden again when it becomes
    empty.
    """

    def __init__(self, mp, name, checker, showinfo=None):
        """Create the panel called name inside the MultiPanel mp.

        checker  -- object whose format_url() renders an item as text
        showinfo -- optional callback, called with the item that was
                    double-clicked
        """
        self.mp = mp
        self.name = name
        self.showinfo = showinfo
        self.checker = checker
        self.panel = mp.addpanel(name)
        self.list, self.frame = tktools.make_list_box(
            self.panel, width=60, height=5)
        self.list.config(exportselection=0)
        if showinfo:
            self.list.bind('<Double-Button-1>', self.doubleclick)
        self.items = []

    def clear(self):
        """Remove every item and hide the panel."""
        self.items = []
        self.list.delete(0, END)
        self.mp.hidepanel(self.name)

    def doubleclick(self, event):
        """Event handler: pass the first selected item to showinfo."""
        chosen = self.selectedindices()
        if chosen:
            self.showinfo(self.items[chosen[0]])

    def selectedindices(self):
        """Return the selected row numbers as a list of ints."""
        picked = self.list.curselection()
        if not picked:
            return []
        # Build a real list: callers index the result (l[0]), which a
        # lazy map object would not support.
        return [int(index) for index in picked]

    def insert(self, url):
        """Append url to the list unless it is already present."""
        if url not in self.items:
            if not self.items:
                # First item: make the panel visible.
                self.mp.showpanel(self.name)
            # (I tried sorting alphabetically, but the display is too jumpy)
            i = len(self.items)
            self.list.insert(i, self.checker.format_url(url))
            self.list.yview(i)
            self.items.insert(i, url)

    def remove(self, url):
        """Remove url from the list; do nothing when it is not present.

        Removing the last item hides the panel.  If the removed row was
        selected, the row now at that position (or the new last row)
        becomes selected instead.
        """
        try:
            i = self.items.index(url)
        except (ValueError, IndexError):
            pass
        else:
            was_selected = i in self.selectedindices()
            self.list.delete(i)
            del self.items[i]
            if not self.items:
                self.mp.hidepanel(self.name)
            elif was_selected:
                if i >= len(self.items):
                    i = len(self.items) - 1
                self.list.select_set(i)
|
---|
| 375 |
|
---|
| 376 |
|
---|
class LogPanel:
    """A MultiPanel panel wrapping a text box used for running output.

    The write() method lets an instance stand in for a writable stream;
    CheckerWindow assigns one to sys.stdout.
    """

    def __init__(self, mp, name):
        """Create the panel called name inside the MultiPanel mp."""
        self.mp = mp
        self.name = name
        self.panel = mp.addpanel(name)
        self.text, self.frame = tktools.make_text_box(self.panel, height=10)
        # Long lines are not wrapped.
        self.text.config(wrap=NONE)

    def clear(self):
        """Delete all text and scroll back to the first line."""
        box = self.text
        box.delete("1.0", END)
        box.yview("1.0")

    def put(self, s):
        """Append s; scroll to the end when s contains a newline."""
        box = self.text
        box.insert(END, s)
        if '\n' in s:
            box.yview(END)

    def write(self, s):
        """Like put(), but also refresh the panel after a newline."""
        self.put(s)
        if '\n' in s:
            self.panel.update()
|
---|
| 400 |
|
---|
| 401 |
|
---|
class MultiPanel:
    """A stack of named panels, each toggled by its own check button.

    The check buttons sit in a bar at the top (topframe); the panels that
    are switched on are packed below it (botframe) in the order in which
    they were added.
    """

    def __init__(self, parent):
        """Create the button bar and the (still empty) panel area."""
        self.panelnames = []   # panel names in creation order
        self.panels = {}       # name -> (StringVar, Checkbutton, Frame)
        self.parent = parent
        self.frame = Frame(self.parent)
        self.frame.pack(expand=1, fill=BOTH)
        self.topframe = Frame(self.frame, borderwidth=2, relief=RAISED)
        self.topframe.pack(fill=X)
        self.botframe = Frame(self.frame)
        self.botframe.pack(expand=1, fill=BOTH)

    def addpanel(self, name, on=0):
        """Add a panel called name and return its frame.

        The panel starts out visible only when on is true.
        """
        # The variable holds the panel name while the panel is switched
        # on and the empty string while it is off.
        state = StringVar(self.parent)
        initial = ""
        if on:
            initial = name
        state.set(initial)
        toggle = Checkbutton(self.topframe, text=name,
                             offvalue="", onvalue=name, variable=state,
                             command=self.checkpanel)
        toggle.pack(side=LEFT)
        body = Frame(self.botframe)
        title = Label(body, text=name, borderwidth=2, relief=RAISED,
                      anchor=W)
        title.pack(side=TOP, fill=X)
        self.panelnames.append(name)
        self.panels[name] = (state, toggle, body)
        if on:
            body.pack(expand=1, fill=BOTH)
        return body

    def showpanel(self, name):
        """Switch the named panel on and pack it."""
        state, toggle, body = self.panels[name]
        state.set(name)
        body.pack(expand=1, fill=BOTH)

    def hidepanel(self, name):
        """Switch the named panel off and unpack it."""
        state, toggle, body = self.panels[name]
        state.set("")
        body.pack_forget()

    def checkpanel(self):
        """Check button callback: repack the panels that are switched on.

        Every panel is unpacked first so that the visible ones end up in
        creation order again.
        """
        entries = [self.panels[name] for name in self.panelnames]
        for state, toggle, body in entries:
            body.pack_forget()
        for state, toggle, body in entries:
            if state.get():
                body.pack(expand=1, fill=BOTH)
|
---|
| 453 |
|
---|
| 454 |
|
---|
# Start the GUI only when run as a script, not when the module is imported
# (for instance when loaded as a Grail applet).
if __name__ == "__main__":
    main()
|
---|