Changeset 391 for python/trunk/Lib/test/test_robotparser.py
Timestamp: Mar 19, 2014, 11:31:01 PM
Location: python/trunk
Files: 2 edited
python/trunk

Property svn:mergeinfo set to:
    /python/vendor/Python-2.7.6 merged eligible
    /python/vendor/current merged eligible
python/trunk/Lib/test/test_robotparser.py
--- python/trunk/Lib/test/test_robotparser.py (r2)
+++ python/trunk/Lib/test/test_robotparser.py (r391)
@@ -1,4 +1,5 @@
 import unittest, StringIO, robotparser
 from test import test_support
+from urllib2 import urlopen, HTTPError
 
 class RobotTestCase(unittest.TestCase):
@@ -21,7 +22,7 @@
         agent = self.agent
         if self.good:
-            self.failUnless(self.parser.can_fetch(agent, url))
+            self.assertTrue(self.parser.can_fetch(agent, url))
         else:
-            self.failIf(self.parser.can_fetch(agent, url))
+            self.assertFalse(self.parser.can_fetch(agent, url))
 
     def __str__(self):
@@ -203,18 +204,83 @@
 
 
-
-class TestCase(unittest.TestCase):
-    def runTest(self):
+# 14. For issue #6325 (query string support)
+doc = """
+User-agent: *
+Disallow: /some/path?name=value
+"""
+
+good = ['/some/path']
+bad = ['/some/path?name=value']
+
+RobotTest(14, doc, good, bad)
+
+# 15. For issue #4108 (obey first * entry)
+doc = """
+User-agent: *
+Disallow: /some/path
+
+User-agent: *
+Disallow: /another/path
+"""
+
+good = ['/another/path']
+bad = ['/some/path']
+
+RobotTest(15, doc, good, bad)
+
+# 16. Empty query (issue #17403). Normalizing the url first.
+doc = """
+User-agent: *
+Allow: /some/path?
+Disallow: /another/path?
+"""
+
+good = ['/some/path?']
+bad = ['/another/path?']
+
+RobotTest(16, doc, good, bad)
+
+
+class NetworkTestCase(unittest.TestCase):
+
+    def testPasswordProtectedSite(self):
         test_support.requires('network')
-        # whole site is password-protected.
-        url = 'http://mueblesmoraleda.com'
-        parser = robotparser.RobotFileParser()
-        parser.set_url(url)
-        parser.read()
-        self.assertEqual(parser.can_fetch("*", url+"/robots.txt"), False)
+        with test_support.transient_internet('mueblesmoraleda.com'):
+            url = 'http://mueblesmoraleda.com'
+            robots_url = url + "/robots.txt"
+            # First check the URL is usable for our purposes, since the
+            # test site is a bit flaky.
+            try:
+                urlopen(robots_url)
+            except HTTPError as e:
+                if e.code not in {401, 403}:
+                    self.skipTest(
+                        "%r should return a 401 or 403 HTTP error, not %r"
+                        % (robots_url, e.code))
+            else:
+                self.skipTest(
+                    "%r should return a 401 or 403 HTTP error, not succeed"
+                    % (robots_url))
+            parser = robotparser.RobotFileParser()
+            parser.set_url(url)
+            try:
+                parser.read()
+            except IOError:
+                self.skipTest('%s is unavailable' % url)
+            self.assertEqual(parser.can_fetch("*", robots_url), False)
+
+    def testPythonOrg(self):
+        test_support.requires('network')
+        with test_support.transient_internet('www.python.org'):
+            parser = robotparser.RobotFileParser(
+                "http://www.python.org/robots.txt")
+            parser.read()
+            self.assertTrue(
+                parser.can_fetch("*", "http://www.python.org/robots.txt"))
+
 
 def test_main():
     test_support.run_unittest(tests)
-    TestCase().run()
+    test_support.run_unittest(NetworkTestCase)
 
 if __name__=='__main__':
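The substance of this changeset is the three new in-memory tests (14-16), each of which feeds a robots.txt document to the parser as a string instead of fetching it over the network. As a quick illustration of what test 14 asserts, here is a minimal standalone sketch (Python 2, using the same robotparser module as the test file; the rules name is illustrative, not part of the changeset):

import robotparser

# robots.txt text mirroring test 14 above (issue #6325): a Disallow
# rule that carries a query string.
rules = """\
User-agent: *
Disallow: /some/path?name=value
"""

parser = robotparser.RobotFileParser()
parser.parse(rules.splitlines())

# The bare path stays fetchable; the path plus the matching query
# string is blocked.
print parser.can_fetch("*", "/some/path")             # expected: True
print parser.can_fetch("*", "/some/path?name=value")  # expected: False

Driving parse() with a list of lines is the same trick the RobotTest helper uses via StringIO, and it is what lets tests 14-16 run without the flaky network access the old TestCase.runTest() depended on; the network-dependent checks now live in NetworkTestCase, which skips itself when the test sites misbehave.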