source: python/vendor/Python-2.6.5/Lib/httplib.py

Last change on this file was 2, checked in by Yuri Dario, 15 years ago

Initial import for vendor code.

  • Property svn:eol-style set to native
File size: 44.3 KB
Line 
1"""HTTP/1.1 client library
2
3<intro stuff goes here>
4<other stuff, too>
5
HTTPConnection goes through a number of "states", which define when a client
may legally make another request or fetch the response for a particular
request. This diagram details these state transitions:

    (null)
      |
      | HTTPConnection()
      v
    Idle
      |
      | putrequest()
      v
    Request-started
      |
      | ( putheader() )*  endheaders()
      v
    Request-sent
      |
      | response = getresponse()
      v
    Unread-response   [Response-headers-read]
      |\____________________
      |                     |
      | response.read()     | putrequest()
      v                     v
    Idle                  Req-started-unread-response
                     ______/|
                   /        |
   response.read() |        | ( putheader() )*  endheaders()
                   v        v
       Request-started    Req-sent-unread-response
                            |
                            | response.read()
                            v
                          Request-sent

This diagram presents the following rules:
  -- a second request may not be started until {response-headers-read}
  -- a response [object] cannot be retrieved until {request-sent}
  -- there is no differentiation between an unread response body and a
     partially read response body

Note: this enforcement is applied by the HTTPConnection class. The
      HTTPResponse class does not enforce this state machine, which
      implies sophisticated clients may accelerate the request/response
      pipeline. Caution should be taken, though: accelerating the states
      beyond the above pattern may imply knowledge of the server's
      connection-close behavior for certain requests. For example, it
      is impossible to tell whether the server will close the connection
      UNTIL the response headers have been read; this means that further
      requests cannot be placed into the pipeline until it is known that
      the server will NOT be closing the connection.

Logical State                  __state            __response
-------------                  -------            ----------
Idle                           _CS_IDLE           None
Request-started                _CS_REQ_STARTED    None
Request-sent                   _CS_REQ_SENT       None
Unread-response                _CS_IDLE           <response_class>
Req-started-unread-response    _CS_REQ_STARTED    <response_class>
Req-sent-unread-response       _CS_REQ_SENT       <response_class>
67"""
68
69from array import array
70import socket
71from sys import py3kwarning
72from urlparse import urlsplit
73import warnings
74with warnings.catch_warnings():
75 if py3kwarning:
76 warnings.filterwarnings("ignore", ".*mimetools has been removed",
77 DeprecationWarning)
78 import mimetools
79
80try:
81 from cStringIO import StringIO
82except ImportError:
83 from StringIO import StringIO
84
# Public names exported by ``from httplib import *``.
__all__ = [
    "HTTP",
    "HTTPResponse",
    "HTTPConnection",
    "HTTPException",
    "NotConnected",
    "UnknownProtocol",
    "UnknownTransferEncoding",
    "UnimplementedFileMode",
    "IncompleteRead",
    "InvalidURL",
    "ImproperConnectionState",
    "CannotSendRequest",
    "CannotSendHeader",
    "ResponseNotReady",
    "BadStatusLine",
    "error",
    "responses",
]
91
# default TCP ports for the two URL schemes
HTTP_PORT = 80
HTTPS_PORT = 443

# placeholder for response attributes that have not been parsed yet
_UNKNOWN = 'UNKNOWN'

# connection states
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'

# status codes
# informational
CONTINUE = 100
SWITCHING_PROTOCOLS = 101
PROCESSING = 102

# successful
OK = 200
CREATED = 201
ACCEPTED = 202
NON_AUTHORITATIVE_INFORMATION = 203
NO_CONTENT = 204
RESET_CONTENT = 205
PARTIAL_CONTENT = 206
MULTI_STATUS = 207
IM_USED = 226

# redirection
MULTIPLE_CHOICES = 300
MOVED_PERMANENTLY = 301
FOUND = 302
SEE_OTHER = 303
NOT_MODIFIED = 304
USE_PROXY = 305
TEMPORARY_REDIRECT = 307

# client error
BAD_REQUEST = 400
UNAUTHORIZED = 401
PAYMENT_REQUIRED = 402
FORBIDDEN = 403
NOT_FOUND = 404
METHOD_NOT_ALLOWED = 405
NOT_ACCEPTABLE = 406
PROXY_AUTHENTICATION_REQUIRED = 407
REQUEST_TIMEOUT = 408
CONFLICT = 409
GONE = 410
LENGTH_REQUIRED = 411
PRECONDITION_FAILED = 412
REQUEST_ENTITY_TOO_LARGE = 413
REQUEST_URI_TOO_LONG = 414
UNSUPPORTED_MEDIA_TYPE = 415
REQUESTED_RANGE_NOT_SATISFIABLE = 416
EXPECTATION_FAILED = 417
UNPROCESSABLE_ENTITY = 422
LOCKED = 423
FAILED_DEPENDENCY = 424
UPGRADE_REQUIRED = 426

# server error
INTERNAL_SERVER_ERROR = 500
NOT_IMPLEMENTED = 501
BAD_GATEWAY = 502
SERVICE_UNAVAILABLE = 503
GATEWAY_TIMEOUT = 504
HTTP_VERSION_NOT_SUPPORTED = 505
INSUFFICIENT_STORAGE = 507
NOT_EXTENDED = 510

# Mapping status codes to their standard reason phrases.  Every status
# constant defined above has an entry here, so responses[SOME_CONSTANT]
# never raises KeyError; 306 is listed although it has no constant.
responses = {
    100: 'Continue',
    101: 'Switching Protocols',
    102: 'Processing',

    200: 'OK',
    201: 'Created',
    202: 'Accepted',
    203: 'Non-Authoritative Information',
    204: 'No Content',
    205: 'Reset Content',
    206: 'Partial Content',
    207: 'Multi-Status',
    226: 'IM Used',

    300: 'Multiple Choices',
    301: 'Moved Permanently',
    302: 'Found',
    303: 'See Other',
    304: 'Not Modified',
    305: 'Use Proxy',
    306: '(Unused)',
    307: 'Temporary Redirect',

    400: 'Bad Request',
    401: 'Unauthorized',
    402: 'Payment Required',
    403: 'Forbidden',
    404: 'Not Found',
    405: 'Method Not Allowed',
    406: 'Not Acceptable',
    407: 'Proxy Authentication Required',
    408: 'Request Timeout',
    409: 'Conflict',
    410: 'Gone',
    411: 'Length Required',
    412: 'Precondition Failed',
    413: 'Request Entity Too Large',
    414: 'Request-URI Too Long',
    415: 'Unsupported Media Type',
    416: 'Requested Range Not Satisfiable',
    417: 'Expectation Failed',
    422: 'Unprocessable Entity',
    423: 'Locked',
    424: 'Failed Dependency',
    426: 'Upgrade Required',

    500: 'Internal Server Error',
    501: 'Not Implemented',
    502: 'Bad Gateway',
    503: 'Service Unavailable',
    504: 'Gateway Timeout',
    505: 'HTTP Version Not Supported',
    507: 'Insufficient Storage',
    510: 'Not Extended',
}

# maximal amount of data to read at one time in _safe_read
MAXAMOUNT = 1048576
213
class HTTPMessage(mimetools.Message):
    """Header container that folds repeated fields into one value.

    Overrides readheaders() so that, while parsing, repeated header
    fields are merged through addheader() and continuation lines are
    attached through addcontinue().
    """

    def addheader(self, key, value):
        """Add header for field key handling repeats."""
        earlier = self.dict.get(key)
        if earlier is not None:
            # repeated field: append to what is already stored
            value = ", ".join((earlier, value))
        self.dict[key] = value

    def addcontinue(self, key, more):
        """Add more field data from a continuation line."""
        self.dict[key] = self.dict[key] + "\n " + more

    def readheaders(self):
        """Read header lines.

        Lines are consumed from self.fp up to the blank line that ends
        the header section.  That terminating line is swallowed and is
        never stored.  If a line that is not a header shows up instead
        (an error), the method tries to push it back -- via fp.unread()
        when available, else by seeking -- and stops.

        On return self.status is the empty string when all went well and
        an error message otherwise.  self.headers holds the raw header
        lines exactly as read, and self.dict maps each field name to its
        value.

        Fields occurring more than once are combined as described in
        RFC 2616 sec 4.2: each later field-value is appended to the
        first one, separated by a comma, in the order received.
        """
        # XXX The implementation overrides the readheaders() method of
        # rfc822.Message.  The base class design isn't amenable to
        # customized behavior here so the method here is a copy of the
        # base class code with a few small changes.

        self.dict = {}
        self.unixfrom = ''
        self.headers = raw_lines = []
        self.status = ''
        field = ""
        at_first_line = 1
        line_start = push_back = where = None
        if hasattr(self.fp, 'unread'):
            push_back = self.fp.unread
        elif self.seekable:
            where = self.fp.tell
        while True:
            if where:
                # remember the offset so a bad line can be un-read later
                try:
                    line_start = where()
                except IOError:
                    line_start = where = None
                    self.seekable = 0
            line = self.fp.readline()
            if not line:
                self.status = 'EOF in headers'
                break
            # Skip unix From name time lines
            if at_first_line and line.startswith('From '):
                self.unixfrom = self.unixfrom + line
                continue
            at_first_line = 0
            if field and line[0] in ' \t':
                # XXX Not sure if continuation lines are handled properly
                # for http and/or for repeating headers
                # It's a continuation line.
                raw_lines.append(line)
                self.addcontinue(field, line.strip())
                continue
            if self.iscomment(line):
                # It's a comment.  Ignore it.
                continue
            if self.islast(line):
                # Note! No pushback here!  The delimiter line gets eaten.
                break
            field = self.isheader(line)
            if field:
                # It's a legal header line, save it.
                raw_lines.append(line)
                self.addheader(field, line[len(field)+1:].strip())
                continue
            # It's not a header line; throw it back and stop here.
            if self.dict:
                self.status = 'Non-header line where header expected'
            else:
                self.status = 'No headers'
            # Try to undo the read.
            if push_back:
                push_back(line)
            elif where:
                self.fp.seek(line_start)
            else:
                self.status = self.status + '; bad seek'
            break
318
class HTTPResponse:
    """Parse and read a single HTTP response from a connected socket.

    begin() consumes the status line and the headers; read() then returns
    the body, handling fixed-length, chunked and read-until-close bodies.
    """

    # strict: If true, raise BadStatusLine if the status line can't be
    # parsed as a valid HTTP/1.0 or 1.1 status line.  By default it is
    # false because it prevents clients from talking to HTTP/0.9
    # servers.  Note that a response with a sufficiently corrupted
    # status line will look like an HTTP/0.9 response.

    # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.

    def __init__(self, sock, debuglevel=0, strict=0, method=None):
        """Set up a response reader on top of `sock'.

        `debuglevel' greater than zero makes the status line and headers
        get printed, `strict' selects the status-line policy described
        above, and `method' is the request method (a 'HEAD' request has
        no response body).
        """
        self.fp = sock.makefile('rb', 0)
        self.debuglevel = debuglevel
        self.strict = strict
        self._method = method

        self.msg = None

        # from the Status-Line of the response
        self.version = _UNKNOWN # HTTP-Version
        self.status = _UNKNOWN  # Status-Code
        self.reason = _UNKNOWN  # Reason-Phrase

        self.chunked = _UNKNOWN         # is "chunked" being used?
        self.chunk_left = _UNKNOWN      # bytes left to read in current chunk
        self.length = _UNKNOWN          # number of bytes left in response
        self.will_close = _UNKNOWN      # conn will close at end of response

    def _read_status(self):
        """Read the status line and return (version, status, reason).

        `status' is returned as an int.  A line not starting with 'HTTP/'
        raises BadStatusLine in strict mode; otherwise it is treated as
        the first line of an HTTP/0.9 body and pushed back.  An empty
        read or a status code outside 100..999 raises BadStatusLine.
        """
        # Initialize with Simple-Response defaults
        line = self.fp.readline()
        if self.debuglevel > 0:
            print("reply: %s" % repr(line))
        if not line:
            # Presumably, the server closed the connection before
            # sending a valid response.
            raise BadStatusLine(line)
        try:
            [version, status, reason] = line.split(None, 2)
        except ValueError:
            try:
                [version, status] = line.split(None, 1)
                reason = ""
            except ValueError:
                # empty version will cause next test to fail and status
                # will be treated as 0.9 response.
                version = ""
        if not version.startswith('HTTP/'):
            if self.strict:
                self.close()
                raise BadStatusLine(line)
            else:
                # assume it's a Simple-Response from an 0.9 server
                self.fp = LineAndFileWrapper(line, self.fp)
                return "HTTP/0.9", 200, ""

        # The status code is a three-digit number
        try:
            status = int(status)
            if status < 100 or status > 999:
                raise BadStatusLine(line)
        except ValueError:
            raise BadStatusLine(line)
        return version, status, reason

    def begin(self):
        """Read the status line and headers and work out body framing.

        Sets status, reason, version, msg, chunked, length and
        will_close.  Calling it again after the headers were read is a
        no-op.  Raises UnknownProtocol for an unrecognised HTTP version.
        """
        if self.msg is not None:
            # we've already started reading the response
            return

        # read until we get a non-100 response
        while True:
            version, status, reason = self._read_status()
            if status != CONTINUE:
                break
            # skip the header from the 100 response
            while True:
                skip = self.fp.readline().strip()
                if not skip:
                    break
                if self.debuglevel > 0:
                    print("header: %s" % skip)

        self.status = status
        self.reason = reason.strip()
        if version == 'HTTP/1.0':
            self.version = 10
        elif version.startswith('HTTP/1.'):
            self.version = 11   # use HTTP/1.1 code for HTTP/1.x where x>=1
        elif version == 'HTTP/0.9':
            self.version = 9
        else:
            raise UnknownProtocol(version)

        if self.version == 9:
            # Simple-Response: no headers, body runs until the peer closes
            self.length = None
            self.chunked = 0
            self.will_close = 1
            self.msg = HTTPMessage(StringIO())
            return

        self.msg = HTTPMessage(self.fp, 0)
        if self.debuglevel > 0:
            for hdr in self.msg.headers:
                # the raw line still carries its newline; drop it because
                # print() supplies one
                print("header: %s" % hdr.rstrip("\n"))

        # don't let the msg keep an fp
        self.msg.fp = None

        # are we using the chunked-style of transfer encoding?
        tr_enc = self.msg.getheader('transfer-encoding')
        if tr_enc and tr_enc.lower() == "chunked":
            self.chunked = 1
            self.chunk_left = None
        else:
            self.chunked = 0

        # will the connection close at the end of the response?
        self.will_close = self._check_close()

        # do we have a Content-Length?
        # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
        length = self.msg.getheader('content-length')
        if length and not self.chunked:
            try:
                self.length = int(length)
            except ValueError:
                self.length = None
            else:
                if self.length < 0:  # ignore nonsensical negative lengths
                    self.length = None
        else:
            self.length = None

        # does the body have a fixed length? (of zero)
        if (status == NO_CONTENT or status == NOT_MODIFIED or
            100 <= status < 200 or      # 1xx codes
            self._method == 'HEAD'):
            self.length = 0

        # if the connection remains open, and we aren't using chunked, and
        # a content-length was not provided, then assume that the connection
        # WILL close.
        if not self.will_close and \
           not self.chunked and \
           self.length is None:
            self.will_close = 1

    def _check_close(self):
        """Return True if the server will close the connection afterwards."""
        conn = self.msg.getheader('connection')
        if self.version == 11:
            # An HTTP/1.1 proxy is assumed to stay open unless
            # explicitly closed.
            if conn and "close" in conn.lower():
                return True
            return False

        # Some HTTP/1.0 implementations have support for persistent
        # connections, using rules different than HTTP/1.1.

        # For older HTTP, Keep-Alive indicates persistent connection.
        if self.msg.getheader('keep-alive'):
            return False

        # At least Akamai returns a "Connection: Keep-Alive" header,
        # which was supposed to be sent by the client.
        if conn and "keep-alive" in conn.lower():
            return False

        # Proxy-Connection is a netscape hack.
        pconn = self.msg.getheader('proxy-connection')
        if pconn and "keep-alive" in pconn.lower():
            return False

        # otherwise, assume it will close
        return True

    def close(self):
        """Close the underlying file object; safe to call repeatedly."""
        if self.fp:
            self.fp.close()
            self.fp = None

    def isclosed(self):
        """Return True once the response's file object has been released."""
        # NOTE: it is possible that we will not ever call self.close(). This
        #       case occurs when will_close is TRUE, length is None, and we
        #       read up to the last byte, but NOT past it.
        #
        # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
        #          called, meaning self.isclosed() is meaningful.
        return self.fp is None

    # XXX It would be nice to have readline and __iter__ for this, too.

    def read(self, amt=None):
        """Read and return up to `amt' bytes of the response body.

        With amt=None the rest of the body is returned.  An empty string
        is returned once the response is closed.  The response is closed
        when the announced length has been consumed, after an unbounded
        read, or when a bounded read hits end-of-file.
        """
        if self.fp is None:
            return ''

        if self.chunked:
            return self._read_chunked(amt)

        if amt is None:
            # unbounded read
            if self.length is None:
                s = self.fp.read()
            else:
                s = self._safe_read(self.length)
                self.length = 0
            self.close()        # we read everything
            return s

        if self.length is not None:
            if amt > self.length:
                # clip the read to the "end of response"
                amt = self.length

        # we do not use _safe_read() here because this may be a .will_close
        # connection, and the user is reading more bytes than will be provided
        # (for example, reading in 1k chunks)
        s = self.fp.read(amt)
        if not s and amt:
            # EOF: nothing more can arrive, so release the file object even
            # if the announced length was never satisfied; otherwise
            # isclosed() would stay false forever.
            self.close()
        if self.length is not None:
            self.length -= len(s)
            if not self.length:
                self.close()
        return s

    def _read_chunked(self, amt):
        """Read body data sent with "chunked" transfer encoding.

        Returns up to `amt' bytes (everything when amt is None).  An
        unparsable chunk-size line closes the response and raises
        IncompleteRead carrying the data gathered so far.
        """
        assert self.chunked != _UNKNOWN
        chunk_left = self.chunk_left
        value = []
        while True:
            if chunk_left is None:
                line = self.fp.readline()
                i = line.find(';')
                if i >= 0:
                    line = line[:i] # strip chunk-extensions
                try:
                    chunk_left = int(line, 16)
                except ValueError:
                    # close the connection as protocol synchronisation is
                    # probably lost
                    self.close()
                    raise IncompleteRead(''.join(value))
                if chunk_left == 0:
                    break
            if amt is None:
                value.append(self._safe_read(chunk_left))
            elif amt < chunk_left:
                value.append(self._safe_read(amt))
                self.chunk_left = chunk_left - amt
                return ''.join(value)
            elif amt == chunk_left:
                value.append(self._safe_read(amt))
                self._safe_read(2)  # toss the CRLF at the end of the chunk
                self.chunk_left = None
                return ''.join(value)
            else:
                value.append(self._safe_read(chunk_left))
                amt -= chunk_left

            # we read the whole chunk, get another
            self._safe_read(2)      # toss the CRLF at the end of the chunk
            chunk_left = None

        # read and discard trailer up to the CRLF terminator
        ### note: we shouldn't have any trailers!
        while True:
            line = self.fp.readline()
            if not line:
                # a vanishingly small number of sites EOF without
                # sending the trailer
                break
            if line == '\r\n':
                break

        # we read everything; close the "file"
        self.close()

        return ''.join(value)

    def _safe_read(self, amt):
        """Read the number of bytes requested, compensating for partial reads.

        Normally, we have a blocking socket, but a read() can be interrupted
        by a signal (resulting in a partial read).

        Note that we cannot distinguish between EOF and an interrupt when zero
        bytes have been read. IncompleteRead() will be raised in this
        situation.

        This function should be used when <amt> bytes "should" be present for
        reading. If the bytes are truly not available (due to EOF), then the
        IncompleteRead exception can be used to detect the problem.
        """
        s = []
        while amt > 0:
            chunk = self.fp.read(min(amt, MAXAMOUNT))
            if not chunk:
                raise IncompleteRead(''.join(s), amt)
            s.append(chunk)
            amt -= len(chunk)
        return ''.join(s)

    def getheader(self, name, default=None):
        """Return the value of header `name', or `default' if absent.

        Raises ResponseNotReady if the headers have not been read yet.
        """
        if self.msg is None:
            raise ResponseNotReady()
        return self.msg.getheader(name, default)

    def getheaders(self):
        """Return list of (header, value) tuples."""
        if self.msg is None:
            raise ResponseNotReady()
        return self.msg.items()
632
633
class HTTPConnection:
    """Client side of one HTTP connection.

    Requests are issued either step by step (putrequest(), putheader(),
    endheaders(), send()) or in one go with request(); the reply is then
    obtained with getresponse().  The order of these calls is checked
    against the state machine described in the module docstring.
    """

    _http_vsn = 11
    _http_vsn_str = 'HTTP/1.1'

    response_class = HTTPResponse
    default_port = HTTP_PORT
    auto_open = 1
    debuglevel = 0
    strict = 0

    def __init__(self, host, port=None, strict=None,
                 timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """Remember where to connect; no socket is opened yet.

        `host' may carry a ":port" suffix when `port' is None.  `strict'
        is handed to the response class, `timeout' to
        socket.create_connection().
        """
        self.timeout = timeout
        self.sock = None
        self._buffer = []
        self.__response = None
        self.__state = _CS_IDLE
        self._method = None
        self._tunnel_host = None
        self._tunnel_port = None
        self._tunnel_headers = {}

        self._set_hostport(host, port)
        if strict is not None:
            self.strict = strict

    def _set_tunnel(self, host, port=None, headers=None):
        """ Sets up the host and the port for the HTTP CONNECT Tunnelling.

        The headers argument should be a mapping of extra HTTP headers
        to send with the CONNECT request.
        """
        self._tunnel_host = host
        self._tunnel_port = port
        # Keep a private copy: storing the caller's mapping and clearing
        # it on a later call would wipe the caller's own object.
        if headers:
            self._tunnel_headers = dict(headers)
        else:
            self._tunnel_headers = {}

    def _set_hostport(self, host, port):
        """Store host and port, parsing "host:port" when `port' is None.

        An empty port ("host:") selects default_port; a non-numeric one
        raises InvalidURL.  Brackets around an IPv6 literal are removed.
        """
        if port is None:
            i = host.rfind(':')
            j = host.rfind(']')         # ipv6 addresses have [...]
            if i > j:
                port_text = host[i+1:]
                try:
                    port = int(port_text)
                except ValueError:
                    if port_text == "":
                        # "host:" means the same as "host"
                        port = self.default_port
                    else:
                        raise InvalidURL("nonnumeric port: '%s'" % port_text)
                host = host[:i]
            else:
                port = self.default_port
            if host and host[0] == '[' and host[-1] == ']':
                host = host[1:-1]
        self.host = host
        self.port = port

    def set_debuglevel(self, level):
        """Set the debug output level; values above 0 print traffic."""
        self.debuglevel = level

    def _tunnel(self):
        """Issue a CONNECT request for the tunnel host over self.sock.

        Raises socket.error, after closing the connection, when the
        reply status is not 200.
        """
        self._set_hostport(self._tunnel_host, self._tunnel_port)
        self.send("CONNECT %s:%d HTTP/1.0\r\n" % (self.host, self.port))
        for header, value in self._tunnel_headers.iteritems():
            self.send("%s: %s\r\n" % (header, value))
        self.send("\r\n")
        response = self.response_class(self.sock, strict = self.strict,
                                       method = self._method)
        (version, code, message) = response._read_status()

        if code != 200:
            self.close()
            raise socket.error("Tunnel connection failed: %d %s" % (code,
                                                           message.strip()))
        # skip the remaining reply headers
        while True:
            line = response.fp.readline()
            if not line:
                # EOF: readline() would return '' forever, so stop here
                break
            if line == '\r\n':
                break

    def connect(self):
        """Connect to the host and port specified in __init__."""
        self.sock = socket.create_connection((self.host, self.port),
                                             self.timeout)

        if self._tunnel_host:
            self._tunnel()

    def close(self):
        """Close the connection to the HTTP server."""
        if self.sock:
            self.sock.close()   # close it manually... there may be other refs
            self.sock = None
        if self.__response:
            self.__response.close()
            self.__response = None
        self.__state = _CS_IDLE

    def send(self, str):
        """Send `str' to the server.

        `str' is either a string or an object with a read() method, which
        is then sent in blocks.  Connects first when there is no socket
        and auto_open is set; raises NotConnected otherwise.
        """
        import errno

        if self.sock is None:
            if self.auto_open:
                self.connect()
            else:
                raise NotConnected()

        # send the data to the server. if we get a broken pipe, then close
        # the socket. we want to reconnect when somebody tries to send again.
        #
        # NOTE: we DO propagate the error, though, because we cannot simply
        #       ignore the error... the caller will know if they can retry.
        if self.debuglevel > 0:
            print("send: %s" % repr(str))
        try:
            blocksize = 8192
            if hasattr(str, 'read') and not isinstance(str, array):
                if self.debuglevel > 0:
                    print("sendIng a read()able")
                data = str.read(blocksize)
                while data:
                    self.sock.sendall(data)
                    data = str.read(blocksize)
            else:
                self.sock.sendall(str)
        except socket.error as v:
            if v.args[0] == errno.EPIPE:      # Broken pipe
                self.close()
            raise

    def _output(self, s):
        """Add a line of output to the current request buffer.

        Assumes that the line does *not* end with \\r\\n.
        """
        self._buffer.append(s)

    def _send_output(self):
        """Send the currently buffered request and clear the buffer.

        Appends an extra \\r\\n to the buffer.
        """
        self._buffer.extend(("", ""))
        msg = "\r\n".join(self._buffer)
        del self._buffer[:]
        self.send(msg)

    def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
        """Send a request to the server.

        `method' specifies an HTTP request method, e.g. 'GET'.
        `url' specifies the object being requested, e.g. '/index.html'.
        `skip_host' if True does not add automatically a 'Host:' header
        `skip_accept_encoding' if True does not add automatically an
           'Accept-Encoding:' header

        Raises CannotSendRequest unless the connection is idle.
        """

        # if a prior response has been completed, then forget about it.
        if self.__response and self.__response.isclosed():
            self.__response = None

        # in certain cases, we cannot issue another request on this connection.
        # this occurs when:
        #   1) we are in the process of sending a request.   (_CS_REQ_STARTED)
        #   2) a response to a previous request has signalled that it is going
        #      to close the connection upon completion.
        #   3) the headers for the previous response have not been read, thus
        #      we cannot determine whether point (2) is true.   (_CS_REQ_SENT)
        #
        # if there is no prior response, then we can request at will.
        #
        # if point (2) is true, then we will have passed the socket to the
        # response (effectively meaning, "there is no prior response"), and
        # will open a new one when a new request is made.
        #
        # Note: if a prior response exists, then we *can* start a new request.
        #       We are not allowed to begin fetching the response to this new
        #       request, however, until that prior response is complete.
        #
        if self.__state == _CS_IDLE:
            self.__state = _CS_REQ_STARTED
        else:
            raise CannotSendRequest()

        # Save the method we use, we need it later in the response phase
        self._method = method
        if not url:
            url = '/'
        request_line = '%s %s %s' % (method, url, self._http_vsn_str)

        self._output(request_line)

        if self._http_vsn == 11:
            # Issue some standard headers for better HTTP/1.1 compliance

            if not skip_host:
                # this header is issued *only* for HTTP/1.1
                # connections. more specifically, this means it is
                # only issued when the client uses the new
                # HTTPConnection() class. backwards-compat clients
                # will be using HTTP/1.0 and those clients may be
                # issuing this header themselves. we should NOT issue
                # it twice; some web servers (such as Apache) barf
                # when they see two Host: headers

                # If we need a non-standard port, include it in the
                # header.  If the request is going through a proxy,
                # use the host of the actual URL, not the host of the
                # proxy.

                netloc = ''
                if url.startswith('http'):
                    netloc = urlsplit(url)[1]

                if netloc:
                    try:
                        netloc_enc = netloc.encode("ascii")
                    except UnicodeEncodeError:
                        netloc_enc = netloc.encode("idna")
                    self.putheader('Host', netloc_enc)
                else:
                    try:
                        host_enc = self.host.encode("ascii")
                    except UnicodeEncodeError:
                        host_enc = self.host.encode("idna")
                    if self.port == self.default_port:
                        self.putheader('Host', host_enc)
                    else:
                        self.putheader('Host', "%s:%s" % (host_enc, self.port))

            # note: we are assuming that clients will not attempt to set these
            #       headers since *this* library must deal with the
            #       consequences. this also means that when the supporting
            #       libraries are updated to recognize other forms, then this
            #       code should be changed (removed or updated).

            # we only want a Content-Encoding of "identity" since we don't
            # support encodings such as x-gzip or x-deflate.
            if not skip_accept_encoding:
                self.putheader('Accept-Encoding', 'identity')

            # we can accept "chunked" Transfer-Encodings, but no others
            # NOTE: no TE header implies *only* "chunked"
            #self.putheader('TE', 'chunked')

            # if TE is supplied in the header, then it must appear in a
            # Connection header.
            #self.putheader('Connection', 'TE')

        else:
            # For HTTP/1.0, the server will assume "not chunked"
            pass

    def putheader(self, header, value):
        """Send a request header line to the server.

        For example: h.putheader('Accept', 'text/html')

        Raises CannotSendHeader unless a request has been started.
        """
        if self.__state != _CS_REQ_STARTED:
            raise CannotSendHeader()

        header_line = '%s: %s' % (header, value)
        self._output(header_line)

    def endheaders(self):
        """Indicate that the last header line has been sent to the server."""

        if self.__state == _CS_REQ_STARTED:
            self.__state = _CS_REQ_SENT
        else:
            raise CannotSendHeader()

        self._send_output()

    def request(self, method, url, body=None, headers={}):
        """Send a complete request to the server.

        `headers' is a mapping of extra header names to values; it is
        only read, never modified.  After a broken pipe the request is
        attempted once more when auto_open is set.
        """
        import errno

        try:
            self._send_request(method, url, body, headers)
        except socket.error as v:
            # trap 'Broken pipe' if we're allowed to automatically reconnect
            if v.args[0] != errno.EPIPE or not self.auto_open:
                raise
            # try one more time
            self._send_request(method, url, body, headers)

    def _send_request(self, method, url, body, headers):
        """Emit request line, headers and optional body for request()."""
        # honour explicitly requested Host: and Accept-Encoding headers
        header_names = dict.fromkeys([k.lower() for k in headers])
        skips = {}
        if 'host' in header_names:
            skips['skip_host'] = 1
        if 'accept-encoding' in header_names:
            skips['skip_accept_encoding'] = 1

        self.putrequest(method, url, **skips)

        if body and ('content-length' not in header_names):
            thelen = None
            try:
                thelen = str(len(body))
            except TypeError:
                # If this is a file-like object, try to
                # fstat its file descriptor
                import os
                try:
                    thelen = str(os.fstat(body.fileno()).st_size)
                except (AttributeError, OSError):
                    # Don't send a length if this failed
                    if self.debuglevel > 0:
                        print("Cannot stat!!")

            if thelen is not None:
                self.putheader('Content-Length', thelen)
        for hdr, value in headers.iteritems():
            self.putheader(hdr, value)
        self.endheaders()

        if body:
            self.send(body)

    def getresponse(self):
        """Get the response from the server.

        Raises ResponseNotReady when no request has been sent or when an
        earlier response has not been read completely.
        """

        # if a prior response has been completed, then forget about it.
        if self.__response and self.__response.isclosed():
            self.__response = None

        #
        # if a prior response exists, then it must be completed (otherwise, we
        # cannot read this response's header to determine the connection-close
        # behavior)
        #
        # note: if a prior response existed, but was connection-close, then the
        # socket and response were made independent of this HTTPConnection
        # object since a new request requires that we open a whole new
        # connection
        #
        # this means the prior response had one of two states:
        #   1) will_close: this connection was reset and the prior socket and
        #                  response operate independently
        #   2) persistent: the response was retained and we await its
        #                  isclosed() status to become true.
        #
        if self.__state != _CS_REQ_SENT or self.__response:
            raise ResponseNotReady()

        if self.debuglevel > 0:
            response = self.response_class(self.sock, self.debuglevel,
                                           strict=self.strict,
                                           method=self._method)
        else:
            response = self.response_class(self.sock, strict=self.strict,
                                           method=self._method)

        response.begin()
        assert response.will_close != _UNKNOWN
        self.__state = _CS_IDLE

        if response.will_close:
            # this effectively passes the connection to the response
            self.close()
        else:
            # remember this, so we can tell when it is complete
            self.__response = response

        return response
998
999
class HTTP:
    "Compatibility class with httplib.py from 1.5."

    _http_vsn = 10
    _http_vsn_str = 'HTTP/1.0'

    debuglevel = 0

    _connection_class = HTTPConnection

    def __init__(self, host='', port=None, strict=None):
        "Create the wrapped connection; the host defaults to ''."

        # An explicit port of 0 is taken to mean "use the default port".
        if port == 0:
            port = None

        # An empty host is accepted here and only fails when connecting;
        # presumably the caller invokes connect() with a real host first.
        connection = self._connection_class(host, port, strict)
        self._setup(connection)

    def _setup(self, conn):
        "Attach `conn' and expose part of its interface on this object."
        self._conn = conn

        # delegate these calls straight to the connection's bound methods
        for name in ('send', 'putrequest', 'endheaders', 'set_debuglevel'):
            setattr(self, name, getattr(conn, name))

        # make the connection speak this class's protocol version
        conn._http_vsn = self._http_vsn
        conn._http_vsn_str = self._http_vsn_str

        self.file = None

    def connect(self, host=None, port=None):
        "Connect, optionally switching to another host/port beforehand."

        if host is not None:
            self._conn._set_hostport(host, port)
        self._conn.connect()

    def getfile(self):
        "Return the file object stored by the most recent getreply()."
        return self.file

    def putheader(self, header, *values):
        "Send a header; several values become continuation lines."
        self._conn.putheader(header, '\r\n\t'.join(values))

    def getreply(self):
        """Fetch the reply to the request that was just sent.

        Returns a tuple consisting of:
        - server status code (e.g. 200 if all goes well)
        - server "reason" corresponding to status code
        - any RFC822 headers in the response from the server

        When the status line cannot be parsed the tuple is
        (-1, <offending line>, None) and self.headers is set to None.
        """
        try:
            response = self._conn.getresponse()
        except BadStatusLine as e:
            ### hmm. if getresponse() ever closes the socket on a bad request,
            ### then we are going to have problems with self.sock

            ### should we keep this behavior? do people use it?
            # keep the socket open (as a file), and return it
            self.file = self._conn.sock.makefile('rb', 0)

            # close our socket -- we want to restart after any protocol error
            # NOTE: close() also sets self.file back to None.
            self.close()

            self.headers = None
            return -1, e.line, None

        self.file = response.fp
        self.headers = response.msg
        return response.status, response.reason, response.msg

    def close(self):
        "Close the wrapped connection and drop the file reference."
        self._conn.close()

        # note that self.file == response.fp, which gets closed by the
        # superclass. just clear the object ref here.
        ### hmm. messy. if status==-1, then self.file is owned by us.
        ### well... we aren't explicitly closing, but losing this ref will
        ### do it
        self.file = None
1088
try:
    import ssl
except ImportError:
    # without the ssl module the HTTPS classes are simply not defined
    pass
else:
    class HTTPSConnection(HTTPConnection):
        "HTTPConnection variant that communicates via SSL."

        default_port = HTTPS_PORT

        def __init__(self, host, port=None, key_file=None, cert_file=None,
                     strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
            HTTPConnection.__init__(self, host, port, strict, timeout)
            # remembered for connect(), which hands them to ssl.wrap_socket()
            self.key_file = key_file
            self.cert_file = cert_file

        def connect(self):
            "Connect to a host on a given (SSL) port."

            address = (self.host, self.port)
            plain_sock = socket.create_connection(address, self.timeout)
            if self._tunnel_host:
                # presumably _tunnel() works through self.sock, so the
                # unwrapped socket is installed first -- confirm against
                # HTTPConnection._tunnel
                self.sock = plain_sock
                self._tunnel()
            self.sock = ssl.wrap_socket(plain_sock, self.key_file,
                                        self.cert_file)

    __all__.append("HTTPSConnection")

    class HTTPS(HTTP):
        """Compatibility with 1.5 httplib interface

        Python 1.5.2 did not have an HTTPS class, but it defined an
        interface for sending http requests that is also useful for
        https.
        """

        _connection_class = HTTPSConnection

        def __init__(self, host='', port=None, key_file=None, cert_file=None,
                     strict=None):
            # provide a default host, pass the X509 cert info

            # urf. compensate for bad input: 0 means "default port"
            if port == 0:
                port = None
            conn = self._connection_class(host, port, key_file,
                                          cert_file, strict)
            self._setup(conn)

            # we never actually use these for anything, but we keep them
            # here for compatibility with post-1.5.2 CVS.
            self.key_file = key_file
            self.cert_file = cert_file


    def FakeSocket(sock, sslobj):
        "Deprecated shim: warn, then hand back sslobj unchanged."
        message = ("FakeSocket is deprecated, and won't be in 3.x. "
                   "Use the result of ssl.wrap_socket() directly instead.")
        # stacklevel=2 attributes the warning to FakeSocket's caller
        warnings.warn(message, DeprecationWarning, stacklevel=2)
        return sslobj
1147
1148
class HTTPException(Exception):
    """Base class for the exceptions defined in this module.

    Subclasses that define an __init__ must call Exception.__init__
    or define self.args.  Otherwise, str() will fail.
    """


class NotConnected(HTTPException):
    """HTTPException subclass named NotConnected."""


class InvalidURL(HTTPException):
    """HTTPException subclass named InvalidURL."""


class UnknownProtocol(HTTPException):
    """HTTPException subclass carrying the offending protocol version."""

    def __init__(self, version):
        self.version = version
        # args is set by hand because Exception.__init__ is not called
        self.args = (version,)


class UnknownTransferEncoding(HTTPException):
    """HTTPException subclass named UnknownTransferEncoding."""


class UnimplementedFileMode(HTTPException):
    """HTTPException subclass named UnimplementedFileMode."""


class IncompleteRead(HTTPException):
    """HTTPException subclass carrying the data that was read.

    partial  -- the data obtained so far
    expected -- number of further bytes that were expected, or None
    """

    def __init__(self, partial, expected=None):
        self.partial = partial
        self.expected = expected
        # args is set by hand because Exception.__init__ is not called
        self.args = (partial,)

    def __repr__(self):
        if self.expected is None:
            suffix = ''
        else:
            suffix = ', %i more expected' % self.expected
        return 'IncompleteRead(%i bytes read%s)' % (len(self.partial), suffix)

    def __str__(self):
        # show the byte counts rather than the raw partial data
        return repr(self)


class ImproperConnectionState(HTTPException):
    """Base class for CannotSendRequest, CannotSendHeader and
    ResponseNotReady."""


class CannotSendRequest(ImproperConnectionState):
    """ImproperConnectionState subclass named CannotSendRequest."""


class CannotSendHeader(ImproperConnectionState):
    """ImproperConnectionState subclass named CannotSendHeader."""


class ResponseNotReady(ImproperConnectionState):
    """ImproperConnectionState subclass named ResponseNotReady."""


class BadStatusLine(HTTPException):
    """HTTPException subclass carrying the offending status line."""

    def __init__(self, line):
        self.line = line
        # args is set by hand because Exception.__init__ is not called
        self.args = (line,)


# for backwards compatibility
error = HTTPException
1204
class LineAndFileWrapper:
    """A limited file-like object for HTTP/0.9 responses.

    Serves the text of an already-read line first and then continues
    with the underlying file.  Once the line has been handed out
    completely, read(), readline() and readlines() are rebound to the
    file's own methods.
    """

    # The status-line parsing code calls readline(), which normally
    # gets the HTTP status line.  For a 0.9 response, however, this is
    # actually the first line of the body!  Clients need to get a
    # readable file object that contains that line.

    def __init__(self, line, file):
        self._line = line
        self._file = file
        self._line_consumed = 0
        self._line_offset = 0
        self._line_left = len(line)

    def __getattr__(self, attr):
        # anything the wrapper does not define is looked up on the file
        return getattr(self._file, attr)

    def _done(self):
        # called when the last byte is read from the line.  After the
        # call, all read methods are delegated to the underlying file
        # object.
        self._line_consumed = 1
        self.read = self._file.read
        self.readline = self._file.readline
        self.readlines = self._file.readlines

    def read(self, amt=None):
        """Read up to amt bytes; None or a negative amt reads everything.

        Data comes from the saved line first and then from the
        underlying file.
        """
        # File objects treat a negative size as "read to EOF".  Without
        # this, a negative amt would slice the line with a negative end
        # index and corrupt the offset bookkeeping.
        if amt is not None and amt < 0:
            amt = None
        if self._line_consumed:
            # Only reachable through a bound method saved before _done()
            # rebound the instance attributes.  Do not forward a literal
            # None: not every file object accepts it as a size.
            if amt is None:
                return self._file.read()
            return self._file.read(amt)
        assert self._line_left
        if amt is None or amt > self._line_left:
            # the request spans the rest of the line and part of the file
            s = self._line[self._line_offset:]
            self._done()
            if amt is None:
                return s + self._file.read()
            return s + self._file.read(amt - len(s))
        # the request is satisfied from the line alone
        i = self._line_offset
        j = i + amt
        s = self._line[i:j]
        self._line_offset = j
        self._line_left -= amt
        if self._line_left == 0:
            self._done()
        return s

    def readline(self):
        """Return the rest of the saved line, or the file's next line."""
        if self._line_consumed:
            return self._file.readline()
        assert self._line_left
        s = self._line[self._line_offset:]
        self._done()
        return s

    def readlines(self, size=None):
        """Return the rest of the saved line followed by the file's lines.

        size, when given, is passed on to the file's readlines() only.
        """
        if self._line_consumed:
            # see read(): avoid forwarding a literal None
            if size is None:
                return self._file.readlines()
            return self._file.readlines(size)
        assert self._line_left
        L = [self._line[self._line_offset:]]
        self._done()
        if size is None:
            return L + self._file.readlines()
        else:
            return L + self._file.readlines(size)
1272
1273def test():
1274 """Test this module.
1275
1276 A hodge podge of tests collected here, because they have too many
1277 external dependencies for the regular test suite.
1278 """
1279
1280 import sys
1281 import getopt
1282 opts, args = getopt.getopt(sys.argv[1:], 'd')
1283 dl = 0
1284 for o, a in opts:
1285 if o == '-d': dl = dl + 1
1286 host = 'www.python.org'
1287 selector = '/'
1288 if args[0:]: host = args[0]
1289 if args[1:]: selector = args[1]
1290 h = HTTP()
1291 h.set_debuglevel(dl)
1292 h.connect(host)
1293 h.putrequest('GET', selector)
1294 h.endheaders()
1295 status, reason, headers = h.getreply()
1296 print 'status =', status
1297 print 'reason =', reason
1298 print "read", len(h.getfile().read())
1299 print
1300 if headers:
1301 for header in headers.headers: print header.strip()
1302 print
1303
1304 # minimal test that code to extract host from url works
1305 class HTTP11(HTTP):
1306 _http_vsn = 11
1307 _http_vsn_str = 'HTTP/1.1'
1308
1309 h = HTTP11('www.python.org')
1310 h.putrequest('GET', 'http://www.python.org/~jeremy/')
1311 h.endheaders()
1312 h.getreply()
1313 h.close()
1314
1315 try:
1316 import ssl
1317 except ImportError:
1318 pass
1319 else:
1320
1321 for host, selector in (('sourceforge.net', '/projects/python'),
1322 ):
1323 print "https://%s%s" % (host, selector)
1324 hs = HTTPS()
1325 hs.set_debuglevel(dl)
1326 hs.connect(host)
1327 hs.putrequest('GET', selector)
1328 hs.endheaders()
1329 status, reason, headers = hs.getreply()
1330 print 'status =', status
1331 print 'reason =', reason
1332 print "read", len(hs.getfile().read())
1333 print
1334 if headers:
1335 for header in headers.headers: print header.strip()
1336 print
1337
1338if __name__ == '__main__':
1339 test()
Note: See TracBrowser for help on using the repository browser.