Context Navigation

httplib.py

Last change on this file was 2, checked in by Yuri Dario, 15 years ago
Initial import for vendor code.
Property svn:eol-style set to `native`
File size: 44.3 KB

Line
1	"""HTTP/1.1 client library
2
3	<intro stuff goes here>
4	<other stuff, too>
5
6	HTTPConnection goes through a number of "states", which define when a client
7	may legally make another request or fetch the response for a particular
8	request. This diagram details these state transitions:
9
    (null)
      |
      | HTTPConnection()
      v
    Idle
      |
      | putrequest()
      v
    Request-started
      |
      | ( putheader() )*  endheaders()
      v
    Request-sent
      |
      | response = getresponse()
      v
    Unread-response   [Response-headers-read]
      |\____________________
      |                     |
      | response.read()     | putrequest()
      v                     v
    Idle                  Req-started-unread-response
                     ______/|
                   /        |
   response.read() |        | ( putheader() )*  endheaders()
                   v        v
       Request-started    Req-sent-unread-response
                            |
                            | response.read()
                            v
                          Request-sent
41
42	This diagram presents the following rules:
43	-- a second request may not be started until {response-headers-read}
44	-- a response [object] cannot be retrieved until {request-sent}
45	-- there is no differentiation between an unread response body and a
46	partially read response body
47
48	Note: this enforcement is applied by the HTTPConnection class. The
49	HTTPResponse class does not enforce this state machine, which
50	implies sophisticated clients may accelerate the request/response
51	pipeline. Caution should be taken, though: accelerating the states
52	beyond the above pattern may imply knowledge of the server's
53	connection-close behavior for certain requests. For example, it
54	is impossible to tell whether the server will close the connection
55	UNTIL the response headers have been read; this means that further
56	requests cannot be placed into the pipeline until it is known that
57	the server will NOT be closing the connection.
58
Logical State                  __state            __response
-------------                  -------            ----------
Idle                           _CS_IDLE           None
Request-started                _CS_REQ_STARTED    None
Request-sent                   _CS_REQ_SENT       None
Unread-response                _CS_IDLE           <response_class>
Req-started-unread-response    _CS_REQ_STARTED    <response_class>
Req-sent-unread-response       _CS_REQ_SENT       <response_class>
67	"""
68
69	from array import array
70	import socket
71	from sys import py3kwarning
72	from urlparse import urlsplit
73	import warnings
74	with warnings.catch_warnings():
75	if py3kwarning:
76	warnings.filterwarnings("ignore", ".*mimetools has been removed",
77	DeprecationWarning)
78	import mimetools
79
80	try:
81	from cStringIO import StringIO
82	except ImportError:
83	from StringIO import StringIO
84
# Names exported by "from httplib import *".
__all__ = ["HTTP", "HTTPResponse", "HTTPConnection",
           "HTTPException", "NotConnected", "UnknownProtocol",
           "UnknownTransferEncoding", "UnimplementedFileMode",
           "IncompleteRead", "InvalidURL", "ImproperConnectionState",
           "CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
           "BadStatusLine", "error", "responses"]

# Default ports; used as the default_port of the connection classes.
HTTP_PORT = 80
HTTPS_PORT = 443

# Placeholder stored in HTTPResponse attributes (version, status, reason,
# chunked, length, will_close, ...) until begin() has parsed the response.
_UNKNOWN = 'UNKNOWN'

# connection states
# (values of HTTPConnection.__state; see the table in the module docstring)
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'

# status codes
# informational
CONTINUE = 100
SWITCHING_PROTOCOLS = 101
PROCESSING = 102

# successful
OK = 200
CREATED = 201
ACCEPTED = 202
NON_AUTHORITATIVE_INFORMATION = 203
NO_CONTENT = 204
RESET_CONTENT = 205
PARTIAL_CONTENT = 206
MULTI_STATUS = 207
IM_USED = 226

# redirection
MULTIPLE_CHOICES = 300
MOVED_PERMANENTLY = 301
FOUND = 302
SEE_OTHER = 303
NOT_MODIFIED = 304
USE_PROXY = 305
TEMPORARY_REDIRECT = 307

# client error
BAD_REQUEST = 400
UNAUTHORIZED = 401
PAYMENT_REQUIRED = 402
FORBIDDEN = 403
NOT_FOUND = 404
METHOD_NOT_ALLOWED = 405
NOT_ACCEPTABLE = 406
PROXY_AUTHENTICATION_REQUIRED = 407
REQUEST_TIMEOUT = 408
CONFLICT = 409
GONE = 410
LENGTH_REQUIRED = 411
PRECONDITION_FAILED = 412
REQUEST_ENTITY_TOO_LARGE = 413
REQUEST_URI_TOO_LONG = 414
UNSUPPORTED_MEDIA_TYPE = 415
REQUESTED_RANGE_NOT_SATISFIABLE = 416
EXPECTATION_FAILED = 417
UNPROCESSABLE_ENTITY = 422
LOCKED = 423
FAILED_DEPENDENCY = 424
UPGRADE_REQUIRED = 426

# server error
INTERNAL_SERVER_ERROR = 500
NOT_IMPLEMENTED = 501
BAD_GATEWAY = 502
SERVICE_UNAVAILABLE = 503
GATEWAY_TIMEOUT = 504
HTTP_VERSION_NOT_SUPPORTED = 505
INSUFFICIENT_STORAGE = 507
NOT_EXTENDED = 510

# Mapping status codes to official W3C names
# NOTE: this table is narrower than the constants above.  It has no entry
# for 102, 207, 226, 422, 423, 424, 426, 507 or 510, so look codes up with
# responses.get(code) rather than responses[code].  Conversely, 306 appears
# here although no named constant is defined for it.
responses = {
    100: 'Continue',
    101: 'Switching Protocols',

    200: 'OK',
    201: 'Created',
    202: 'Accepted',
    203: 'Non-Authoritative Information',
    204: 'No Content',
    205: 'Reset Content',
    206: 'Partial Content',

    300: 'Multiple Choices',
    301: 'Moved Permanently',
    302: 'Found',
    303: 'See Other',
    304: 'Not Modified',
    305: 'Use Proxy',
    306: '(Unused)',
    307: 'Temporary Redirect',

    400: 'Bad Request',
    401: 'Unauthorized',
    402: 'Payment Required',
    403: 'Forbidden',
    404: 'Not Found',
    405: 'Method Not Allowed',
    406: 'Not Acceptable',
    407: 'Proxy Authentication Required',
    408: 'Request Timeout',
    409: 'Conflict',
    410: 'Gone',
    411: 'Length Required',
    412: 'Precondition Failed',
    413: 'Request Entity Too Large',
    414: 'Request-URI Too Long',
    415: 'Unsupported Media Type',
    416: 'Requested Range Not Satisfiable',
    417: 'Expectation Failed',

    500: 'Internal Server Error',
    501: 'Not Implemented',
    502: 'Bad Gateway',
    503: 'Service Unavailable',
    504: 'Gateway Timeout',
    505: 'HTTP Version Not Supported',
}

# maximal amount of data to read at one time in _safe_read
# (1048576 bytes == 1 MiB)
MAXAMOUNT = 1048576
213
214	class HTTPMessage(mimetools.Message):
215
216	def addheader(self, key, value):
217	"""Add header for field key handling repeats."""
218	prev = self.dict.get(key)
219	if prev is None:
220	self.dict[key] = value
221	else:
222	combined = ", ".join((prev, value))
223	self.dict[key] = combined
224
225	def addcontinue(self, key, more):
226	"""Add more field data from a continuation line."""
227	prev = self.dict[key]
228	self.dict[key] = prev + "\n " + more
229
230	def readheaders(self):
231	"""Read header lines.
232
233	Read header lines up to the entirely blank line that terminates them.
234	The (normally blank) line that ends the headers is skipped, but not
235	included in the returned list. If a non-header line ends the headers,
236	(which is an error), an attempt is made to backspace over it; it is
237	never included in the returned list.
238
239	The variable self.status is set to the empty string if all went well,
240	otherwise it is an error message. The variable self.headers is a
241	completely uninterpreted list of lines contained in the header (so
242	printing them will reproduce the header exactly as it appears in the
243	file).
244
245	If multiple header fields with the same name occur, they are combined
246	according to the rules in RFC 2616 sec 4.2:
247
248	Appending each subsequent field-value to the first, each separated
249	by a comma. The order in which header fields with the same field-name
250	are received is significant to the interpretation of the combined
251	field value.
252	"""
253	# XXX The implementation overrides the readheaders() method of
254	# rfc822.Message. The base class design isn't amenable to
255	# customized behavior here so the method here is a copy of the
256	# base class code with a few small changes.
257
258	self.dict = {}
259	self.unixfrom = ''
260	self.headers = hlist = []
261	self.status = ''
262	headerseen = ""
263	firstline = 1
264	startofline = unread = tell = None
265	if hasattr(self.fp, 'unread'):
266	unread = self.fp.unread
267	elif self.seekable:
268	tell = self.fp.tell
269	while True:
270	if tell:
271	try:
272	startofline = tell()
273	except IOError:
274	startofline = tell = None
275	self.seekable = 0
276	line = self.fp.readline()
277	if not line:
278	self.status = 'EOF in headers'
279	break
280	# Skip unix From name time lines
281	if firstline and line.startswith('From '):
282	self.unixfrom = self.unixfrom + line
283	continue
284	firstline = 0
285	if headerseen and line[0] in ' \t':
286	# XXX Not sure if continuation lines are handled properly
287	# for http and/or for repeating headers
288	# It's a continuation line.
289	hlist.append(line)
290	self.addcontinue(headerseen, line.strip())
291	continue
292	elif self.iscomment(line):
293	# It's a comment. Ignore it.
294	continue
295	elif self.islast(line):
296	# Note! No pushback here! The delimiter line gets eaten.
297	break
298	headerseen = self.isheader(line)
299	if headerseen:
300	# It's a legal header line, save it.
301	hlist.append(line)
302	self.addheader(headerseen, line[len(headerseen)+1:].strip())
303	continue
304	else:
305	# It's not a header line; throw it back and stop here.
306	if not self.dict:
307	self.status = 'No headers'
308	else:
309	self.status = 'Non-header line where header expected'
310	# Try to undo the read.
311	if unread:
312	unread(line)
313	elif tell:
314	self.fp.seek(startofline)
315	else:
316	self.status = self.status + '; bad seek'
317	break
318
319	class HTTPResponse:
320
321	# strict: If true, raise BadStatusLine if the status line can't be
322	# parsed as a valid HTTP/1.0 or 1.1 status line. By default it is
323	# false because it prevents clients from talking to HTTP/0.9
324	# servers. Note that a response with a sufficiently corrupted
325	# status line will look like an HTTP/0.9 response.
326
327	# See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
328
329	def __init__(self, sock, debuglevel=0, strict=0, method=None):
330	self.fp = sock.makefile('rb', 0)
331	self.debuglevel = debuglevel
332	self.strict = strict
333	self._method = method
334
335	self.msg = None
336
337	# from the Status-Line of the response
338	self.version = _UNKNOWN # HTTP-Version
339	self.status = _UNKNOWN # Status-Code
340	self.reason = _UNKNOWN # Reason-Phrase
341
342	self.chunked = _UNKNOWN # is "chunked" being used?
343	self.chunk_left = _UNKNOWN # bytes left to read in current chunk
344	self.length = _UNKNOWN # number of bytes left in response
345	self.will_close = _UNKNOWN # conn will close at end of response
346
347	def _read_status(self):
348	# Initialize with Simple-Response defaults
349	line = self.fp.readline()
350	if self.debuglevel > 0:
351	print "reply:", repr(line)
352	if not line:
353	# Presumably, the server closed the connection before
354	# sending a valid response.
355	raise BadStatusLine(line)
356	try:
357	[version, status, reason] = line.split(None, 2)
358	except ValueError:
359	try:
360	[version, status] = line.split(None, 1)
361	reason = ""
362	except ValueError:
363	# empty version will cause next test to fail and status
364	# will be treated as 0.9 response.
365	version = ""
366	if not version.startswith('HTTP/'):
367	if self.strict:
368	self.close()
369	raise BadStatusLine(line)
370	else:
371	# assume it's a Simple-Response from an 0.9 server
372	self.fp = LineAndFileWrapper(line, self.fp)
373	return "HTTP/0.9", 200, ""
374
375	# The status code is a three-digit number
376	try:
377	status = int(status)
378	if status < 100 or status > 999:
379	raise BadStatusLine(line)
380	except ValueError:
381	raise BadStatusLine(line)
382	return version, status, reason
383
384	def begin(self):
385	if self.msg is not None:
386	# we've already started reading the response
387	return
388
389	# read until we get a non-100 response
390	while True:
391	version, status, reason = self._read_status()
392	if status != CONTINUE:
393	break
394	# skip the header from the 100 response
395	while True:
396	skip = self.fp.readline().strip()
397	if not skip:
398	break
399	if self.debuglevel > 0:
400	print "header:", skip
401
402	self.status = status
403	self.reason = reason.strip()
404	if version == 'HTTP/1.0':
405	self.version = 10
406	elif version.startswith('HTTP/1.'):
407	self.version = 11 # use HTTP/1.1 code for HTTP/1.x where x>=1
408	elif version == 'HTTP/0.9':
409	self.version = 9
410	else:
411	raise UnknownProtocol(version)
412
413	if self.version == 9:
414	self.length = None
415	self.chunked = 0
416	self.will_close = 1
417	self.msg = HTTPMessage(StringIO())
418	return
419
420	self.msg = HTTPMessage(self.fp, 0)
421	if self.debuglevel > 0:
422	for hdr in self.msg.headers:
423	print "header:", hdr,
424
425	# don't let the msg keep an fp
426	self.msg.fp = None
427
428	# are we using the chunked-style of transfer encoding?
429	tr_enc = self.msg.getheader('transfer-encoding')
430	if tr_enc and tr_enc.lower() == "chunked":
431	self.chunked = 1
432	self.chunk_left = None
433	else:
434	self.chunked = 0
435
436	# will the connection close at the end of the response?
437	self.will_close = self._check_close()
438
439	# do we have a Content-Length?
440	# NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
441	length = self.msg.getheader('content-length')
442	if length and not self.chunked:
443	try:
444	self.length = int(length)
445	except ValueError:
446	self.length = None
447	else:
448	if self.length < 0: # ignore nonsensical negative lengths
449	self.length = None
450	else:
451	self.length = None
452
453	# does the body have a fixed length? (of zero)
454	if (status == NO_CONTENT or status == NOT_MODIFIED or
455	100 <= status < 200 or # 1xx codes
456	self._method == 'HEAD'):
457	self.length = 0
458
459	# if the connection remains open, and we aren't using chunked, and
460	# a content-length was not provided, then assume that the connection
461	# WILL close.
462	if not self.will_close and \
463	not self.chunked and \
464	self.length is None:
465	self.will_close = 1
466
467	def _check_close(self):
468	conn = self.msg.getheader('connection')
469	if self.version == 11:
470	# An HTTP/1.1 proxy is assumed to stay open unless
471	# explicitly closed.
472	conn = self.msg.getheader('connection')
473	if conn and "close" in conn.lower():
474	return True
475	return False
476
477	# Some HTTP/1.0 implementations have support for persistent
478	# connections, using rules different than HTTP/1.1.
479
480	# For older HTTP, Keep-Alive indicates persistent connection.
481	if self.msg.getheader('keep-alive'):
482	return False
483
484	# At least Akamai returns a "Connection: Keep-Alive" header,
485	# which was supposed to be sent by the client.
486	if conn and "keep-alive" in conn.lower():
487	return False
488
489	# Proxy-Connection is a netscape hack.
490	pconn = self.msg.getheader('proxy-connection')
491	if pconn and "keep-alive" in pconn.lower():
492	return False
493
494	# otherwise, assume it will close
495	return True
496
497	def close(self):
498	if self.fp:
499	self.fp.close()
500	self.fp = None
501
502	def isclosed(self):
503	# NOTE: it is possible that we will not ever call self.close(). This
504	# case occurs when will_close is TRUE, length is None, and we
505	# read up to the last byte, but NOT past it.
506	#
507	# IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
508	# called, meaning self.isclosed() is meaningful.
509	return self.fp is None
510
511	# XXX It would be nice to have readline and __iter__ for this, too.
512
513	def read(self, amt=None):
514	if self.fp is None:
515	return ''
516
517	if self.chunked:
518	return self._read_chunked(amt)
519
520	if amt is None:
521	# unbounded read
522	if self.length is None:
523	s = self.fp.read()
524	else:
525	s = self._safe_read(self.length)
526	self.length = 0
527	self.close() # we read everything
528	return s
529
530	if self.length is not None:
531	if amt > self.length:
532	# clip the read to the "end of response"
533	amt = self.length
534
535	# we do not use _safe_read() here because this may be a .will_close
536	# connection, and the user is reading more bytes than will be provided
537	# (for example, reading in 1k chunks)
538	s = self.fp.read(amt)
539	if self.length is not None:
540	self.length -= len(s)
541	if not self.length:
542	self.close()
543	return s
544
545	def _read_chunked(self, amt):
546	assert self.chunked != _UNKNOWN
547	chunk_left = self.chunk_left
548	value = []
549	while True:
550	if chunk_left is None:
551	line = self.fp.readline()
552	i = line.find(';')
553	if i >= 0:
554	line = line[:i] # strip chunk-extensions
555	try:
556	chunk_left = int(line, 16)
557	except ValueError:
558	# close the connection as protocol synchronisation is
559	# probably lost
560	self.close()
561	raise IncompleteRead(''.join(value))
562	if chunk_left == 0:
563	break
564	if amt is None:
565	value.append(self._safe_read(chunk_left))
566	elif amt < chunk_left:
567	value.append(self._safe_read(amt))
568	self.chunk_left = chunk_left - amt
569	return ''.join(value)
570	elif amt == chunk_left:
571	value.append(self._safe_read(amt))
572	self._safe_read(2) # toss the CRLF at the end of the chunk
573	self.chunk_left = None
574	return ''.join(value)
575	else:
576	value.append(self._safe_read(chunk_left))
577	amt -= chunk_left
578
579	# we read the whole chunk, get another
580	self._safe_read(2) # toss the CRLF at the end of the chunk
581	chunk_left = None
582
583	# read and discard trailer up to the CRLF terminator
584	### note: we shouldn't have any trailers!
585	while True:
586	line = self.fp.readline()
587	if not line:
588	# a vanishingly small number of sites EOF without
589	# sending the trailer
590	break
591	if line == '\r\n':
592	break
593
594	# we read everything; close the "file"
595	self.close()
596
597	return ''.join(value)
598
599	def _safe_read(self, amt):
600	"""Read the number of bytes requested, compensating for partial reads.
601
602	Normally, we have a blocking socket, but a read() can be interrupted
603	by a signal (resulting in a partial read).
604
605	Note that we cannot distinguish between EOF and an interrupt when zero
606	bytes have been read. IncompleteRead() will be raised in this
607	situation.
608
609	This function should be used when <amt> bytes "should" be present for
610	reading. If the bytes are truly not available (due to EOF), then the
611	IncompleteRead exception can be used to detect the problem.
612	"""
613	s = []
614	while amt > 0:
615	chunk = self.fp.read(min(amt, MAXAMOUNT))
616	if not chunk:
617	raise IncompleteRead(''.join(s), amt)
618	s.append(chunk)
619	amt -= len(chunk)
620	return ''.join(s)
621
622	def getheader(self, name, default=None):
623	if self.msg is None:
624	raise ResponseNotReady()
625	return self.msg.getheader(name, default)
626
627	def getheaders(self):
628	"""Return list of (header, value) tuples."""
629	if self.msg is None:
630	raise ResponseNotReady()
631	return self.msg.items()
632
633
634	class HTTPConnection:
635
636	_http_vsn = 11
637	_http_vsn_str = 'HTTP/1.1'
638
639	response_class = HTTPResponse
640	default_port = HTTP_PORT
641	auto_open = 1
642	debuglevel = 0
643	strict = 0
644
645	def __init__(self, host, port=None, strict=None,
646	timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
647	self.timeout = timeout
648	self.sock = None
649	self._buffer = []
650	self.__response = None
651	self.__state = _CS_IDLE
652	self._method = None
653	self._tunnel_host = None
654	self._tunnel_port = None
655	self._tunnel_headers = {}
656
657	self._set_hostport(host, port)
658	if strict is not None:
659	self.strict = strict
660
661	def _set_tunnel(self, host, port=None, headers=None):
662	""" Sets up the host and the port for the HTTP CONNECT Tunnelling.
663
664	The headers argument should be a mapping of extra HTTP headers
665	to send with the CONNECT request.
666	"""
667	self._tunnel_host = host
668	self._tunnel_port = port
669	if headers:
670	self._tunnel_headers = headers
671	else:
672	self._tunnel_headers.clear()
673
674	def _set_hostport(self, host, port):
675	if port is None:
676	i = host.rfind(':')
677	j = host.rfind(']') # ipv6 addresses have [...]
678	if i > j:
679	try:
680	port = int(host[i+1:])
681	except ValueError:
682	raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
683	host = host[:i]
684	else:
685	port = self.default_port
686	if host and host[0] == '[' and host[-1] == ']':
687	host = host[1:-1]
688	self.host = host
689	self.port = port
690
691	def set_debuglevel(self, level):
692	self.debuglevel = level
693
694	def _tunnel(self):
695	self._set_hostport(self._tunnel_host, self._tunnel_port)
696	self.send("CONNECT %s:%d HTTP/1.0\r\n" % (self.host, self.port))
697	for header, value in self._tunnel_headers.iteritems():
698	self.send("%s: %s\r\n" % (header, value))
699	self.send("\r\n")
700	response = self.response_class(self.sock, strict = self.strict,
701	method = self._method)
702	(version, code, message) = response._read_status()
703
704	if code != 200:
705	self.close()
706	raise socket.error, "Tunnel connection failed: %d %s" % (code,
707	message.strip())
708	while True:
709	line = response.fp.readline()
710	if line == '\r\n': break
711
712
713	def connect(self):
714	"""Connect to the host and port specified in __init__."""
715	self.sock = socket.create_connection((self.host,self.port),
716	self.timeout)
717
718	if self._tunnel_host:
719	self._tunnel()
720
721	def close(self):
722	"""Close the connection to the HTTP server."""
723	if self.sock:
724	self.sock.close() # close it manually... there may be other refs
725	self.sock = None
726	if self.__response:
727	self.__response.close()
728	self.__response = None
729	self.__state = _CS_IDLE
730
731	def send(self, str):
732	"""Send `str' to the server."""
733	if self.sock is None:
734	if self.auto_open:
735	self.connect()
736	else:
737	raise NotConnected()
738
739	# send the data to the server. if we get a broken pipe, then close
740	# the socket. we want to reconnect when somebody tries to send again.
741	#
742	# NOTE: we DO propagate the error, though, because we cannot simply
743	# ignore the error... the caller will know if they can retry.
744	if self.debuglevel > 0:
745	print "send:", repr(str)
746	try:
747	blocksize=8192
748	if hasattr(str,'read') and not isinstance(str, array):
749	if self.debuglevel > 0: print "sendIng a read()able"
750	data=str.read(blocksize)
751	while data:
752	self.sock.sendall(data)
753	data=str.read(blocksize)
754	else:
755	self.sock.sendall(str)
756	except socket.error, v:
757	if v[0] == 32: # Broken pipe
758	self.close()
759	raise
760
761	def _output(self, s):
762	"""Add a line of output to the current request buffer.
763
764	Assumes that the line does not end with \\r\\n.
765	"""
766	self._buffer.append(s)
767
768	def _send_output(self):
769	"""Send the currently buffered request and clear the buffer.
770
771	Appends an extra \\r\\n to the buffer.
772	"""
773	self._buffer.extend(("", ""))
774	msg = "\r\n".join(self._buffer)
775	del self._buffer[:]
776	self.send(msg)
777
778	def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
779	"""Send a request to the server.
780
781	`method' specifies an HTTP request method, e.g. 'GET'.
782	`url' specifies the object being requested, e.g. '/index.html'.
783	`skip_host' if True does not add automatically a 'Host:' header
784	`skip_accept_encoding' if True does not add automatically an
785	'Accept-Encoding:' header
786	"""
787
788	# if a prior response has been completed, then forget about it.
789	if self.__response and self.__response.isclosed():
790	self.__response = None
791
792
793	# in certain cases, we cannot issue another request on this connection.
794	# this occurs when:
795	# 1) we are in the process of sending a request. (_CS_REQ_STARTED)
796	# 2) a response to a previous request has signalled that it is going
797	# to close the connection upon completion.
798	# 3) the headers for the previous response have not been read, thus
799	# we cannot determine whether point (2) is true. (_CS_REQ_SENT)
800	#
801	# if there is no prior response, then we can request at will.
802	#
803	# if point (2) is true, then we will have passed the socket to the
804	# response (effectively meaning, "there is no prior response"), and
805	# will open a new one when a new request is made.
806	#
807	# Note: if a prior response exists, then we can start a new request.
808	# We are not allowed to begin fetching the response to this new
809	# request, however, until that prior response is complete.
810	#
811	if self.__state == _CS_IDLE:
812	self.__state = _CS_REQ_STARTED
813	else:
814	raise CannotSendRequest()
815
816	# Save the method we use, we need it later in the response phase
817	self._method = method
818	if not url:
819	url = '/'
820	str = '%s %s %s' % (method, url, self._http_vsn_str)
821
822	self._output(str)
823
824	if self._http_vsn == 11:
825	# Issue some standard headers for better HTTP/1.1 compliance
826
827	if not skip_host:
828	# this header is issued only for HTTP/1.1
829	# connections. more specifically, this means it is
830	# only issued when the client uses the new
831	# HTTPConnection() class. backwards-compat clients
832	# will be using HTTP/1.0 and those clients may be
833	# issuing this header themselves. we should NOT issue
834	# it twice; some web servers (such as Apache) barf
835	# when they see two Host: headers
836
837	# If we need a non-standard port,include it in the
838	# header. If the request is going through a proxy,
839	# but the host of the actual URL, not the host of the
840	# proxy.
841
842	netloc = ''
843	if url.startswith('http'):
844	nil, netloc, nil, nil, nil = urlsplit(url)
845
846	if netloc:
847	try:
848	netloc_enc = netloc.encode("ascii")
849	except UnicodeEncodeError:
850	netloc_enc = netloc.encode("idna")
851	self.putheader('Host', netloc_enc)
852	else:
853	try:
854	host_enc = self.host.encode("ascii")
855	except UnicodeEncodeError:
856	host_enc = self.host.encode("idna")
857	if self.port == self.default_port:
858	self.putheader('Host', host_enc)
859	else:
860	self.putheader('Host', "%s:%s" % (host_enc, self.port))
861
862	# note: we are assuming that clients will not attempt to set these
863	# headers since this library must deal with the
864	# consequences. this also means that when the supporting
865	# libraries are updated to recognize other forms, then this
866	# code should be changed (removed or updated).
867
868	# we only want a Content-Encoding of "identity" since we don't
869	# support encodings such as x-gzip or x-deflate.
870	if not skip_accept_encoding:
871	self.putheader('Accept-Encoding', 'identity')
872
873	# we can accept "chunked" Transfer-Encodings, but no others
874	# NOTE: no TE header implies only "chunked"
875	#self.putheader('TE', 'chunked')
876
877	# if TE is supplied in the header, then it must appear in a
878	# Connection header.
879	#self.putheader('Connection', 'TE')
880
881	else:
882	# For HTTP/1.0, the server will assume "not chunked"
883	pass
884
885	def putheader(self, header, value):
886	"""Send a request header line to the server.
887
888	For example: h.putheader('Accept', 'text/html')
889	"""
890	if self.__state != _CS_REQ_STARTED:
891	raise CannotSendHeader()
892
893	str = '%s: %s' % (header, value)
894	self._output(str)
895
896	def endheaders(self):
897	"""Indicate that the last header line has been sent to the server."""
898
899	if self.__state == _CS_REQ_STARTED:
900	self.__state = _CS_REQ_SENT
901	else:
902	raise CannotSendHeader()
903
904	self._send_output()
905
906	def request(self, method, url, body=None, headers={}):
907	"""Send a complete request to the server."""
908
909	try:
910	self._send_request(method, url, body, headers)
911	except socket.error, v:
912	# trap 'Broken pipe' if we're allowed to automatically reconnect
913	if v[0] != 32 or not self.auto_open:
914	raise
915	# try one more time
916	self._send_request(method, url, body, headers)
917
918	def _send_request(self, method, url, body, headers):
919	# honour explicitly requested Host: and Accept-Encoding headers
920	header_names = dict.fromkeys([k.lower() for k in headers])
921	skips = {}
922	if 'host' in header_names:
923	skips['skip_host'] = 1
924	if 'accept-encoding' in header_names:
925	skips['skip_accept_encoding'] = 1
926
927	self.putrequest(method, url, **skips)
928
929	if body and ('content-length' not in header_names):
930	thelen=None
931	try:
932	thelen=str(len(body))
933	except TypeError, te:
934	# If this is a file-like object, try to
935	# fstat its file descriptor
936	import os
937	try:
938	thelen = str(os.fstat(body.fileno()).st_size)
939	except (AttributeError, OSError):
940	# Don't send a length if this failed
941	if self.debuglevel > 0: print "Cannot stat!!"
942
943	if thelen is not None:
944	self.putheader('Content-Length',thelen)
945	for hdr, value in headers.iteritems():
946	self.putheader(hdr, value)
947	self.endheaders()
948
949	if body:
950	self.send(body)
951
952	def getresponse(self):
953	"Get the response from the server."
954
955	# if a prior response has been completed, then forget about it.
956	if self.__response and self.__response.isclosed():
957	self.__response = None
958
959	#
960	# if a prior response exists, then it must be completed (otherwise, we
961	# cannot read this response's header to determine the connection-close
962	# behavior)
963	#
964	# note: if a prior response existed, but was connection-close, then the
965	# socket and response were made independent of this HTTPConnection
966	# object since a new request requires that we open a whole new
967	# connection
968	#
969	# this means the prior response had one of two states:
970	# 1) will_close: this connection was reset and the prior socket and
971	# response operate independently
972	# 2) persistent: the response was retained and we await its
973	# isclosed() status to become true.
974	#
975	if self.__state != _CS_REQ_SENT or self.__response:
976	raise ResponseNotReady()
977
978	if self.debuglevel > 0:
979	response = self.response_class(self.sock, self.debuglevel,
980	strict=self.strict,
981	method=self._method)
982	else:
983	response = self.response_class(self.sock, strict=self.strict,
984	method=self._method)
985
986	response.begin()
987	assert response.will_close != _UNKNOWN
988	self.__state = _CS_IDLE
989
990	if response.will_close:
991	# this effectively passes the connection to the response
992	self.close()
993	else:
994	# remember this, so we can tell when it is complete
995	self.__response = response
996
997	return response
998
999
class HTTP:
    """Compatibility class with httplib.py from 1.5.

    Wraps a connection object (an instance of _connection_class) that is
    forced to speak HTTP/1.0, and exposes the older
    putrequest / putheader / endheaders / getreply / getfile interface.
    """

    _http_vsn = 10
    _http_vsn_str = 'HTTP/1.0'

    debuglevel = 0

    _connection_class = HTTPConnection

    def __init__(self, host='', port=None, strict=None):
        """Create the wrapped connection; host defaults to ''.

        An empty host is accepted here.  It only becomes a problem when
        connecting, and the caller is expected to supply a real host via
        connect() first.
        """
        # an explicit port of 0 is taken to mean "use the default port"
        effective_port = None if port == 0 else port
        connection = self._connection_class(host, effective_port, strict)
        self._setup(connection)

    def _setup(self, conn):
        """Adopt conn and forward part of the interface straight to it."""
        self._conn = conn

        # these methods are simply the connection's own bound methods
        for delegated in ('send', 'putrequest', 'endheaders',
                          'set_debuglevel'):
            setattr(self, delegated, getattr(conn, delegated))

        # make the connection speak this class's protocol version
        conn._http_vsn = self._http_vsn
        conn._http_vsn_str = self._http_vsn_str

        self.file = None

    def connect(self, host=None, port=None):
        """Connect, optionally switching to a new host/port beforehand."""
        connection = self._conn
        if host is not None:
            connection._set_hostport(host, port)
        connection.connect()

    def getfile(self):
        """Return the file object recorded by the last getreply()."""
        return self.file

    def putheader(self, header, *values):
        """Send one header; several values become continuation lines."""
        folded = '\r\n\t'.join(values)
        self._conn.putheader(header, folded)

    def getreply(self):
        """Compat definition since superclass does not define it.

        Returns a tuple consisting of:
        - server status code (e.g. '200' if all goes well)
        - server "reason" corresponding to status code
        - any RFC822 headers in the response from the server

        When the status line cannot be parsed the tuple is
        (-1, <offending line>, None) and the connection is closed.
        """
        try:
            response = self._conn.getresponse()
        except BadStatusLine as e:
            ### hmm. if getresponse() ever closes the socket on a bad request,
            ### then we are going to have problems with self.sock

            ### should we keep this behavior? do people use it?
            # expose the still-open socket as a file object ...
            self.file = self._conn.sock.makefile('rb', 0)

            # ... then drop the connection, since we want to restart after
            # any protocol error (note that close() also resets self.file)
            self.close()

            self.headers = None
            return -1, e.line, None
        else:
            self.headers = response.msg
            self.file = response.fp
            return response.status, response.reason, response.msg

    def close(self):
        """Close the wrapped connection and forget the response file."""
        self._conn.close()

        # self.file is normally response.fp, which the connection closes
        # itself, so only the reference needs dropping here.
        ### messy: after a failed getreply() (status -1) the file belongs to
        ### us; it is not closed explicitly, but losing this last reference
        ### will do it.
        self.file = None
1088
try:
    import ssl
except ImportError:
    # Without the ssl module none of the HTTPS names below are defined.
    pass
else:
    class HTTPSConnection(HTTPConnection):
        "This class allows communication via SSL."

        default_port = HTTPS_PORT

        def __init__(self, host, port=None, key_file=None, cert_file=None,
                     strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
            """Set up the plain connection, then record the key/cert files."""
            HTTPConnection.__init__(self, host, port, strict, timeout)
            self.key_file, self.cert_file = key_file, cert_file

        def connect(self):
            "Connect to a host on a given (SSL) port."

            address = (self.host, self.port)
            plain_sock = socket.create_connection(address, self.timeout)
            if self._tunnel_host:
                # _tunnel() works on self.sock, so expose the plain socket
                # to it before the SSL wrapper replaces it.
                self.sock = plain_sock
                self._tunnel()
            self.sock = ssl.wrap_socket(plain_sock, self.key_file,
                                        self.cert_file)

    __all__.append("HTTPSConnection")

    class HTTPS(HTTP):
        """Compatibility with 1.5 httplib interface

        Python 1.5.2 did not have an HTTPS class, but it defined an
        interface for sending http requests that is also useful for
        https.
        """

        _connection_class = HTTPSConnection

        def __init__(self, host='', port=None, key_file=None, cert_file=None,
                     strict=None):
            """Provide a default host and pass the X509 cert info along."""

            # urf. compensate for bad input.
            if port == 0:
                port = None
            connection = self._connection_class(host, port, key_file,
                                                cert_file, strict)
            self._setup(connection)

            # we never actually use these for anything, but we keep them
            # here for compatibility with post-1.5.2 CVS.
            self.key_file = key_file
            self.cert_file = cert_file


    def FakeSocket (sock, sslobj):
        """Deprecated shim: emit a DeprecationWarning and return sslobj."""
        message = ("FakeSocket is deprecated, and won't be in 3.x. "
                   "Use the result of ssl.wrap_socket() directly instead.")
        warnings.warn(message, DeprecationWarning, stacklevel=2)
        return sslobj
1147
1148
class HTTPException(Exception):
    """Base class of the exceptions defined in this module.

    Subclasses that define an __init__ must call Exception.__init__
    or define self.args. Otherwise, str() will fail.
    """
1153
class NotConnected(HTTPException):
    """HTTPException subclass with no extra state.

    Presumably signals use of a connection that is not open; the raise
    sites are outside this part of the file.
    """
1156
class InvalidURL(HTTPException):
    """HTTPException subclass with no extra state.

    Presumably signals a malformed URL or host specification; the raise
    sites are outside this part of the file.
    """
1159
class UnknownProtocol(HTTPException):
    """HTTPException that records the offending protocol version.

    The version is kept both as the ``version`` attribute and as the sole
    element of ``args`` (so str() of the exception shows it).
    """

    def __init__(self, version):
        self.version = version
        self.args = (version,)
1164
class UnknownTransferEncoding(HTTPException):
    """HTTPException subclass with no extra state.

    Presumably signals an unsupported Transfer-Encoding; the raise sites
    are outside this part of the file.
    """
1167
class UnimplementedFileMode(HTTPException):
    """HTTPException subclass with no extra state.

    Presumably signals a file mode that is not supported; the raise sites
    are outside this part of the file.
    """
1170
class IncompleteRead(HTTPException):
    """HTTPException describing a read that returned less than expected.

    partial  -- the data that was read (its len() appears in the message)
    expected -- how many more bytes were expected, or None if unknown
    """

    def __init__(self, partial, expected=None):
        self.partial = partial
        self.expected = expected
        self.args = (partial,)

    def __repr__(self):
        # Mention the outstanding byte count only when it is known.
        if self.expected is None:
            tail = ''
        else:
            tail = ', %i more expected' % self.expected
        return 'IncompleteRead(%i bytes read%s)' % (len(self.partial), tail)

    def __str__(self):
        # str() and repr() deliberately give the same text.
        return repr(self)
1184
class ImproperConnectionState(HTTPException):
    """Common base of CannotSendRequest, CannotSendHeader and
    ResponseNotReady; carries no extra state of its own."""
1187
class CannotSendRequest(ImproperConnectionState):
    """ImproperConnectionState subclass with no extra state.

    Presumably raised when a request is started in a connection state
    that does not allow it; the raise sites are outside this part of
    the file.
    """
1190
class CannotSendHeader(ImproperConnectionState):
    """ImproperConnectionState subclass with no extra state.

    Presumably raised when a header is sent in a connection state that
    does not allow it; the raise sites are outside this part of the file.
    """
1193
class ResponseNotReady(ImproperConnectionState):
    """ImproperConnectionState subclass with no extra state.

    Presumably raised when a response is requested before the request
    has been sent; the raise sites are outside this part of the file.
    """
1196
class BadStatusLine(HTTPException):
    """HTTPException raised for a status line that cannot be understood.

    The offending line is kept as the ``line`` attribute and as the sole
    element of ``args``. A falsy line (e.g. '' when nothing was received)
    is stored as its repr(), so that str() of the exception is "''"
    rather than an empty, uninformative message.
    """

    def __init__(self, line):
        if not line:
            # make the empty case visible in tracebacks and log output
            line = repr(line)
        self.args = line,
        self.line = line
1201
# Backwards-compatibility alias: ``error`` is the same class object as
# HTTPException.
error = HTTPException
1204
class LineAndFileWrapper:
    """A limited file-like object for HTTP/0.9 responses.

    The status-line parsing code calls readline(), which normally gets
    the HTTP status line. For a 0.9 response, however, this is actually
    the first line of the body! Clients need to get a readable file
    object that contains that line.

    The wrapper serves the already-read ``line`` first and then continues
    with ``file``. Any attribute not defined here is looked up on ``file``.
    """

    def __init__(self, line, file):
        self._line = line
        self._file = file
        self._line_consumed = 0
        self._line_offset = 0        # index of the next unread char of line
        self._line_left = len(line)  # number of unread chars of line

    def __getattr__(self, attr):
        # Only reached for names not found normally: delegate to the file.
        return getattr(self._file, attr)

    def _done(self):
        # called when the last byte is read from the line. After the
        # call, all read methods are delegated to the underlying file
        # object.
        self._line_consumed = 1
        self.read = self._file.read
        self.readline = self._file.readline
        self.readlines = self._file.readlines

    def read(self, amt=None):
        """Read up to amt bytes; None or a negative amt reads everything.

        Data comes from the saved line first, then from the file.
        """
        if self._line_consumed:
            return self._file.read(amt)
        assert self._line_left
        # File objects treat a negative size as "read to EOF". Without this
        # check a negative amt would be used as a slice bound below,
        # returning the wrong data and corrupting the offset bookkeeping.
        read_all = amt is None or amt < 0
        if read_all or amt > self._line_left:
            s = self._line[self._line_offset:]
            self._done()
            if read_all:
                return s + self._file.read()
            else:
                return s + self._file.read(amt - len(s))
        else:
            # The request fits entirely inside what is left of the line.
            i = self._line_offset
            j = i + amt
            s = self._line[i:j]
            self._line_offset = j
            self._line_left -= amt
            if self._line_left == 0:
                self._done()
            return s

    def readline(self):
        """Return the rest of the saved line, or the file's next line."""
        if self._line_consumed:
            return self._file.readline()
        assert self._line_left
        s = self._line[self._line_offset:]
        self._done()
        return s

    def readlines(self, size=None):
        """Return the rest of the saved line followed by the file's lines.

        size, when given, is passed unchanged to the file's readlines().
        """
        if self._line_consumed:
            return self._file.readlines(size)
        assert self._line_left
        L = [self._line[self._line_offset:]]
        self._done()
        if size is None:
            return L + self._file.readlines()
        else:
            return L + self._file.readlines(size)
1272
def test():
    """Test this module.

    A hodge podge of tests collected here, because they have too many
    external dependencies for the regular test suite.

    Command line: [-d]... [host [selector]]; each -d raises the debug
    level by one. Results are written to standard output.
    """

    import sys
    import getopt

    def say(text=''):
        # Write one line of output to standard output.
        sys.stdout.write(text + '\n')

    def dump_reply(client):
        # Fetch the reply of a 1.5-style client and print status, reason,
        # body length and the stripped raw header lines.
        status, reason, headers = client.getreply()
        say('status = %s' % (status,))
        say('reason = %s' % (reason,))
        say('read %s' % (len(client.getfile().read()),))
        say()
        if headers:
            for header in headers.headers:
                say(header.strip())
        say()

    opts, args = getopt.getopt(sys.argv[1:], 'd')
    dl = sum(1 for flag, _value in opts if flag == '-d')

    host = 'www.python.org'
    selector = '/'
    if len(args) >= 1:
        host = args[0]
    if len(args) >= 2:
        selector = args[1]

    h = HTTP()
    h.set_debuglevel(dl)
    h.connect(host)
    h.putrequest('GET', selector)
    h.endheaders()
    dump_reply(h)

    # minimal test that code to extract host from url works
    class HTTP11(HTTP):
        _http_vsn = 11
        _http_vsn_str = 'HTTP/1.1'

    h = HTTP11('www.python.org')
    h.putrequest('GET', 'http://www.python.org/~jeremy/')
    h.endheaders()
    h.getreply()
    h.close()

    try:
        import ssl
    except ImportError:
        # no SSL support: skip the https checks
        pass
    else:
        https_targets = (('sourceforge.net', '/projects/python'),)
        for host, selector in https_targets:
            say("https://%s%s" % (host, selector))
            hs = HTTPS()
            hs.set_debuglevel(dl)
            hs.connect(host)
            hs.putrequest('GET', selector)
            hs.endheaders()
            dump_reply(hs)
1337
# Run the ad-hoc network checks only when executed as a script.
if __name__ == "__main__":
    test()

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: python/vendor/Python-2.6.5/Lib/httplib.py

Download in other formats: