Context Navigation

httplib.py@ 610

Last change on this file since 610 was 391, checked in by dmik, 11 years ago
python: Merge vendor 2.7.6 to trunk.
Property svn:eol-style set to `native`
File size: 45.9 KB

Line
r"""HTTP/1.1 client library

<intro stuff goes here>
<other stuff, too>

HTTPConnection goes through a number of "states", which define when a client
may legally make another request or fetch the response for a particular
request. This diagram details these state transitions:

    (null)
      |
      | HTTPConnection()
      v
    Idle
      |
      | putrequest()
      v
    Request-started
      |
      | ( putheader() )*  endheaders()
      v
    Request-sent
      |
      | response = getresponse()
      v
    Unread-response   [Response-headers-read]
      |\____________________
      |                     |
      | response.read()     | putrequest()
      v                     v
    Idle                  Req-started-unread-response
                     ______/|
                   /        |
   response.read() |        | ( putheader() )*  endheaders()
                   v        v
       Request-started    Req-sent-unread-response
                            |
                            | response.read()
                            v
                          Request-sent

This diagram presents the following rules:
  -- a second request may not be started until {response-headers-read}
  -- a response [object] cannot be retrieved until {request-sent}
  -- there is no differentiation between an unread response body and a
     partially read response body

Note: this enforcement is applied by the HTTPConnection class. The
      HTTPResponse class does not enforce this state machine, which
      implies sophisticated clients may accelerate the request/response
      pipeline. Caution should be taken, though: accelerating the states
      beyond the above pattern may imply knowledge of the server's
      connection-close behavior for certain requests. For example, it
      is impossible to tell whether the server will close the connection
      UNTIL the response headers have been read; this means that further
      requests cannot be placed into the pipeline until it is known that
      the server will NOT be closing the connection.

Logical State                  __state            __response
-------------                  -------            ----------
Idle                           _CS_IDLE           None
Request-started                _CS_REQ_STARTED    None
Request-sent                   _CS_REQ_SENT       None
Unread-response                _CS_IDLE           <response_class>
Req-started-unread-response    _CS_REQ_STARTED    <response_class>
Req-sent-unread-response       _CS_REQ_SENT       <response_class>
"""
68
# Standard-library dependencies.
from array import array
import os
import socket
from sys import py3kwarning
from urlparse import urlsplit
import warnings

# When py3k warnings are enabled, silence the "mimetools has been removed"
# DeprecationWarning, but only while mimetools itself is being imported.
with warnings.catch_warnings():
    if py3kwarning:
        warnings.filterwarnings("ignore", ".*mimetools has been removed",
                                DeprecationWarning)
    import mimetools

# Use cStringIO when it can be imported, otherwise the pure-Python StringIO.
try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO

# Names exported by "from httplib import *".
__all__ = [
    "HTTP",
    "HTTPResponse",
    "HTTPConnection",
    "HTTPException",
    "NotConnected",
    "UnknownProtocol",
    "UnknownTransferEncoding",
    "UnimplementedFileMode",
    "IncompleteRead",
    "InvalidURL",
    "ImproperConnectionState",
    "CannotSendRequest",
    "CannotSendHeader",
    "ResponseNotReady",
    "BadStatusLine",
    "error",
    "responses",
]
92
# Default TCP ports.
HTTP_PORT = 80
HTTPS_PORT = 443

# Placeholder for response attributes whose value has not been parsed yet.
_UNKNOWN = 'UNKNOWN'

# Connection states (values of HTTPConnection's private __state attribute).
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'

# ---------------------------------------------------------------------------
# Status codes
# ---------------------------------------------------------------------------

# 1xx: informational
CONTINUE = 100
SWITCHING_PROTOCOLS = 101
PROCESSING = 102

# 2xx: successful
OK = 200
CREATED = 201
ACCEPTED = 202
NON_AUTHORITATIVE_INFORMATION = 203
NO_CONTENT = 204
RESET_CONTENT = 205
PARTIAL_CONTENT = 206
MULTI_STATUS = 207
IM_USED = 226

# 3xx: redirection
MULTIPLE_CHOICES = 300
MOVED_PERMANENTLY = 301
FOUND = 302
SEE_OTHER = 303
NOT_MODIFIED = 304
USE_PROXY = 305
TEMPORARY_REDIRECT = 307

# 4xx: client error
BAD_REQUEST = 400
UNAUTHORIZED = 401
PAYMENT_REQUIRED = 402
FORBIDDEN = 403
NOT_FOUND = 404
METHOD_NOT_ALLOWED = 405
NOT_ACCEPTABLE = 406
PROXY_AUTHENTICATION_REQUIRED = 407
REQUEST_TIMEOUT = 408
CONFLICT = 409
GONE = 410
LENGTH_REQUIRED = 411
PRECONDITION_FAILED = 412
REQUEST_ENTITY_TOO_LARGE = 413
REQUEST_URI_TOO_LONG = 414
UNSUPPORTED_MEDIA_TYPE = 415
REQUESTED_RANGE_NOT_SATISFIABLE = 416
EXPECTATION_FAILED = 417
UNPROCESSABLE_ENTITY = 422
LOCKED = 423
FAILED_DEPENDENCY = 424
UPGRADE_REQUIRED = 426

# 5xx: server error
INTERNAL_SERVER_ERROR = 500
NOT_IMPLEMENTED = 501
BAD_GATEWAY = 502
SERVICE_UNAVAILABLE = 503
GATEWAY_TIMEOUT = 504
HTTP_VERSION_NOT_SUPPORTED = 505
INSUFFICIENT_STORAGE = 507
NOT_EXTENDED = 510

# Mapping of status codes to their official W3C names.  Not every constant
# above has an entry, and 306 has an entry but no constant.
responses = {
    CONTINUE: 'Continue',
    SWITCHING_PROTOCOLS: 'Switching Protocols',

    OK: 'OK',
    CREATED: 'Created',
    ACCEPTED: 'Accepted',
    NON_AUTHORITATIVE_INFORMATION: 'Non-Authoritative Information',
    NO_CONTENT: 'No Content',
    RESET_CONTENT: 'Reset Content',
    PARTIAL_CONTENT: 'Partial Content',

    MULTIPLE_CHOICES: 'Multiple Choices',
    MOVED_PERMANENTLY: 'Moved Permanently',
    FOUND: 'Found',
    SEE_OTHER: 'See Other',
    NOT_MODIFIED: 'Not Modified',
    USE_PROXY: 'Use Proxy',
    306: '(Unused)',
    TEMPORARY_REDIRECT: 'Temporary Redirect',

    BAD_REQUEST: 'Bad Request',
    UNAUTHORIZED: 'Unauthorized',
    PAYMENT_REQUIRED: 'Payment Required',
    FORBIDDEN: 'Forbidden',
    NOT_FOUND: 'Not Found',
    METHOD_NOT_ALLOWED: 'Method Not Allowed',
    NOT_ACCEPTABLE: 'Not Acceptable',
    PROXY_AUTHENTICATION_REQUIRED: 'Proxy Authentication Required',
    REQUEST_TIMEOUT: 'Request Timeout',
    CONFLICT: 'Conflict',
    GONE: 'Gone',
    LENGTH_REQUIRED: 'Length Required',
    PRECONDITION_FAILED: 'Precondition Failed',
    REQUEST_ENTITY_TOO_LARGE: 'Request Entity Too Large',
    REQUEST_URI_TOO_LONG: 'Request-URI Too Long',
    UNSUPPORTED_MEDIA_TYPE: 'Unsupported Media Type',
    REQUESTED_RANGE_NOT_SATISFIABLE: 'Requested Range Not Satisfiable',
    EXPECTATION_FAILED: 'Expectation Failed',

    INTERNAL_SERVER_ERROR: 'Internal Server Error',
    NOT_IMPLEMENTED: 'Not Implemented',
    BAD_GATEWAY: 'Bad Gateway',
    SERVICE_UNAVAILABLE: 'Service Unavailable',
    GATEWAY_TIMEOUT: 'Gateway Timeout',
    HTTP_VERSION_NOT_SUPPORTED: 'HTTP Version Not Supported',
}

# Largest number of bytes requested from the file object by a single read()
# call inside _safe_read.
MAXAMOUNT = 1048576

# Longest line accepted from readline(); longer lines raise LineTooLong.
_MAXLINE = 65536
217
218	class HTTPMessage(mimetools.Message):
219
220	def addheader(self, key, value):
221	"""Add header for field key handling repeats."""
222	prev = self.dict.get(key)
223	if prev is None:
224	self.dict[key] = value
225	else:
226	combined = ", ".join((prev, value))
227	self.dict[key] = combined
228
229	def addcontinue(self, key, more):
230	"""Add more field data from a continuation line."""
231	prev = self.dict[key]
232	self.dict[key] = prev + "\n " + more
233
234	def readheaders(self):
235	"""Read header lines.
236
237	Read header lines up to the entirely blank line that terminates them.
238	The (normally blank) line that ends the headers is skipped, but not
239	included in the returned list. If a non-header line ends the headers,
240	(which is an error), an attempt is made to backspace over it; it is
241	never included in the returned list.
242
243	The variable self.status is set to the empty string if all went well,
244	otherwise it is an error message. The variable self.headers is a
245	completely uninterpreted list of lines contained in the header (so
246	printing them will reproduce the header exactly as it appears in the
247	file).
248
249	If multiple header fields with the same name occur, they are combined
250	according to the rules in RFC 2616 sec 4.2:
251
252	Appending each subsequent field-value to the first, each separated
253	by a comma. The order in which header fields with the same field-name
254	are received is significant to the interpretation of the combined
255	field value.
256	"""
257	# XXX The implementation overrides the readheaders() method of
258	# rfc822.Message. The base class design isn't amenable to
259	# customized behavior here so the method here is a copy of the
260	# base class code with a few small changes.
261
262	self.dict = {}
263	self.unixfrom = ''
264	self.headers = hlist = []
265	self.status = ''
266	headerseen = ""
267	firstline = 1
268	startofline = unread = tell = None
269	if hasattr(self.fp, 'unread'):
270	unread = self.fp.unread
271	elif self.seekable:
272	tell = self.fp.tell
273	while True:
274	if tell:
275	try:
276	startofline = tell()
277	except IOError:
278	startofline = tell = None
279	self.seekable = 0
280	line = self.fp.readline(_MAXLINE + 1)
281	if len(line) > _MAXLINE:
282	raise LineTooLong("header line")
283	if not line:
284	self.status = 'EOF in headers'
285	break
286	# Skip unix From name time lines
287	if firstline and line.startswith('From '):
288	self.unixfrom = self.unixfrom + line
289	continue
290	firstline = 0
291	if headerseen and line[0] in ' \t':
292	# XXX Not sure if continuation lines are handled properly
293	# for http and/or for repeating headers
294	# It's a continuation line.
295	hlist.append(line)
296	self.addcontinue(headerseen, line.strip())
297	continue
298	elif self.iscomment(line):
299	# It's a comment. Ignore it.
300	continue
301	elif self.islast(line):
302	# Note! No pushback here! The delimiter line gets eaten.
303	break
304	headerseen = self.isheader(line)
305	if headerseen:
306	# It's a legal header line, save it.
307	hlist.append(line)
308	self.addheader(headerseen, line[len(headerseen)+1:].strip())
309	continue
310	else:
311	# It's not a header line; throw it back and stop here.
312	if not self.dict:
313	self.status = 'No headers'
314	else:
315	self.status = 'Non-header line where header expected'
316	# Try to undo the read.
317	if unread:
318	unread(line)
319	elif tell:
320	self.fp.seek(startofline)
321	else:
322	self.status = self.status + '; bad seek'
323	break
324
325	class HTTPResponse:
326
327	# strict: If true, raise BadStatusLine if the status line can't be
328	# parsed as a valid HTTP/1.0 or 1.1 status line. By default it is
329	# false because it prevents clients from talking to HTTP/0.9
330	# servers. Note that a response with a sufficiently corrupted
331	# status line will look like an HTTP/0.9 response.
332
333	# See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
334
335	def __init__(self, sock, debuglevel=0, strict=0, method=None, buffering=False):
336	if buffering:
337	# The caller won't be using any sock.recv() calls, so buffering
338	# is fine and recommended for performance.
339	self.fp = sock.makefile('rb')
340	else:
341	# The buffer size is specified as zero, because the headers of
342	# the response are read with readline(). If the reads were
343	# buffered the readline() calls could consume some of the
344	# response, which make be read via a recv() on the underlying
345	# socket.
346	self.fp = sock.makefile('rb', 0)
347	self.debuglevel = debuglevel
348	self.strict = strict
349	self._method = method
350
351	self.msg = None
352
353	# from the Status-Line of the response
354	self.version = _UNKNOWN # HTTP-Version
355	self.status = _UNKNOWN # Status-Code
356	self.reason = _UNKNOWN # Reason-Phrase
357
358	self.chunked = _UNKNOWN # is "chunked" being used?
359	self.chunk_left = _UNKNOWN # bytes left to read in current chunk
360	self.length = _UNKNOWN # number of bytes left in response
361	self.will_close = _UNKNOWN # conn will close at end of response
362
363	def _read_status(self):
364	# Initialize with Simple-Response defaults
365	line = self.fp.readline(_MAXLINE + 1)
366	if len(line) > _MAXLINE:
367	raise LineTooLong("header line")
368	if self.debuglevel > 0:
369	print "reply:", repr(line)
370	if not line:
371	# Presumably, the server closed the connection before
372	# sending a valid response.
373	raise BadStatusLine(line)
374	try:
375	[version, status, reason] = line.split(None, 2)
376	except ValueError:
377	try:
378	[version, status] = line.split(None, 1)
379	reason = ""
380	except ValueError:
381	# empty version will cause next test to fail and status
382	# will be treated as 0.9 response.
383	version = ""
384	if not version.startswith('HTTP/'):
385	if self.strict:
386	self.close()
387	raise BadStatusLine(line)
388	else:
389	# assume it's a Simple-Response from an 0.9 server
390	self.fp = LineAndFileWrapper(line, self.fp)
391	return "HTTP/0.9", 200, ""
392
393	# The status code is a three-digit number
394	try:
395	status = int(status)
396	if status < 100 or status > 999:
397	raise BadStatusLine(line)
398	except ValueError:
399	raise BadStatusLine(line)
400	return version, status, reason
401
402	def begin(self):
403	if self.msg is not None:
404	# we've already started reading the response
405	return
406
407	# read until we get a non-100 response
408	while True:
409	version, status, reason = self._read_status()
410	if status != CONTINUE:
411	break
412	# skip the header from the 100 response
413	while True:
414	skip = self.fp.readline(_MAXLINE + 1)
415	if len(skip) > _MAXLINE:
416	raise LineTooLong("header line")
417	skip = skip.strip()
418	if not skip:
419	break
420	if self.debuglevel > 0:
421	print "header:", skip
422
423	self.status = status
424	self.reason = reason.strip()
425	if version == 'HTTP/1.0':
426	self.version = 10
427	elif version.startswith('HTTP/1.'):
428	self.version = 11 # use HTTP/1.1 code for HTTP/1.x where x>=1
429	elif version == 'HTTP/0.9':
430	self.version = 9
431	else:
432	raise UnknownProtocol(version)
433
434	if self.version == 9:
435	self.length = None
436	self.chunked = 0
437	self.will_close = 1
438	self.msg = HTTPMessage(StringIO())
439	return
440
441	self.msg = HTTPMessage(self.fp, 0)
442	if self.debuglevel > 0:
443	for hdr in self.msg.headers:
444	print "header:", hdr,
445
446	# don't let the msg keep an fp
447	self.msg.fp = None
448
449	# are we using the chunked-style of transfer encoding?
450	tr_enc = self.msg.getheader('transfer-encoding')
451	if tr_enc and tr_enc.lower() == "chunked":
452	self.chunked = 1
453	self.chunk_left = None
454	else:
455	self.chunked = 0
456
457	# will the connection close at the end of the response?
458	self.will_close = self._check_close()
459
460	# do we have a Content-Length?
461	# NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
462	length = self.msg.getheader('content-length')
463	if length and not self.chunked:
464	try:
465	self.length = int(length)
466	except ValueError:
467	self.length = None
468	else:
469	if self.length < 0: # ignore nonsensical negative lengths
470	self.length = None
471	else:
472	self.length = None
473
474	# does the body have a fixed length? (of zero)
475	if (status == NO_CONTENT or status == NOT_MODIFIED or
476	100 <= status < 200 or # 1xx codes
477	self._method == 'HEAD'):
478	self.length = 0
479
480	# if the connection remains open, and we aren't using chunked, and
481	# a content-length was not provided, then assume that the connection
482	# WILL close.
483	if not self.will_close and \
484	not self.chunked and \
485	self.length is None:
486	self.will_close = 1
487
488	def _check_close(self):
489	conn = self.msg.getheader('connection')
490	if self.version == 11:
491	# An HTTP/1.1 proxy is assumed to stay open unless
492	# explicitly closed.
493	conn = self.msg.getheader('connection')
494	if conn and "close" in conn.lower():
495	return True
496	return False
497
498	# Some HTTP/1.0 implementations have support for persistent
499	# connections, using rules different than HTTP/1.1.
500
501	# For older HTTP, Keep-Alive indicates persistent connection.
502	if self.msg.getheader('keep-alive'):
503	return False
504
505	# At least Akamai returns a "Connection: Keep-Alive" header,
506	# which was supposed to be sent by the client.
507	if conn and "keep-alive" in conn.lower():
508	return False
509
510	# Proxy-Connection is a netscape hack.
511	pconn = self.msg.getheader('proxy-connection')
512	if pconn and "keep-alive" in pconn.lower():
513	return False
514
515	# otherwise, assume it will close
516	return True
517
518	def close(self):
519	if self.fp:
520	self.fp.close()
521	self.fp = None
522
523	def isclosed(self):
524	# NOTE: it is possible that we will not ever call self.close(). This
525	# case occurs when will_close is TRUE, length is None, and we
526	# read up to the last byte, but NOT past it.
527	#
528	# IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
529	# called, meaning self.isclosed() is meaningful.
530	return self.fp is None
531
532	# XXX It would be nice to have readline and __iter__ for this, too.
533
534	def read(self, amt=None):
535	if self.fp is None:
536	return ''
537
538	if self._method == 'HEAD':
539	self.close()
540	return ''
541
542	if self.chunked:
543	return self._read_chunked(amt)
544
545	if amt is None:
546	# unbounded read
547	if self.length is None:
548	s = self.fp.read()
549	else:
550	try:
551	s = self._safe_read(self.length)
552	except IncompleteRead:
553	self.close()
554	raise
555	self.length = 0
556	self.close() # we read everything
557	return s
558
559	if self.length is not None:
560	if amt > self.length:
561	# clip the read to the "end of response"
562	amt = self.length
563
564	# we do not use _safe_read() here because this may be a .will_close
565	# connection, and the user is reading more bytes than will be provided
566	# (for example, reading in 1k chunks)
567	s = self.fp.read(amt)
568	if not s:
569	# Ideally, we would raise IncompleteRead if the content-length
570	# wasn't satisfied, but it might break compatibility.
571	self.close()
572	if self.length is not None:
573	self.length -= len(s)
574	if not self.length:
575	self.close()
576
577	return s
578
579	def _read_chunked(self, amt):
580	assert self.chunked != _UNKNOWN
581	chunk_left = self.chunk_left
582	value = []
583	while True:
584	if chunk_left is None:
585	line = self.fp.readline(_MAXLINE + 1)
586	if len(line) > _MAXLINE:
587	raise LineTooLong("chunk size")
588	i = line.find(';')
589	if i >= 0:
590	line = line[:i] # strip chunk-extensions
591	try:
592	chunk_left = int(line, 16)
593	except ValueError:
594	# close the connection as protocol synchronisation is
595	# probably lost
596	self.close()
597	raise IncompleteRead(''.join(value))
598	if chunk_left == 0:
599	break
600	if amt is None:
601	value.append(self._safe_read(chunk_left))
602	elif amt < chunk_left:
603	value.append(self._safe_read(amt))
604	self.chunk_left = chunk_left - amt
605	return ''.join(value)
606	elif amt == chunk_left:
607	value.append(self._safe_read(amt))
608	self._safe_read(2) # toss the CRLF at the end of the chunk
609	self.chunk_left = None
610	return ''.join(value)
611	else:
612	value.append(self._safe_read(chunk_left))
613	amt -= chunk_left
614
615	# we read the whole chunk, get another
616	self._safe_read(2) # toss the CRLF at the end of the chunk
617	chunk_left = None
618
619	# read and discard trailer up to the CRLF terminator
620	### note: we shouldn't have any trailers!
621	while True:
622	line = self.fp.readline(_MAXLINE + 1)
623	if len(line) > _MAXLINE:
624	raise LineTooLong("trailer line")
625	if not line:
626	# a vanishingly small number of sites EOF without
627	# sending the trailer
628	break
629	if line == '\r\n':
630	break
631
632	# we read everything; close the "file"
633	self.close()
634
635	return ''.join(value)
636
637	def _safe_read(self, amt):
638	"""Read the number of bytes requested, compensating for partial reads.
639
640	Normally, we have a blocking socket, but a read() can be interrupted
641	by a signal (resulting in a partial read).
642
643	Note that we cannot distinguish between EOF and an interrupt when zero
644	bytes have been read. IncompleteRead() will be raised in this
645	situation.
646
647	This function should be used when <amt> bytes "should" be present for
648	reading. If the bytes are truly not available (due to EOF), then the
649	IncompleteRead exception can be used to detect the problem.
650	"""
651	# NOTE(gps): As of svn r74426 socket._fileobject.read(x) will never
652	# return less than x bytes unless EOF is encountered. It now handles
653	# signal interruptions (socket.error EINTR) internally. This code
654	# never caught that exception anyways. It seems largely pointless.
655	# self.fp.read(amt) will work fine.
656	s = []
657	while amt > 0:
658	chunk = self.fp.read(min(amt, MAXAMOUNT))
659	if not chunk:
660	raise IncompleteRead(''.join(s), amt)
661	s.append(chunk)
662	amt -= len(chunk)
663	return ''.join(s)
664
665	def fileno(self):
666	return self.fp.fileno()
667
668	def getheader(self, name, default=None):
669	if self.msg is None:
670	raise ResponseNotReady()
671	return self.msg.getheader(name, default)
672
673	def getheaders(self):
674	"""Return list of (header, value) tuples."""
675	if self.msg is None:
676	raise ResponseNotReady()
677	return self.msg.items()
678
679
680	class HTTPConnection:
681
682	_http_vsn = 11
683	_http_vsn_str = 'HTTP/1.1'
684
685	response_class = HTTPResponse
686	default_port = HTTP_PORT
687	auto_open = 1
688	debuglevel = 0
689	strict = 0
690
691	def __init__(self, host, port=None, strict=None,
692	timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
693	self.timeout = timeout
694	self.source_address = source_address
695	self.sock = None
696	self._buffer = []
697	self.__response = None
698	self.__state = _CS_IDLE
699	self._method = None
700	self._tunnel_host = None
701	self._tunnel_port = None
702	self._tunnel_headers = {}
703
704	self._set_hostport(host, port)
705	if strict is not None:
706	self.strict = strict
707
708	def set_tunnel(self, host, port=None, headers=None):
709	""" Sets up the host and the port for the HTTP CONNECT Tunnelling.
710
711	The headers argument should be a mapping of extra HTTP headers
712	to send with the CONNECT request.
713	"""
714	self._tunnel_host = host
715	self._tunnel_port = port
716	if headers:
717	self._tunnel_headers = headers
718	else:
719	self._tunnel_headers.clear()
720
721	def _set_hostport(self, host, port):
722	if port is None:
723	i = host.rfind(':')
724	j = host.rfind(']') # ipv6 addresses have [...]
725	if i > j:
726	try:
727	port = int(host[i+1:])
728	except ValueError:
729	if host[i+1:] == "": # http://foo.com:/ == http://foo.com/
730	port = self.default_port
731	else:
732	raise InvalidURL("nonnumeric port: '%s'" % host[i+1:])
733	host = host[:i]
734	else:
735	port = self.default_port
736	if host and host[0] == '[' and host[-1] == ']':
737	host = host[1:-1]
738	self.host = host
739	self.port = port
740
741	def set_debuglevel(self, level):
742	self.debuglevel = level
743
744	def _tunnel(self):
745	self._set_hostport(self._tunnel_host, self._tunnel_port)
746	self.send("CONNECT %s:%d HTTP/1.0\r\n" % (self.host, self.port))
747	for header, value in self._tunnel_headers.iteritems():
748	self.send("%s: %s\r\n" % (header, value))
749	self.send("\r\n")
750	response = self.response_class(self.sock, strict = self.strict,
751	method = self._method)
752	(version, code, message) = response._read_status()
753
754	if code != 200:
755	self.close()
756	raise socket.error("Tunnel connection failed: %d %s" % (code,
757	message.strip()))
758	while True:
759	line = response.fp.readline(_MAXLINE + 1)
760	if len(line) > _MAXLINE:
761	raise LineTooLong("header line")
762	if not line:
763	# for sites which EOF without sending trailer
764	break
765	if line == '\r\n':
766	break
767
768
769	def connect(self):
770	"""Connect to the host and port specified in __init__."""
771	self.sock = socket.create_connection((self.host,self.port),
772	self.timeout, self.source_address)
773
774	if self._tunnel_host:
775	self._tunnel()
776
777	def close(self):
778	"""Close the connection to the HTTP server."""
779	if self.sock:
780	self.sock.close() # close it manually... there may be other refs
781	self.sock = None
782	if self.__response:
783	self.__response.close()
784	self.__response = None
785	self.__state = _CS_IDLE
786
787	def send(self, data):
788	"""Send `data' to the server."""
789	if self.sock is None:
790	if self.auto_open:
791	self.connect()
792	else:
793	raise NotConnected()
794
795	if self.debuglevel > 0:
796	print "send:", repr(data)
797	blocksize = 8192
798	if hasattr(data,'read') and not isinstance(data, array):
799	if self.debuglevel > 0: print "sendIng a read()able"
800	datablock = data.read(blocksize)
801	while datablock:
802	self.sock.sendall(datablock)
803	datablock = data.read(blocksize)
804	else:
805	self.sock.sendall(data)
806
807	def _output(self, s):
808	"""Add a line of output to the current request buffer.
809
810	Assumes that the line does not end with \\r\\n.
811	"""
812	self._buffer.append(s)
813
814	def _send_output(self, message_body=None):
815	"""Send the currently buffered request and clear the buffer.
816
817	Appends an extra \\r\\n to the buffer.
818	A message_body may be specified, to be appended to the request.
819	"""
820	self._buffer.extend(("", ""))
821	msg = "\r\n".join(self._buffer)
822	del self._buffer[:]
823	# If msg and message_body are sent in a single send() call,
824	# it will avoid performance problems caused by the interaction
825	# between delayed ack and the Nagle algorithm.
826	if isinstance(message_body, str):
827	msg += message_body
828	message_body = None
829	self.send(msg)
830	if message_body is not None:
831	#message_body was not a string (i.e. it is a file) and
832	#we must run the risk of Nagle
833	self.send(message_body)
834
835	def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
836	"""Send a request to the server.
837
838	`method' specifies an HTTP request method, e.g. 'GET'.
839	`url' specifies the object being requested, e.g. '/index.html'.
840	`skip_host' if True does not add automatically a 'Host:' header
841	`skip_accept_encoding' if True does not add automatically an
842	'Accept-Encoding:' header
843	"""
844
845	# if a prior response has been completed, then forget about it.
846	if self.__response and self.__response.isclosed():
847	self.__response = None
848
849
850	# in certain cases, we cannot issue another request on this connection.
851	# this occurs when:
852	# 1) we are in the process of sending a request. (_CS_REQ_STARTED)
853	# 2) a response to a previous request has signalled that it is going
854	# to close the connection upon completion.
855	# 3) the headers for the previous response have not been read, thus
856	# we cannot determine whether point (2) is true. (_CS_REQ_SENT)
857	#
858	# if there is no prior response, then we can request at will.
859	#
860	# if point (2) is true, then we will have passed the socket to the
861	# response (effectively meaning, "there is no prior response"), and
862	# will open a new one when a new request is made.
863	#
864	# Note: if a prior response exists, then we can start a new request.
865	# We are not allowed to begin fetching the response to this new
866	# request, however, until that prior response is complete.
867	#
868	if self.__state == _CS_IDLE:
869	self.__state = _CS_REQ_STARTED
870	else:
871	raise CannotSendRequest()
872
873	# Save the method we use, we need it later in the response phase
874	self._method = method
875	if not url:
876	url = '/'
877	hdr = '%s %s %s' % (method, url, self._http_vsn_str)
878
879	self._output(hdr)
880
881	if self._http_vsn == 11:
882	# Issue some standard headers for better HTTP/1.1 compliance
883
884	if not skip_host:
885	# this header is issued only for HTTP/1.1
886	# connections. more specifically, this means it is
887	# only issued when the client uses the new
888	# HTTPConnection() class. backwards-compat clients
889	# will be using HTTP/1.0 and those clients may be
890	# issuing this header themselves. we should NOT issue
891	# it twice; some web servers (such as Apache) barf
892	# when they see two Host: headers
893
894	# If we need a non-standard port,include it in the
895	# header. If the request is going through a proxy,
896	# but the host of the actual URL, not the host of the
897	# proxy.
898
899	netloc = ''
900	if url.startswith('http'):
901	nil, netloc, nil, nil, nil = urlsplit(url)
902
903	if netloc:
904	try:
905	netloc_enc = netloc.encode("ascii")
906	except UnicodeEncodeError:
907	netloc_enc = netloc.encode("idna")
908	self.putheader('Host', netloc_enc)
909	else:
910	try:
911	host_enc = self.host.encode("ascii")
912	except UnicodeEncodeError:
913	host_enc = self.host.encode("idna")
914	# Wrap the IPv6 Host Header with [] (RFC 2732)
915	if host_enc.find(':') >= 0:
916	host_enc = "[" + host_enc + "]"
917	if self.port == self.default_port:
918	self.putheader('Host', host_enc)
919	else:
920	self.putheader('Host', "%s:%s" % (host_enc, self.port))
921
922	# note: we are assuming that clients will not attempt to set these
923	# headers since this library must deal with the
924	# consequences. this also means that when the supporting
925	# libraries are updated to recognize other forms, then this
926	# code should be changed (removed or updated).
927
928	# we only want a Content-Encoding of "identity" since we don't
929	# support encodings such as x-gzip or x-deflate.
930	if not skip_accept_encoding:
931	self.putheader('Accept-Encoding', 'identity')
932
933	# we can accept "chunked" Transfer-Encodings, but no others
934	# NOTE: no TE header implies only "chunked"
935	#self.putheader('TE', 'chunked')
936
937	# if TE is supplied in the header, then it must appear in a
938	# Connection header.
939	#self.putheader('Connection', 'TE')
940
941	else:
942	# For HTTP/1.0, the server will assume "not chunked"
943	pass
944
945	def putheader(self, header, *values):
946	"""Send a request header line to the server.
947
948	For example: h.putheader('Accept', 'text/html')
949	"""
950	if self.__state != _CS_REQ_STARTED:
951	raise CannotSendHeader()
952
953	hdr = '%s: %s' % (header, '\r\n\t'.join([str(v) for v in values]))
954	self._output(hdr)
955
956	def endheaders(self, message_body=None):
957	"""Indicate that the last header line has been sent to the server.
958
959	This method sends the request to the server. The optional
960	message_body argument can be used to pass a message body
961	associated with the request. The message body will be sent in
962	the same packet as the message headers if it is string, otherwise it is
963	sent as a separate packet.
964	"""
965	if self.__state == _CS_REQ_STARTED:
966	self.__state = _CS_REQ_SENT
967	else:
968	raise CannotSendHeader()
969	self._send_output(message_body)
970
971	def request(self, method, url, body=None, headers={}):
972	"""Send a complete request to the server."""
973	self._send_request(method, url, body, headers)
974
975	def _set_content_length(self, body):
976	# Set the content-length based on the body.
977	thelen = None
978	try:
979	thelen = str(len(body))
980	except TypeError, te:
981	# If this is a file-like object, try to
982	# fstat its file descriptor
983	try:
984	thelen = str(os.fstat(body.fileno()).st_size)
985	except (AttributeError, OSError):
986	# Don't send a length if this failed
987	if self.debuglevel > 0: print "Cannot stat!!"
988
989	if thelen is not None:
990	self.putheader('Content-Length', thelen)
991
992	def _send_request(self, method, url, body, headers):
993	# Honor explicitly requested Host: and Accept-Encoding: headers.
994	header_names = dict.fromkeys([k.lower() for k in headers])
995	skips = {}
996	if 'host' in header_names:
997	skips['skip_host'] = 1
998	if 'accept-encoding' in header_names:
999	skips['skip_accept_encoding'] = 1
1000
1001	self.putrequest(method, url, **skips)
1002
1003	if body is not None and 'content-length' not in header_names:
1004	self._set_content_length(body)
1005	for hdr, value in headers.iteritems():
1006	self.putheader(hdr, value)
1007	self.endheaders(body)
1008
def getresponse(self, buffering=False):
    """Get the response from the server.

    buffering -- when true, ``buffering=True`` is added to the keyword
                 arguments given to ``self.response_class``; otherwise the
                 keyword is left out entirely.

    Returns the response object once its ``begin()`` has run.  Raises
    ResponseNotReady unless the connection is in the request-sent state
    and no earlier, still unfinished response is being tracked.
    """

    # if a prior response has been completed, then forget about it.
    if self.__response and self.__response.isclosed():
        self.__response = None

    #
    # if a prior response exists, then it must be completed (otherwise, we
    # cannot read this response's header to determine the connection-close
    # behavior)
    #
    # note: if a prior response existed, but was connection-close, then the
    # socket and response were made independent of this HTTPConnection
    # object since a new request requires that we open a whole new
    # connection
    #
    # this means the prior response had one of two states:
    #   1) will_close: this connection was reset and the prior socket and
    #                  response operate independently
    #   2) persistent: the response was retained and we await its
    #                  isclosed() status to become true.
    #
    if self.__state != _CS_REQ_SENT or self.__response:
        raise ResponseNotReady()

    args = (self.sock,)
    kwds = {"strict": self.strict, "method": self._method}
    if self.debuglevel > 0:
        args += (self.debuglevel,)
    if buffering:
        # only add this keyword if non-default, for compatibility with
        # other response_classes.
        kwds["buffering"] = True
    # Both containers must be unpacked: the tuple becomes the positional
    # arguments and the dict the keyword arguments.  Passing ``args`` itself
    # would hand the whole tuple over as the first argument, and a single
    # ``*`` on the dict would pass its keys positionally.
    response = self.response_class(*args, **kwds)

    response.begin()
    assert response.will_close != _UNKNOWN
    self.__state = _CS_IDLE

    if response.will_close:
        # this effectively passes the connection to the response
        self.close()
    else:
        # remember this, so we can tell when it is complete
        self.__response = response

    return response
1057
1058
class HTTP:
    """Compatibility class with httplib.py from 1.5.

    An instance owns one ``_connection_class`` object in ``self._conn``,
    forwards the request-building calls to it, and turns the connection's
    response into a ``(status, reason, headers)`` tuple via ``getreply()``.
    """

    _http_vsn = 10
    _http_vsn_str = 'HTTP/1.0'

    debuglevel = 0

    _connection_class = HTTPConnection

    def __init__(self, host='', port=None, strict=None):
        "Provide a default host, since the superclass requires one."

        # some joker passed 0 explicitly, meaning default port
        effective_port = None if port == 0 else port

        # Note that we may pass an empty string as the host; this will raise
        # an error when we attempt to connect. Presumably, the client code
        # will call connect before then, with a proper host.
        self._setup(self._connection_class(host, effective_port, strict))

    def _setup(self, conn):
        """Adopt *conn* as the underlying connection and wire up delegation."""
        self._conn = conn

        # set up delegation to flesh out interface: each of these names on
        # the wrapper is simply the connection's own bound method.
        for name in ('send', 'putrequest', 'putheader', 'endheaders',
                     'set_debuglevel'):
            setattr(self, name, getattr(conn, name))

        # Impose this class's protocol version on the connection.
        conn._http_vsn = self._http_vsn
        conn._http_vsn_str = self._http_vsn_str

        self.file = None

    def connect(self, host=None, port=None):
        "Accept arguments to set the host/port, since the superclass doesn't."

        if host is not None:
            self._conn._set_hostport(host, port)
        self._conn.connect()

    def getfile(self):
        "Provide a getfile, since the superclass' does not use this concept."
        return self.file

    def getreply(self, buffering=False):
        """Compat definition since superclass does not define it.

        Returns a tuple consisting of:
        - server status code (e.g. '200' if all goes well)
        - server "reason" corresponding to status code
        - any RFC822 headers in the response from the server

        When the connection raises BadStatusLine the tuple is
        ``(-1, <offending line>, None)`` instead.
        """
        # only pass ``buffering`` along if non-default, for compatibility
        # with other connection classes
        extra = (buffering,) if buffering else ()
        try:
            response = self._conn.getresponse(*extra)
        except BadStatusLine as e:
            ### hmm. if getresponse() ever closes the socket on a bad request,
            ### then we are going to have problems with self.sock

            ### should we keep this behavior? do people use it?
            # keep the socket open (as a file), and return it
            self.file = self._conn.sock.makefile('rb', 0)

            # close our socket -- we want to restart after any protocol error
            # NOTE: close() also resets self.file to None, so the file
            # object created just above is dropped again here.
            self.close()

            self.headers = None
            return -1, e.line, None

        self.headers = response.msg
        self.file = response.fp
        return response.status, response.reason, response.msg

    def close(self):
        """Close the underlying connection and forget the response file."""
        self._conn.close()

        # note that self.file == response.fp, which gets closed by the
        # superclass. just clear the object ref here.
        ### hmm. messy. if status==-1, then self.file is owned by us.
        ### well... we aren't explicitly closing, but losing this ref will
        ### do it
        self.file = None
1149
try:
    import ssl
except ImportError:
    # Without the ssl module none of the HTTPS names below are defined.
    pass
else:
    class HTTPSConnection(HTTPConnection):
        "This class allows communication via SSL."

        default_port = HTTPS_PORT

        def __init__(self, host, port=None, key_file=None, cert_file=None,
                     strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
                     source_address=None):
            """Set up the plain connection, then remember the key/cert paths."""
            HTTPConnection.__init__(self, host, port, strict, timeout,
                                    source_address)
            self.key_file = key_file
            self.cert_file = cert_file

        def connect(self):
            "Connect to a host on a given (SSL) port."

            address = (self.host, self.port)
            raw_sock = socket.create_connection(address, self.timeout,
                                                self.source_address)
            if self._tunnel_host:
                # The tunnel is set up over the plain socket before it is
                # wrapped.
                self.sock = raw_sock
                self._tunnel()
            # NOTE(review): only the key and certificate files are passed;
            # no CA or verification arguments are given here -- confirm what
            # the ssl.wrap_socket defaults imply for peer checking.
            self.sock = ssl.wrap_socket(raw_sock, self.key_file,
                                        self.cert_file)

    __all__.append("HTTPSConnection")

    class HTTPS(HTTP):
        """Compatibility with 1.5 httplib interface

        Python 1.5.2 did not have an HTTPS class, but it defined an
        interface for sending http requests that is also useful for
        https.
        """

        _connection_class = HTTPSConnection

        def __init__(self, host='', port=None, key_file=None, cert_file=None,
                     strict=None):
            # provide a default host, pass the X509 cert info

            # urf. compensate for bad input.
            effective_port = None if port == 0 else port
            conn = self._connection_class(host, effective_port, key_file,
                                          cert_file, strict)
            self._setup(conn)

            # we never actually use these for anything, but we keep them
            # here for compatibility with post-1.5.2 CVS.
            self.key_file = key_file
            self.cert_file = cert_file


    def FakeSocket (sock, sslobj):
        """Deprecated shim: warn, then return *sslobj* unchanged."""
        message = ("FakeSocket is deprecated, and won't be in 3.x.  "
                   "Use the result of ssl.wrap_socket() directly instead.")
        warnings.warn(message, DeprecationWarning, stacklevel=2)
        return sslobj
1211
1212
class HTTPException(Exception):
    """Root of this module's exception hierarchy."""
    # Subclasses that define an __init__ must call Exception.__init__
    # or define self.args.  Otherwise, str() will fail.
1217
class NotConnected(HTTPException):
    """HTTPException subclass that adds no state or behaviour of its own."""
    # NOTE(review): by its name, meant for use when no connection is open;
    # the raise sites are outside this section -- confirm.
1220
class InvalidURL(HTTPException):
    """HTTPException subclass that adds no state or behaviour of its own."""
    # NOTE(review): by its name, meant for malformed URL/host input; the
    # raise sites are outside this section -- confirm.
1223
class UnknownProtocol(HTTPException):
    """HTTPException carrying the offending protocol *version*."""

    def __init__(self, version):
        # args is set by hand (instead of calling Exception.__init__) so
        # that str() of the exception works.
        self.version = version
        self.args = (version,)
1228
class UnknownTransferEncoding(HTTPException):
    """HTTPException subclass that adds no state or behaviour of its own."""
    # NOTE(review): by its name, meant for an unsupported Transfer-Encoding;
    # the raise sites are outside this section -- confirm.
1231
class UnimplementedFileMode(HTTPException):
    """HTTPException subclass that adds no state or behaviour of its own."""
    # NOTE(review): by its name, meant for an unsupported file mode; the
    # raise sites are outside this section -- confirm.
1234
class IncompleteRead(HTTPException):
    """HTTPException recording a read that ended early.

    partial  -- the data obtained so far; its ``len()`` is shown in the
                message.
    expected -- number of further bytes that were expected, or None when
                that is not known.
    """

    def __init__(self, partial, expected=None):
        self.args = (partial,)
        self.partial = partial
        self.expected = expected

    def __repr__(self):
        # The "more expected" part only appears when a count was supplied.
        suffix = ''
        if self.expected is not None:
            suffix = ', %i more expected' % self.expected
        return 'IncompleteRead(%i bytes read%s)' % (len(self.partial), suffix)

    def __str__(self):
        # str() and repr() deliberately give the same text.
        return repr(self)
1248
class ImproperConnectionState(HTTPException):
    """Common base of CannotSendRequest, CannotSendHeader and ResponseNotReady."""
1251
class CannotSendRequest(ImproperConnectionState):
    """ImproperConnectionState subclass that adds no state or behaviour."""
    # NOTE(review): by its name, meant for a request started in the wrong
    # connection state; the raise sites are outside this section -- confirm.
1254
class CannotSendHeader(ImproperConnectionState):
    """ImproperConnectionState subclass that adds no state or behaviour."""
    # NOTE(review): by its name, meant for a header sent in the wrong
    # connection state; the raise sites are outside this section -- confirm.
1257
class ResponseNotReady(ImproperConnectionState):
    """Raised by getresponse() when no response can be fetched yet.

    That is the case when the connection is not in the request-sent state
    or an earlier response is still being tracked as unfinished.
    """
1260
class BadStatusLine(HTTPException):
    """HTTPException carrying the status line that was rejected.

    The text is available as ``.line`` and as the sole element of ``.args``.
    """

    def __init__(self, line):
        # A falsy line (e.g. '') is replaced by its repr() so the message
        # still shows something.
        shown = line or repr(line)
        self.args = (shown,)
        self.line = shown
1267
class LineTooLong(HTTPException):
    """HTTPException whose message reports that a line exceeded _MAXLINE bytes.

    line_type -- text naming what was being read; it is interpolated into
                 the message.
    """

    def __init__(self, line_type):
        message = "got more than %d bytes when reading %s" % (_MAXLINE,
                                                              line_type)
        HTTPException.__init__(self, message)
1272
# for backwards compatibility: ``error`` is a second name bound to the
# HTTPException class itself, so ``except error`` catches the same things.
error = HTTPException
1275
class LineAndFileWrapper:
    """A limited file-like object for HTTP/0.9 responses.

    The wrapper first serves the bytes of an already-read *line* and then
    continues with the wrapped *file*.
    """

    # The status-line parsing code calls readline(), which normally
    # get the HTTP status line.  For a 0.9 response, however, this is
    # actually the first line of the body!  Clients need to get a
    # readable file object that contains that line.

    def __init__(self, line, file):
        self._line = line
        self._file = file
        self._line_consumed = 0         # becomes 1 once the line is used up
        self._line_offset = 0           # index of the next unread byte
        self._line_left = len(line)     # bytes of the line still unread

    def __getattr__(self, attr):
        # Anything not defined on the wrapper is looked up on the file.
        return getattr(self._file, attr)

    def _done(self):
        # called when the last byte is read from the line.  After the
        # call, all read methods are delegated to the underlying file
        # object (instance attributes shadow the methods of this class).
        self._line_consumed = 1
        for name in ('read', 'readline', 'readlines'):
            setattr(self, name, getattr(self._file, name))

    def read(self, amt=None):
        """Read up to *amt* bytes (everything when None), line first."""
        if self._line_consumed:
            return self._file.read(amt)
        assert self._line_left

        if amt is None or amt > self._line_left:
            # The request reaches past the saved line: hand out what is left
            # of it and top up from the file.
            tail = self._line[self._line_offset:]
            self._done()
            if amt is None:
                return tail + self._file.read()
            return tail + self._file.read(amt - len(tail))

        # The request fits inside the saved line.
        assert amt <= self._line_left
        start = self._line_offset
        stop = start + amt
        piece = self._line[start:stop]
        self._line_offset = stop
        self._line_left -= amt
        if self._line_left == 0:
            self._done()
        return piece

    def readline(self):
        """Return the unread rest of the saved line, later the file's lines."""
        if self._line_consumed:
            return self._file.readline()
        assert self._line_left
        rest = self._line[self._line_offset:]
        self._done()
        return rest

    def readlines(self, size=None):
        """Return the rest of the saved line followed by the file's lines."""
        if self._line_consumed:
            return self._file.readlines(size)
        assert self._line_left
        first = [self._line[self._line_offset:]]
        self._done()
        if size is None:
            return first + self._file.readlines()
        return first + self._file.readlines(size)

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: python/trunk/Lib/httplib.py@ 610

Download in other formats: