source: trunk/essentials/dev-lang/python/Doc/lib/liburllib2.tex

Last change on this file was 3225, checked in by bird, 18 years ago

Python 2.5

File size: 32.0 KB
Line 
1\section{\module{urllib2} ---
2 extensible library for opening URLs}
3
4\declaremodule{standard}{urllib2}
5\moduleauthor{Jeremy Hylton}{jhylton@users.sourceforge.net}
6\sectionauthor{Moshe Zadka}{moshez@users.sourceforge.net}
7
8\modulesynopsis{An extensible library for opening URLs using a variety of
9 protocols}
10
11The \module{urllib2} module defines functions and classes which help
12in opening URLs (mostly HTTP) in a complex world --- basic and digest
13authentication, redirections, cookies and more.
14
15The \module{urllib2} module defines the following functions:
16
17\begin{funcdesc}{urlopen}{url\optional{, data}}
18Open the URL \var{url}, which can be either a string or a \class{Request}
19object.
20
21\var{data} may be a string specifying additional data to send to the
22server, or \code{None} if no such data is needed.
23Currently HTTP requests are the only ones that use \var{data};
24the HTTP request will be a POST instead of a GET when the \var{data}
25parameter is provided. \var{data} should be a buffer in the standard
26\mimetype{application/x-www-form-urlencoded} format. The
27\function{urllib.urlencode()} function takes a mapping or sequence of
282-tuples and returns a string in this format.
29
30This function returns a file-like object with two additional methods:
31
32\begin{itemize}
33 \item \method{geturl()} --- return the URL of the resource retrieved
34 \item \method{info()} --- return the meta-information of the page, as
35 a dictionary-like object
36\end{itemize}
37
38Raises \exception{URLError} on errors.
39
40Note that \code{None} may be returned if no handler handles the
41request (though the default installed global \class{OpenerDirector}
42uses \class{UnknownHandler} to ensure this never happens).
43\end{funcdesc}
44
45\begin{funcdesc}{install_opener}{opener}
46Install an \class{OpenerDirector} instance as the default global
47opener. Installing an opener is only necessary if you want \function{urlopen()} to
48use that opener; otherwise, simply call \method{OpenerDirector.open()}
49instead of \function{urlopen()}. The code does not check for a real
50\class{OpenerDirector}, and any class with the appropriate interface
51will work.
52\end{funcdesc}
53
54\begin{funcdesc}{build_opener}{\optional{handler, \moreargs}}
55Return an \class{OpenerDirector} instance, which chains the
56handlers in the order given. \var{handler}s can be either instances
57of \class{BaseHandler}, or subclasses of \class{BaseHandler} (in
58which case it must be possible to call the constructor without
59any parameters). Instances of the following classes will be in
60front of the \var{handler}s, unless the \var{handler}s contain
61them, instances of them or subclasses of them:
62\class{ProxyHandler}, \class{UnknownHandler}, \class{HTTPHandler},
63\class{HTTPDefaultErrorHandler}, \class{HTTPRedirectHandler},
64\class{FTPHandler}, \class{FileHandler}, \class{HTTPErrorProcessor}.
65
66If the Python installation has SSL support (\function{socket.ssl()}
67exists), \class{HTTPSHandler} will also be added.
68
69Beginning in Python 2.3, a \class{BaseHandler} subclass may also
70change its \member{handler_order} member variable to modify its
71position in the handlers list.
72\end{funcdesc}
73
74
75The following exceptions are raised as appropriate:
76
77\begin{excdesc}{URLError}
78The handlers raise this exception (or derived exceptions) when they
79run into a problem. It is a subclass of \exception{IOError}.
80\end{excdesc}
81
82\begin{excdesc}{HTTPError}
83A subclass of \exception{URLError}, it can also function as a
84non-exceptional file-like return value (the same thing that
85\function{urlopen()} returns). This is useful when handling exotic
86HTTP errors, such as requests for authentication.
87\end{excdesc}
88
89\begin{excdesc}{GopherError}
90A subclass of \exception{URLError}, this is the error raised by the
91Gopher handler.
92\end{excdesc}
93
94
95The following classes are provided:
96
97\begin{classdesc}{Request}{url\optional{, data}\optional{, headers}
98 \optional{, origin_req_host}\optional{, unverifiable}}
99This class is an abstraction of a URL request.
100
101\var{url} should be a string containing a valid URL.
102
103\var{data} may be a string specifying additional data to send to the
104server, or \code{None} if no such data is needed.
105Currently HTTP requests are the only ones that use \var{data};
106the HTTP request will be a POST instead of a GET when the \var{data}
107parameter is provided. \var{data} should be a buffer in the standard
108\mimetype{application/x-www-form-urlencoded} format. The
109\function{urllib.urlencode()} function takes a mapping or sequence of
1102-tuples and returns a string in this format.
111
112\var{headers} should be a dictionary, and will be treated as if
113\method{add_header()} was called with each key and value as arguments.
114
115The final two arguments are only of interest for correct handling of
116third-party HTTP cookies:
117
118\var{origin_req_host} should be the request-host of the origin
119transaction, as defined by \rfc{2965}. It defaults to
120\code{cookielib.request_host(self)}. This is the host name or IP
121address of the original request that was initiated by the user. For
122example, if the request is for an image in an HTML document, this
123should be the request-host of the request for the page containing the
124image.
125
126\var{unverifiable} should indicate whether the request is
127unverifiable, as defined by \rfc{2965}. It defaults to \code{False}. An
128unverifiable request is one whose URL the user did not have the option
129to approve. For example, if the request is for an image in an HTML
130document, and the user had no option to approve the automatic fetching
131of the image, this should be true.
132\end{classdesc}
133
134\begin{classdesc}{OpenerDirector}{}
135The \class{OpenerDirector} class opens URLs via \class{BaseHandler}s
136chained together. It manages the chaining of handlers, and recovery
137from errors.
138\end{classdesc}
139
140\begin{classdesc}{BaseHandler}{}
141This is the base class for all registered handlers --- and handles only
142the simple mechanics of registration.
143\end{classdesc}
144
145\begin{classdesc}{HTTPDefaultErrorHandler}{}
146A class which defines a default handler for HTTP error responses; all
147responses are turned into \exception{HTTPError} exceptions.
148\end{classdesc}
149
150\begin{classdesc}{HTTPRedirectHandler}{}
151A class to handle redirections.
152\end{classdesc}
153
154\begin{classdesc}{HTTPCookieProcessor}{\optional{cookiejar}}
155A class to handle HTTP Cookies.
156\end{classdesc}
157
158\begin{classdesc}{ProxyHandler}{\optional{proxies}}
159Cause requests to go through a proxy.
160If \var{proxies} is given, it must be a dictionary mapping
161protocol names to URLs of proxies.
162The default is to read the list of proxies from the environment
163variables \envvar{<protocol>_proxy}.
164\end{classdesc}
165
166\begin{classdesc}{HTTPPasswordMgr}{}
167Keep a database of
168\code{(\var{realm}, \var{uri}) -> (\var{user}, \var{password})}
169mappings.
170\end{classdesc}
171
172\begin{classdesc}{HTTPPasswordMgrWithDefaultRealm}{}
173Keep a database of
174\code{(\var{realm}, \var{uri}) -> (\var{user}, \var{password})} mappings.
175A realm of \code{None} is considered a catch-all realm, which is searched
176if no other realm fits.
177\end{classdesc}
178
179\begin{classdesc}{AbstractBasicAuthHandler}{\optional{password_mgr}}
180This is a mixin class that helps with HTTP authentication, both
181to the remote host and to a proxy.
182\var{password_mgr}, if given, should be something that is compatible
183with \class{HTTPPasswordMgr}; refer to section~\ref{http-password-mgr}
184for information on the interface that must be supported.
185\end{classdesc}
186
187\begin{classdesc}{HTTPBasicAuthHandler}{\optional{password_mgr}}
188Handle authentication with the remote host.
189\var{password_mgr}, if given, should be something that is compatible
190with \class{HTTPPasswordMgr}; refer to section~\ref{http-password-mgr}
191for information on the interface that must be supported.
192\end{classdesc}
193
194\begin{classdesc}{ProxyBasicAuthHandler}{\optional{password_mgr}}
195Handle authentication with the proxy.
196\var{password_mgr}, if given, should be something that is compatible
197with \class{HTTPPasswordMgr}; refer to section~\ref{http-password-mgr}
198for information on the interface that must be supported.
199\end{classdesc}
200
201\begin{classdesc}{AbstractDigestAuthHandler}{\optional{password_mgr}}
202This is a mixin class that helps with HTTP authentication, both
203to the remote host and to a proxy.
204\var{password_mgr}, if given, should be something that is compatible
205with \class{HTTPPasswordMgr}; refer to section~\ref{http-password-mgr}
206for information on the interface that must be supported.
207\end{classdesc}
208
209\begin{classdesc}{HTTPDigestAuthHandler}{\optional{password_mgr}}
210Handle authentication with the remote host.
211\var{password_mgr}, if given, should be something that is compatible
212with \class{HTTPPasswordMgr}; refer to section~\ref{http-password-mgr}
213for information on the interface that must be supported.
214\end{classdesc}
215
216\begin{classdesc}{ProxyDigestAuthHandler}{\optional{password_mgr}}
217Handle authentication with the proxy.
218\var{password_mgr}, if given, should be something that is compatible
219with \class{HTTPPasswordMgr}; refer to section~\ref{http-password-mgr}
220for information on the interface that must be supported.
221\end{classdesc}
222
223\begin{classdesc}{HTTPHandler}{}
224A class to handle opening of HTTP URLs.
225\end{classdesc}
226
227\begin{classdesc}{HTTPSHandler}{}
228A class to handle opening of HTTPS URLs.
229\end{classdesc}
230
231\begin{classdesc}{FileHandler}{}
232Open local files.
233\end{classdesc}
234
235\begin{classdesc}{FTPHandler}{}
236Open FTP URLs.
237\end{classdesc}
238
239\begin{classdesc}{CacheFTPHandler}{}
240Open FTP URLs, keeping a cache of open FTP connections to minimize
241delays.
242\end{classdesc}
243
244\begin{classdesc}{GopherHandler}{}
245Open gopher URLs.
246\end{classdesc}
247
248\begin{classdesc}{UnknownHandler}{}
249A catch-all class to handle unknown URLs.
250\end{classdesc}
251
252
253\subsection{Request Objects \label{request-objects}}
254
255The following methods describe all of \class{Request}'s public interface,
256and so all must be overridden in subclasses.
257
258\begin{methoddesc}[Request]{add_data}{data}
259Set the \class{Request} data to \var{data}. This is ignored by all
260handlers except HTTP handlers --- and there it should be a byte
261string, and will change the request to be \code{POST} rather than
262\code{GET}.
263\end{methoddesc}
264
265\begin{methoddesc}[Request]{get_method}{}
266Return a string indicating the HTTP request method. This is only
267meaningful for HTTP requests, and currently always returns
268\code{'GET'} or \code{'POST'}.
269\end{methoddesc}
270
271\begin{methoddesc}[Request]{has_data}{}
272Return whether the instance has a non-\code{None} data.
273\end{methoddesc}
274
275\begin{methoddesc}[Request]{get_data}{}
276Return the instance's data.
277\end{methoddesc}
278
279\begin{methoddesc}[Request]{add_header}{key, val}
280Add another header to the request. Headers are currently ignored by
281all handlers except HTTP handlers, where they are added to the list
282of headers sent to the server. Note that there cannot be more than
283one header with the same name, and later calls will overwrite
284previous calls in case the \var{key} collides. Currently, this is
285no loss of HTTP functionality, since all headers which have meaning
286when used more than once have a (header-specific) way of gaining the
287same functionality using only one header.
288\end{methoddesc}
289
290\begin{methoddesc}[Request]{add_unredirected_header}{key, header}
291Add a header that will not be added to a redirected request.
292\versionadded{2.4}
293\end{methoddesc}
294
295\begin{methoddesc}[Request]{has_header}{header}
296Return whether the instance has the named header (checks both regular
297and unredirected).
298\versionadded{2.4}
299\end{methoddesc}
300
301\begin{methoddesc}[Request]{get_full_url}{}
302Return the URL given in the constructor.
303\end{methoddesc}
304
305\begin{methoddesc}[Request]{get_type}{}
306Return the type of the URL --- also known as the scheme.
307\end{methoddesc}
308
309\begin{methoddesc}[Request]{get_host}{}
310Return the host to which a connection will be made.
311\end{methoddesc}
312
313\begin{methoddesc}[Request]{get_selector}{}
314Return the selector --- the part of the URL that is sent to
315the server.
316\end{methoddesc}
317
318\begin{methoddesc}[Request]{set_proxy}{host, type}
319Prepare the request by connecting to a proxy server. The \var{host}
320and \var{type} will replace those of the instance, and the instance's
321selector will be the original URL given in the constructor.
322\end{methoddesc}
323
324\begin{methoddesc}[Request]{get_origin_req_host}{}
325Return the request-host of the origin transaction, as defined by
326\rfc{2965}. See the documentation for the \class{Request}
327constructor.
328\end{methoddesc}
329
330\begin{methoddesc}[Request]{is_unverifiable}{}
331Return whether the request is unverifiable, as defined by \rfc{2965}.
332See the documentation for the \class{Request} constructor.
333\end{methoddesc}
334
335
336\subsection{OpenerDirector Objects \label{opener-director-objects}}
337
338\class{OpenerDirector} instances have the following methods:
339
340\begin{methoddesc}[OpenerDirector]{add_handler}{handler}
341\var{handler} should be an instance of \class{BaseHandler}. The
342following methods are searched, and added to the possible chains (note
343that HTTP errors are a special case).
344
345\begin{itemize}
346 \item \method{\var{protocol}_open()} ---
347 signal that the handler knows how to open \var{protocol} URLs.
348 \item \method{http_error_\var{type}()} ---
349 signal that the handler knows how to handle HTTP errors with HTTP
350 error code \var{type}.
351 \item \method{\var{protocol}_error()} ---
352 signal that the handler knows how to handle errors from
353 (non-\code{http}) \var{protocol}.
354 \item \method{\var{protocol}_request()} ---
355 signal that the handler knows how to pre-process \var{protocol}
356 requests.
357 \item \method{\var{protocol}_response()} ---
358 signal that the handler knows how to post-process \var{protocol}
359 responses.
360\end{itemize}
361\end{methoddesc}
362
363\begin{methoddesc}[OpenerDirector]{open}{url\optional{, data}}
364Open the given \var{url} (which can be a request object or a string),
365optionally passing the given \var{data}.
366Arguments, return values and exceptions raised are the same as those
367of \function{urlopen()} (which simply calls the \method{open()} method
368on the currently installed global \class{OpenerDirector}).
369\end{methoddesc}
370
371\begin{methoddesc}[OpenerDirector]{error}{proto\optional{,
372 arg\optional{, \moreargs}}}
373Handle an error of the given protocol. This will call the registered
374error handlers for the given protocol with the given arguments (which
375are protocol specific). The HTTP protocol is a special case which
376uses the HTTP response code to determine the specific error handler;
377refer to the \method{http_error_*()} methods of the handler classes.
378
379Return values and exceptions raised are the same as those
380of \function{urlopen()}.
381\end{methoddesc}
382
383OpenerDirector objects open URLs in three stages:
384
385The order in which these methods are called within each stage is
386determined by sorting the handler instances.
387
388\begin{enumerate}
389 \item Every handler with a method named like
390 \method{\var{protocol}_request()} has that method called to
391 pre-process the request.
392
393 \item Handlers with a method named like
394 \method{\var{protocol}_open()} are called to handle the request.
395 This stage ends when a handler either returns a
396 non-\constant{None} value (i.e.\ a response), or raises an exception
397 (usually \exception{URLError}). Exceptions are allowed to propagate.
398
399 In fact, the above algorithm is first tried for methods named
400 \method{default_open()}. If all such methods return
401 \constant{None}, the algorithm is repeated for methods named like
402 \method{\var{protocol}_open()}. If all such methods return
403 \constant{None}, the algorithm is repeated for methods named
404 \method{unknown_open()}.
405
406 Note that the implementation of these methods may involve calls of
407 the parent \class{OpenerDirector} instance's \method{.open()} and
408 \method{.error()} methods.
409
410 \item Every handler with a method named like
411 \method{\var{protocol}_response()} has that method called to
412 post-process the response.
413
414\end{enumerate}
415
416\subsection{BaseHandler Objects \label{base-handler-objects}}
417
418\class{BaseHandler} objects provide a couple of methods that are
419directly useful, and others that are meant to be used by derived
420classes. These are intended for direct use:
421
422\begin{methoddesc}[BaseHandler]{add_parent}{director}
423Add a director as parent.
424\end{methoddesc}
425
426\begin{methoddesc}[BaseHandler]{close}{}
427Remove any parents.
428\end{methoddesc}
429
430The following members and methods should only be used by classes
431derived from \class{BaseHandler}. \note{The convention has been
432adopted that subclasses defining \method{\var{protocol}_request()} or
433\method{\var{protocol}_response()} methods are named
434\class{*Processor}; all others are named \class{*Handler}.}
435
436
437\begin{memberdesc}[BaseHandler]{parent}
438A valid \class{OpenerDirector}, which can be used to open using a
439different protocol, or handle errors.
440\end{memberdesc}
441
442\begin{methoddesc}[BaseHandler]{default_open}{req}
443This method is \emph{not} defined in \class{BaseHandler}, but
444subclasses should define it if they want to catch all URLs.
445
446This method, if implemented, will be called by the parent
447\class{OpenerDirector}. It should return a file-like object as
448described in the return value of the \method{open()} of
449\class{OpenerDirector}, or \code{None}. It should raise
450\exception{URLError}, unless a truly exceptional thing happens (for
451example, \exception{MemoryError} should not be mapped to
452\exception{URLError}).
453
454This method will be called before any protocol-specific open method.
455\end{methoddesc}
456
457\begin{methoddescni}[BaseHandler]{\var{protocol}_open}{req}
458This method is \emph{not} defined in \class{BaseHandler}, but
459subclasses should define it if they want to handle URLs with the given
460protocol.
461
462This method, if defined, will be called by the parent
463\class{OpenerDirector}. Return values should be the same as for
464\method{default_open()}.
465\end{methoddescni}
466
467\begin{methoddesc}[BaseHandler]{unknown_open}{req}
468This method is \emph{not} defined in \class{BaseHandler}, but
469subclasses should define it if they want to catch all URLs with no
470specific registered handler to open it.
471
472This method, if implemented, will be called by the \member{parent}
473\class{OpenerDirector}. Return values should be the same as for
474\method{default_open()}.
475\end{methoddesc}
476
477\begin{methoddesc}[BaseHandler]{http_error_default}{req, fp, code, msg, hdrs}
478This method is \emph{not} defined in \class{BaseHandler}, but
479subclasses should override it if they intend to provide a catch-all
480for otherwise unhandled HTTP errors. It will be called automatically
481by the \class{OpenerDirector} getting the error, and should not
482normally be called in other circumstances.
483
484\var{req} will be a \class{Request} object, \var{fp} will be a
485file-like object with the HTTP error body, \var{code} will be the
486three-digit code of the error, \var{msg} will be the user-visible
487explanation of the code and \var{hdrs} will be a mapping object with
488the headers of the error.
489
490Return values and exceptions raised should be the same as those
491of \function{urlopen()}.
492\end{methoddesc}
493
494\begin{methoddesc}[BaseHandler]{http_error_\var{nnn}}{req, fp, code, msg, hdrs}
495\var{nnn} should be a three-digit HTTP error code. This method is
496also not defined in \class{BaseHandler}, but will be called, if it
497exists, on an instance of a subclass, when an HTTP error with code
498\var{nnn} occurs.
499
500Subclasses should override this method to handle specific HTTP
501errors.
502
503Arguments, return values and exceptions raised should be the same as
504for \method{http_error_default()}.
505\end{methoddesc}
506
507\begin{methoddescni}[BaseHandler]{\var{protocol}_request}{req}
508This method is \emph{not} defined in \class{BaseHandler}, but
509subclasses should define it if they want to pre-process requests of
510the given protocol.
511
512This method, if defined, will be called by the parent
513\class{OpenerDirector}. \var{req} will be a \class{Request} object.
514The return value should be a \class{Request} object.
515\end{methoddescni}
516
517\begin{methoddescni}[BaseHandler]{\var{protocol}_response}{req, response}
518This method is \emph{not} defined in \class{BaseHandler}, but
519subclasses should define it if they want to post-process responses of
520the given protocol.
521
522This method, if defined, will be called by the parent
523\class{OpenerDirector}. \var{req} will be a \class{Request} object.
524\var{response} will be an object implementing the same interface as
525the return value of \function{urlopen()}. The return value should
526implement the same interface as the return value of
527\function{urlopen()}.
528\end{methoddescni}
529
530\subsection{HTTPRedirectHandler Objects \label{http-redirect-handler}}
531
532\note{Some HTTP redirections require action from this module's client
533 code. If this is the case, \exception{HTTPError} is raised. See
534 \rfc{2616} for details of the precise meanings of the various
535 redirection codes.}
536
537\begin{methoddesc}[HTTPRedirectHandler]{redirect_request}{req,
538 fp, code, msg, hdrs}
539Return a \class{Request} or \code{None} in response to a redirect.
540This is called by the default implementations of the
541\method{http_error_30*()} methods when a redirection is received from
542the server. If a redirection should take place, return a new
543\class{Request} to allow \method{http_error_30*()} to perform the
544redirect. Otherwise, raise \exception{HTTPError} if no other handler
545should try to handle this URL, or return \code{None} if you can't but
546another handler might.
547
548\begin{notice}
549 The default implementation of this method does not strictly
550 follow \rfc{2616}, which says that 301 and 302 responses to \code{POST}
551 requests must not be automatically redirected without confirmation by
552 the user. In reality, browsers do allow automatic redirection of
553 these responses, changing the \code{POST} to a \code{GET}, and the default
554 implementation reproduces this behavior.
555\end{notice}
556\end{methoddesc}
557
558
559\begin{methoddesc}[HTTPRedirectHandler]{http_error_301}{req,
560 fp, code, msg, hdrs}
561Redirect to the \code{Location:} URL. This method is called by
562the parent \class{OpenerDirector} when getting an HTTP
563`moved permanently' response.
564\end{methoddesc}
565
566\begin{methoddesc}[HTTPRedirectHandler]{http_error_302}{req,
567 fp, code, msg, hdrs}
568The same as \method{http_error_301()}, but called for the
569`found' response.
570\end{methoddesc}
571
572\begin{methoddesc}[HTTPRedirectHandler]{http_error_303}{req,
573 fp, code, msg, hdrs}
574The same as \method{http_error_301()}, but called for the
575`see other' response.
576\end{methoddesc}
577
578\begin{methoddesc}[HTTPRedirectHandler]{http_error_307}{req,
579 fp, code, msg, hdrs}
580The same as \method{http_error_301()}, but called for the
581`temporary redirect' response.
582\end{methoddesc}
583
584
585\subsection{HTTPCookieProcessor Objects \label{http-cookie-processor}}
586
587\versionadded{2.4}
588
589\class{HTTPCookieProcessor} instances have one attribute:
590
591\begin{memberdesc}[HTTPCookieProcessor]{cookiejar}
592The \class{cookielib.CookieJar} in which cookies are stored.
593\end{memberdesc}
594
595
596\subsection{ProxyHandler Objects \label{proxy-handler}}
597
598\begin{methoddescni}[ProxyHandler]{\var{protocol}_open}{request}
599The \class{ProxyHandler} will have a method
600\method{\var{protocol}_open()} for every \var{protocol} which has a
601proxy in the \var{proxies} dictionary given in the constructor. The
602method will modify requests to go through the proxy, by calling
603\code{request.set_proxy()}, and call the next handler in the chain to
604actually execute the protocol.
605\end{methoddescni}
606
607
608\subsection{HTTPPasswordMgr Objects \label{http-password-mgr}}
609
610These methods are available on \class{HTTPPasswordMgr} and
611\class{HTTPPasswordMgrWithDefaultRealm} objects.
612
613\begin{methoddesc}[HTTPPasswordMgr]{add_password}{realm, uri, user, passwd}
614\var{uri} can be either a single URI, or a sequence of URIs. \var{realm},
615\var{user} and \var{passwd} must be strings. This causes
616\code{(\var{user}, \var{passwd})} to be used as authentication tokens
617when authentication for \var{realm} and a super-URI of any of the
618given URIs is given.
619\end{methoddesc}
620
621\begin{methoddesc}[HTTPPasswordMgr]{find_user_password}{realm, authuri}
622Get user/password for given realm and URI, if any. This method will
623return \code{(None, None)} if there is no matching user/password.
624
625For \class{HTTPPasswordMgrWithDefaultRealm} objects, the realm
626\code{None} will be searched if the given \var{realm} has no matching
627user/password.
628\end{methoddesc}
629
630
631\subsection{AbstractBasicAuthHandler Objects
632 \label{abstract-basic-auth-handler}}
633
634\begin{methoddesc}[AbstractBasicAuthHandler]{http_error_auth_reqed}
635 {authreq, host, req, headers}
636Handle an authentication request by getting a user/password pair, and
637re-trying the request. \var{authreq} should be the name of the header
638where the information about the realm is included in the request,
639\var{host} specifies the URL and path to authenticate for, \var{req}
640should be the (failed) \class{Request} object, and \var{headers}
641should be the error headers.
642
643\var{host} is either an authority (e.g.\ \code{"python.org"}) or a URL
644containing an authority component (e.g.\ \code{"http://python.org/"}).
645In either case, the authority must not contain a userinfo component
646(so, \code{"python.org"} and \code{"python.org:80"} are fine,
647\code{"joe:password@python.org"} is not).
648\end{methoddesc}
649
650
651\subsection{HTTPBasicAuthHandler Objects
652 \label{http-basic-auth-handler}}
653
654\begin{methoddesc}[HTTPBasicAuthHandler]{http_error_401}{req, fp, code,
655 msg, hdrs}
656Retry the request with authentication information, if available.
657\end{methoddesc}
658
659
660\subsection{ProxyBasicAuthHandler Objects
661 \label{proxy-basic-auth-handler}}
662
663\begin{methoddesc}[ProxyBasicAuthHandler]{http_error_407}{req, fp, code,
664 msg, hdrs}
665Retry the request with authentication information, if available.
666\end{methoddesc}
667
668
669\subsection{AbstractDigestAuthHandler Objects
670 \label{abstract-digest-auth-handler}}
671
672\begin{methoddesc}[AbstractDigestAuthHandler]{http_error_auth_reqed}
673 {authreq, host, req, headers}
674\var{authreq} should be the name of the header where the information about
675the realm is included in the request, \var{host} should be the host to
676authenticate to, \var{req} should be the (failed) \class{Request}
677object, and \var{headers} should be the error headers.
678\end{methoddesc}
679
680
681\subsection{HTTPDigestAuthHandler Objects
682 \label{http-digest-auth-handler}}
683
684\begin{methoddesc}[HTTPDigestAuthHandler]{http_error_401}{req, fp, code,
685 msg, hdrs}
686Retry the request with authentication information, if available.
687\end{methoddesc}
688
689
690\subsection{ProxyDigestAuthHandler Objects
691 \label{proxy-digest-auth-handler}}
692
693\begin{methoddesc}[ProxyDigestAuthHandler]{http_error_407}{req, fp, code,
694 msg, hdrs}
695Retry the request with authentication information, if available.
696\end{methoddesc}
697
698
699\subsection{HTTPHandler Objects \label{http-handler-objects}}
700
701\begin{methoddesc}[HTTPHandler]{http_open}{req}
702Send an HTTP request, which can be either GET or POST, depending on
703\code{\var{req}.has_data()}.
704\end{methoddesc}
705
706
707\subsection{HTTPSHandler Objects \label{https-handler-objects}}
708
709\begin{methoddesc}[HTTPSHandler]{https_open}{req}
710Send an HTTPS request, which can be either GET or POST, depending on
711\code{\var{req}.has_data()}.
712\end{methoddesc}
713
714
715\subsection{FileHandler Objects \label{file-handler-objects}}
716
717\begin{methoddesc}[FileHandler]{file_open}{req}
718Open the file locally, if there is no host name, or
719the host name is \code{'localhost'}. Change the
720protocol to \code{ftp} otherwise, and retry opening
721it using \member{parent}.
722\end{methoddesc}
723
724
725\subsection{FTPHandler Objects \label{ftp-handler-objects}}
726
727\begin{methoddesc}[FTPHandler]{ftp_open}{req}
728Open the FTP file indicated by \var{req}.
729The login is always done with empty username and password.
730\end{methoddesc}
731
732
733\subsection{CacheFTPHandler Objects \label{cacheftp-handler-objects}}
734
735\class{CacheFTPHandler} objects are \class{FTPHandler} objects with
736the following additional methods:
737
738\begin{methoddesc}[CacheFTPHandler]{setTimeout}{t}
739Set timeout of connections to \var{t} seconds.
740\end{methoddesc}
741
742\begin{methoddesc}[CacheFTPHandler]{setMaxConns}{m}
743Set maximum number of cached connections to \var{m}.
744\end{methoddesc}
745
746
747\subsection{GopherHandler Objects \label{gopher-handler}}
748
749\begin{methoddesc}[GopherHandler]{gopher_open}{req}
750Open the gopher resource indicated by \var{req}.
751\end{methoddesc}
752
753
754\subsection{UnknownHandler Objects \label{unknown-handler-objects}}
755
756\begin{methoddesc}[UnknownHandler]{unknown_open}{}
757Raise a \exception{URLError} exception.
758\end{methoddesc}
759
760
761\subsection{HTTPErrorProcessor Objects \label{http-error-processor-objects}}
762
763\versionadded{2.4}
764
765\begin{methoddesc}[HTTPErrorProcessor]{http_response}{request, response}
766Process HTTP error responses.
767
768For 200 error codes, the response object is returned immediately.
769
770For non-200 error codes, this simply passes the job on to the
771\method{\var{protocol}_error_\var{code}()} handler methods, via
772\method{OpenerDirector.error()}. Eventually,
773\class{urllib2.HTTPDefaultErrorHandler} will raise an
774\exception{HTTPError} if no other handler handles the error.
775\end{methoddesc}
776
777
778\subsection{Examples \label{urllib2-examples}}
779
780This example gets the python.org main page and displays the first 100
781bytes of it:
782
783\begin{verbatim}
784>>> import urllib2
785>>> f = urllib2.urlopen('http://www.python.org/')
786>>> print f.read(100)
787<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
788<?xml-stylesheet href="./css/ht2html
789\end{verbatim}
790
791Here we are sending a data-stream to the stdin of a CGI and reading
792the data it returns to us. Note that this example will only work when the
793Python installation supports SSL.
794
795\begin{verbatim}
796>>> import urllib2
797>>> req = urllib2.Request(url='https://localhost/cgi-bin/test.cgi',
798... data='This data is passed to stdin of the CGI')
799>>> f = urllib2.urlopen(req)
800>>> print f.read()
801Got Data: "This data is passed to stdin of the CGI"
802\end{verbatim}
803
804The code for the sample CGI used in the above example is:
805
806\begin{verbatim}
807#!/usr/bin/env python
808import sys
809data = sys.stdin.read()
810print 'Content-type: text/plain\n\nGot Data: "%s"' % data
811\end{verbatim}
812
813
814Use of Basic HTTP Authentication:
815
816\begin{verbatim}
817import urllib2
818# Create an OpenerDirector with support for Basic HTTP Authentication...
819auth_handler = urllib2.HTTPBasicAuthHandler()
820auth_handler.add_password('realm', 'host', 'username', 'password')
821opener = urllib2.build_opener(auth_handler)
822# ...and install it globally so it can be used with urlopen.
823urllib2.install_opener(opener)
824urllib2.urlopen('http://www.example.com/login.html')
825\end{verbatim}
826
827\function{build_opener()} provides many handlers by default, including a
828\class{ProxyHandler}. By default, \class{ProxyHandler} uses the
829environment variables named \code{<scheme>_proxy}, where \code{<scheme>}
830is the URL scheme involved. For example, the \envvar{http_proxy}
831environment variable is read to obtain the HTTP proxy's URL.
832
833This example replaces the default \class{ProxyHandler} with one that uses
834programmatically-supplied proxy URLs, and adds proxy authorization support
835with \class{ProxyBasicAuthHandler}.
836
837\begin{verbatim}
838proxy_handler = urllib2.ProxyHandler({'http': 'http://www.example.com:3128/'})
839proxy_auth_handler = urllib2.ProxyBasicAuthHandler()
840proxy_auth_handler.add_password('realm', 'host', 'username', 'password')
841
842opener = urllib2.build_opener(proxy_handler, proxy_auth_handler)
843# This time, rather than install the OpenerDirector, we use it directly:
844opener.open('http://www.example.com/login.html')
845\end{verbatim}
846
847
848Adding HTTP headers:
849
850Use the \var{headers} argument to the \class{Request} constructor, or:
851
852\begin{verbatim}
853import urllib2
854req = urllib2.Request('http://www.example.com/')
855req.add_header('Referer', 'http://www.python.org/')
856r = urllib2.urlopen(req)
857\end{verbatim}
858
859\class{OpenerDirector} automatically adds a \mailheader{User-Agent}
860header to every \class{Request}. To change this:
861
862\begin{verbatim}
863import urllib2
864opener = urllib2.build_opener()
865opener.addheaders = [('User-agent', 'Mozilla/5.0')]
866opener.open('http://www.example.com/')
867\end{verbatim}
868
869Also, remember that a few standard headers
870(\mailheader{Content-Length}, \mailheader{Content-Type} and
871\mailheader{Host}) are added when the \class{Request} is passed to
872\function{urlopen()} (or \method{OpenerDirector.open()}).
Note: See TracBrowser for help on using the repository browser.