/* HTTP support.
   Copyright (C) 2005 Free Software Foundation, Inc.

This file is part of GNU Wget.

GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables.  You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL".  If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so.  If you do not wish to do
so, delete this exception statement from your version.  */
29 |
|
---|
30 | #include <config.h>
|
---|
31 |
|
---|
32 | #include <stdio.h>
|
---|
33 | #include <stdlib.h>
|
---|
34 | #include <sys/types.h>
|
---|
35 | #ifdef HAVE_STRING_H
|
---|
36 | # include <string.h>
|
---|
37 | #else
|
---|
38 | # include <strings.h>
|
---|
39 | #endif
|
---|
40 | #ifdef HAVE_UNISTD_H
|
---|
41 | # include <unistd.h>
|
---|
42 | #endif
|
---|
43 | #include <assert.h>
|
---|
44 | #include <errno.h>
|
---|
45 | #if TIME_WITH_SYS_TIME
|
---|
46 | # include <sys/time.h>
|
---|
47 | # include <time.h>
|
---|
48 | #else
|
---|
49 | # if HAVE_SYS_TIME_H
|
---|
50 | # include <sys/time.h>
|
---|
51 | # else
|
---|
52 | # include <time.h>
|
---|
53 | # endif
|
---|
54 | #endif
|
---|
55 | #ifndef errno
|
---|
56 | extern int errno;
|
---|
57 | #endif
|
---|
58 |
|
---|
59 | #include "wget.h"
|
---|
60 | #include "utils.h"
|
---|
61 | #include "url.h"
|
---|
62 | #include "host.h"
|
---|
63 | #include "retr.h"
|
---|
64 | #include "connect.h"
|
---|
65 | #include "netrc.h"
|
---|
66 | #ifdef HAVE_SSL
|
---|
67 | # include "ssl.h"
|
---|
68 | #endif
|
---|
69 | #ifdef ENABLE_NTLM
|
---|
70 | # include "http-ntlm.h"
|
---|
71 | #endif
|
---|
72 | #include "cookies.h"
|
---|
73 | #ifdef ENABLE_DIGEST
|
---|
74 | # include "gen-md5.h"
|
---|
75 | #endif
|
---|
76 | #include "convert.h"
|
---|
77 |
|
---|
78 | extern char *version_string;
|
---|
79 | extern SUM_SIZE_INT total_downloaded_bytes;
|
---|
80 |
|
---|
81 | extern FILE *output_stream;
|
---|
82 | extern int output_stream_regular;
|
---|
83 |
|
---|
/* Smaller of X and Y, unless the platform already supplies MIN.
   Either argument may be evaluated twice, so avoid side effects.  */
#ifndef MIN
# define MIN(x, y) ((y) < (x) ? (y) : (x))
#endif
87 |
|
---|
88 | |
---|
89 |
|
---|
/* File-local cookie state: a flag and the cookie jar pointer.  */
static int cookies_loaded_p;
static struct cookie_jar *wget_cookie_jar;
92 |
|
---|
/* Media type strings for HTML and XHTML documents.  */
#define TEXTHTML_S "text/html"
#define TEXTXHTML_S "application/xhtml+xml"
95 |
|
---|
/* Predicates on a numeric HTTP status code.  Each one may evaluate
   its argument more than once.  */

/* Any 2xx code.  */
#define H_20X(x)        ((x) >= 200 && (x) < 300)
/* Exactly 206.  */
#define H_PARTIAL(x)    ((x) == HTTP_STATUS_PARTIAL_CONTENTS)
/* One of 301, 302, 303 or 307.  */
#define H_REDIRECTED(x) ((x) == HTTP_STATUS_MOVED_PERMANENTLY          \
                         || (x) == HTTP_STATUS_MOVED_TEMPORARILY       \
                         || (x) == HTTP_STATUS_SEE_OTHER               \
                         || (x) == HTTP_STATUS_TEMPORARY_REDIRECT)

/* HTTP/1.0 status codes from RFC1945, provided for reference.  */

/* Successful 2xx.  */
#define HTTP_STATUS_OK                    200
#define HTTP_STATUS_CREATED               201
#define HTTP_STATUS_ACCEPTED              202
#define HTTP_STATUS_NO_CONTENT            204
#define HTTP_STATUS_PARTIAL_CONTENTS      206

/* Redirection 3xx.  */
#define HTTP_STATUS_MULTIPLE_CHOICES      300
#define HTTP_STATUS_MOVED_PERMANENTLY     301
#define HTTP_STATUS_MOVED_TEMPORARILY     302
#define HTTP_STATUS_SEE_OTHER             303 /* from HTTP/1.1 */
#define HTTP_STATUS_NOT_MODIFIED          304
#define HTTP_STATUS_TEMPORARY_REDIRECT    307 /* from HTTP/1.1 */

/* Client error 4xx.  */
#define HTTP_STATUS_BAD_REQUEST           400
#define HTTP_STATUS_UNAUTHORIZED          401
#define HTTP_STATUS_FORBIDDEN             403
#define HTTP_STATUS_NOT_FOUND             404
#define HTTP_STATUS_RANGE_NOT_SATISFIABLE 416

/* Server errors 5xx.  */
#define HTTP_STATUS_INTERNAL              500
#define HTTP_STATUS_NOT_IMPLEMENTED       501
#define HTTP_STATUS_BAD_GATEWAY           502
#define HTTP_STATUS_UNAVAILABLE           503
132 | |
---|
133 |
|
---|
/* Release policy of a request header: which of its two strings
   release_header frees.  */
enum rp {
  rel_none,                     /* free neither string */
  rel_name,                     /* free the name only */
  rel_value,                    /* free the value only */
  rel_both                      /* free both name and value */
};
137 |
|
---|
138 | struct request {
|
---|
139 | const char *method;
|
---|
140 | char *arg;
|
---|
141 |
|
---|
142 | struct request_header {
|
---|
143 | char *name, *value;
|
---|
144 | enum rp release_policy;
|
---|
145 | } *headers;
|
---|
146 | int hcount, hcapacity;
|
---|
147 | };
|
---|
148 |
|
---|
149 | /* Create a new, empty request. At least request_set_method must be
|
---|
150 | called before the request can be used. */
|
---|
151 |
|
---|
152 | static struct request *
|
---|
153 | request_new (void)
|
---|
154 | {
|
---|
155 | struct request *req = xnew0 (struct request);
|
---|
156 | req->hcapacity = 8;
|
---|
157 | req->headers = xnew_array (struct request_header, req->hcapacity);
|
---|
158 | return req;
|
---|
159 | }
|
---|
160 |
|
---|
161 | /* Set the request's method and its arguments. METH should be a
|
---|
162 | literal string (or it should outlive the request) because it will
|
---|
163 | not be freed. ARG will be freed by request_free. */
|
---|
164 |
|
---|
165 | static void
|
---|
166 | request_set_method (struct request *req, const char *meth, char *arg)
|
---|
167 | {
|
---|
168 | req->method = meth;
|
---|
169 | req->arg = arg;
|
---|
170 | }
|
---|
171 |
|
---|
172 | /* Return the method string passed with the last call to
|
---|
173 | request_set_method. */
|
---|
174 |
|
---|
175 | static const char *
|
---|
176 | request_method (const struct request *req)
|
---|
177 | {
|
---|
178 | return req->method;
|
---|
179 | }
|
---|
180 |
|
---|
181 | /* Free one header according to the release policy specified with
|
---|
182 | request_set_header. */
|
---|
183 |
|
---|
184 | static void
|
---|
185 | release_header (struct request_header *hdr)
|
---|
186 | {
|
---|
187 | switch (hdr->release_policy)
|
---|
188 | {
|
---|
189 | case rel_none:
|
---|
190 | break;
|
---|
191 | case rel_name:
|
---|
192 | xfree (hdr->name);
|
---|
193 | break;
|
---|
194 | case rel_value:
|
---|
195 | xfree (hdr->value);
|
---|
196 | break;
|
---|
197 | case rel_both:
|
---|
198 | xfree (hdr->name);
|
---|
199 | xfree (hdr->value);
|
---|
200 | break;
|
---|
201 | }
|
---|
202 | }
|
---|
203 |
|
---|
204 | /* Set the request named NAME to VALUE. Specifically, this means that
|
---|
205 | a "NAME: VALUE\r\n" header line will be used in the request. If a
|
---|
206 | header with the same name previously existed in the request, its
|
---|
207 | value will be replaced by this one. A NULL value means do nothing.
|
---|
208 |
|
---|
209 | RELEASE_POLICY determines whether NAME and VALUE should be released
|
---|
210 | (freed) with request_free. Allowed values are:
|
---|
211 |
|
---|
212 | - rel_none - don't free NAME or VALUE
|
---|
213 | - rel_name - free NAME when done
|
---|
214 | - rel_value - free VALUE when done
|
---|
215 | - rel_both - free both NAME and VALUE when done
|
---|
216 |
|
---|
217 | Setting release policy is useful when arguments come from different
|
---|
218 | sources. For example:
|
---|
219 |
|
---|
220 | // Don't free literal strings!
|
---|
221 | request_set_header (req, "Pragma", "no-cache", rel_none);
|
---|
222 |
|
---|
223 | // Don't free a global variable, we'll need it later.
|
---|
224 | request_set_header (req, "Referer", opt.referer, rel_none);
|
---|
225 |
|
---|
226 | // Value freshly allocated, free it when done.
|
---|
227 | request_set_header (req, "Range",
|
---|
228 | aprintf ("bytes=%s-", number_to_static_string (hs->restval)),
|
---|
229 | rel_value);
|
---|
230 | */
|
---|
231 |
|
---|
232 | static void
|
---|
233 | request_set_header (struct request *req, char *name, char *value,
|
---|
234 | enum rp release_policy)
|
---|
235 | {
|
---|
236 | struct request_header *hdr;
|
---|
237 | int i;
|
---|
238 |
|
---|
239 | if (!value)
|
---|
240 | {
|
---|
241 | /* A NULL value is a no-op; if freeing the name is requested,
|
---|
242 | free it now to avoid leaks. */
|
---|
243 | if (release_policy == rel_name || release_policy == rel_both)
|
---|
244 | xfree (name);
|
---|
245 | return;
|
---|
246 | }
|
---|
247 |
|
---|
248 | for (i = 0; i < req->hcount; i++)
|
---|
249 | {
|
---|
250 | hdr = &req->headers[i];
|
---|
251 | if (0 == strcasecmp (name, hdr->name))
|
---|
252 | {
|
---|
253 | /* Replace existing header. */
|
---|
254 | release_header (hdr);
|
---|
255 | hdr->name = name;
|
---|
256 | hdr->value = value;
|
---|
257 | hdr->release_policy = release_policy;
|
---|
258 | return;
|
---|
259 | }
|
---|
260 | }
|
---|
261 |
|
---|
262 | /* Install new header. */
|
---|
263 |
|
---|
264 | if (req->hcount >= req->hcapacity)
|
---|
265 | {
|
---|
266 | req->hcapacity <<= 1;
|
---|
267 | req->headers = xrealloc (req->headers, req->hcapacity * sizeof (*hdr));
|
---|
268 | }
|
---|
269 | hdr = &req->headers[req->hcount++];
|
---|
270 | hdr->name = name;
|
---|
271 | hdr->value = value;
|
---|
272 | hdr->release_policy = release_policy;
|
---|
273 | }
|
---|
274 |
|
---|
275 | /* Like request_set_header, but sets the whole header line, as
|
---|
276 | provided by the user using the `--header' option. For example,
|
---|
277 | request_set_user_header (req, "Foo: bar") works just like
|
---|
278 | request_set_header (req, "Foo", "bar"). */
|
---|
279 |
|
---|
280 | static void
|
---|
281 | request_set_user_header (struct request *req, const char *header)
|
---|
282 | {
|
---|
283 | char *name;
|
---|
284 | const char *p = strchr (header, ':');
|
---|
285 | if (!p)
|
---|
286 | return;
|
---|
287 | BOUNDED_TO_ALLOCA (header, p, name);
|
---|
288 | ++p;
|
---|
289 | while (ISSPACE (*p))
|
---|
290 | ++p;
|
---|
291 | request_set_header (req, xstrdup (name), (char *) p, rel_name);
|
---|
292 | }
|
---|
293 |
|
---|
294 | /* Remove the header with specified name from REQ. Returns 1 if the
|
---|
295 | header was actually removed, 0 otherwise. */
|
---|
296 |
|
---|
297 | static int
|
---|
298 | request_remove_header (struct request *req, char *name)
|
---|
299 | {
|
---|
300 | int i;
|
---|
301 | for (i = 0; i < req->hcount; i++)
|
---|
302 | {
|
---|
303 | struct request_header *hdr = &req->headers[i];
|
---|
304 | if (0 == strcasecmp (name, hdr->name))
|
---|
305 | {
|
---|
306 | release_header (hdr);
|
---|
307 | /* Move the remaining headers by one. */
|
---|
308 | if (i < req->hcount - 1)
|
---|
309 | memmove (hdr, hdr + 1, (req->hcount - i - 1) * sizeof (*hdr));
|
---|
310 | --req->hcount;
|
---|
311 | return 1;
|
---|
312 | }
|
---|
313 | }
|
---|
314 | return 0;
|
---|
315 | }
|
---|
316 |
|
---|
317 | #define APPEND(p, str) do { \
|
---|
318 | int A_len = strlen (str); \
|
---|
319 | memcpy (p, str, A_len); \
|
---|
320 | p += A_len; \
|
---|
321 | } while (0)
|
---|
322 |
|
---|
323 | /* Construct the request and write it to FD using fd_write. */
|
---|
324 |
|
---|
325 | static int
|
---|
326 | request_send (const struct request *req, int fd)
|
---|
327 | {
|
---|
328 | char *request_string, *p;
|
---|
329 | int i, size, write_error;
|
---|
330 |
|
---|
331 | /* Count the request size. */
|
---|
332 | size = 0;
|
---|
333 |
|
---|
334 | /* METHOD " " ARG " " "HTTP/1.0" "\r\n" */
|
---|
335 | size += strlen (req->method) + 1 + strlen (req->arg) + 1 + 8 + 2;
|
---|
336 |
|
---|
337 | for (i = 0; i < req->hcount; i++)
|
---|
338 | {
|
---|
339 | struct request_header *hdr = &req->headers[i];
|
---|
340 | /* NAME ": " VALUE "\r\n" */
|
---|
341 | size += strlen (hdr->name) + 2 + strlen (hdr->value) + 2;
|
---|
342 | }
|
---|
343 |
|
---|
344 | /* "\r\n\0" */
|
---|
345 | size += 3;
|
---|
346 |
|
---|
347 | p = request_string = alloca_array (char, size);
|
---|
348 |
|
---|
349 | /* Generate the request. */
|
---|
350 |
|
---|
351 | APPEND (p, req->method); *p++ = ' ';
|
---|
352 | APPEND (p, req->arg); *p++ = ' ';
|
---|
353 | memcpy (p, "HTTP/1.0\r\n", 10); p += 10;
|
---|
354 |
|
---|
355 | for (i = 0; i < req->hcount; i++)
|
---|
356 | {
|
---|
357 | struct request_header *hdr = &req->headers[i];
|
---|
358 | APPEND (p, hdr->name);
|
---|
359 | *p++ = ':', *p++ = ' ';
|
---|
360 | APPEND (p, hdr->value);
|
---|
361 | *p++ = '\r', *p++ = '\n';
|
---|
362 | }
|
---|
363 |
|
---|
364 | *p++ = '\r', *p++ = '\n', *p++ = '\0';
|
---|
365 | assert (p - request_string == size);
|
---|
366 |
|
---|
367 | #undef APPEND
|
---|
368 |
|
---|
369 | DEBUGP (("\n---request begin---\n%s---request end---\n", request_string));
|
---|
370 |
|
---|
371 | /* Send the request to the server. */
|
---|
372 |
|
---|
373 | write_error = fd_write (fd, request_string, size - 1, -1.0);
|
---|
374 | if (write_error < 0)
|
---|
375 | logprintf (LOG_VERBOSE, _("Failed writing HTTP request: %s.\n"),
|
---|
376 | strerror (errno));
|
---|
377 | return write_error;
|
---|
378 | }
|
---|
379 |
|
---|
380 | /* Release the resources used by REQ. */
|
---|
381 |
|
---|
382 | static void
|
---|
383 | request_free (struct request *req)
|
---|
384 | {
|
---|
385 | int i;
|
---|
386 | xfree_null (req->arg);
|
---|
387 | for (i = 0; i < req->hcount; i++)
|
---|
388 | release_header (&req->headers[i]);
|
---|
389 | xfree_null (req->headers);
|
---|
390 | xfree (req);
|
---|
391 | }
|
---|
392 |
|
---|
393 | /* Send the contents of FILE_NAME to SOCK. Make sure that exactly
|
---|
394 | PROMISED_SIZE bytes are sent over the wire -- if the file is
|
---|
395 | longer, read only that much; if the file is shorter, report an error. */
|
---|
396 |
|
---|
397 | static int
|
---|
398 | post_file (int sock, const char *file_name, wgint promised_size)
|
---|
399 | {
|
---|
400 | static char chunk[8192];
|
---|
401 | wgint written = 0;
|
---|
402 | int write_error;
|
---|
403 | FILE *fp;
|
---|
404 |
|
---|
405 | DEBUGP (("[writing POST file %s ... ", file_name));
|
---|
406 |
|
---|
407 | fp = fopen (file_name, "rb");
|
---|
408 | if (!fp)
|
---|
409 | return -1;
|
---|
410 | while (!feof (fp) && written < promised_size)
|
---|
411 | {
|
---|
412 | int towrite;
|
---|
413 | int length = fread (chunk, 1, sizeof (chunk), fp);
|
---|
414 | if (length == 0)
|
---|
415 | break;
|
---|
416 | towrite = MIN (promised_size - written, length);
|
---|
417 | write_error = fd_write (sock, chunk, towrite, -1.0);
|
---|
418 | if (write_error < 0)
|
---|
419 | {
|
---|
420 | fclose (fp);
|
---|
421 | return -1;
|
---|
422 | }
|
---|
423 | written += towrite;
|
---|
424 | }
|
---|
425 | fclose (fp);
|
---|
426 |
|
---|
427 | /* If we've written less than was promised, report a (probably
|
---|
428 | nonsensical) error rather than break the promise. */
|
---|
429 | if (written < promised_size)
|
---|
430 | {
|
---|
431 | errno = EINVAL;
|
---|
432 | return -1;
|
---|
433 | }
|
---|
434 |
|
---|
435 | assert (written == promised_size);
|
---|
436 | DEBUGP (("done]\n"));
|
---|
437 | return 0;
|
---|
438 | }
|
---|
439 | |
---|
440 |
|
---|
/* Terminator callback used when reading the response head.  HUNK
   holds OLDLEN bytes seen on earlier calls followed by PEEKLEN newly
   peeked bytes.  Returns a pointer just past the blank line that ends
   the head ("\n\r\n" or "\n\n"), or NULL if no such line is in view
   yet.

   On the first call (OLDLEN == 0) data that does not begin with
   "HTTP" is taken to be an HTTP/0.9 response; HUNK itself is returned
   so that nothing is consumed.  */

static const char *
response_head_terminator (const char *hunk, int oldlen, int peeklen)
{
  const char *p, *limit;
  /* Compare no more of the "HTTP" prefix than has been peeked.  */
  int cmplen = peeklen < 4 ? peeklen : 4;

  if (oldlen == 0 && memcmp (hunk, "HTTP", cmplen) != 0)
    return hunk;

  /* Back up a few bytes into the old data so that a terminator
     straddling the old/new boundary is still found.  */
  p = oldlen < 4 ? hunk : hunk + oldlen - 4;
  limit = hunk + oldlen + peeklen;

  while (p < limit - 1)
    {
      if (*p == '\n')
        {
          if (p[1] == '\n')
            return p + 2;
          if (p < limit - 2 && p[1] == '\r' && p[2] == '\n')
            return p + 3;
        }
      ++p;
    }
  return NULL;
}
470 |
|
---|
471 | /* The maximum size of a single HTTP response we care to read. This
|
---|
472 | is not meant to impose an arbitrary limit, but to protect the user
|
---|
473 | from Wget slurping up available memory upon encountering malicious
|
---|
474 | or buggy server output. Define it to 0 to remove the limit. */
|
---|
475 |
|
---|
476 | #define HTTP_RESPONSE_MAX_SIZE 65536
|
---|
477 |
|
---|
478 | /* Read the HTTP request head from FD and return it. The error
|
---|
479 | conditions are the same as with fd_read_hunk.
|
---|
480 |
|
---|
481 | To support HTTP/0.9 responses, this function tries to make sure
|
---|
482 | that the data begins with "HTTP". If this is not the case, no data
|
---|
483 | is read and an empty request is returned, so that the remaining
|
---|
484 | data can be treated as body. */
|
---|
485 |
|
---|
486 | static char *
|
---|
487 | read_http_response_head (int fd)
|
---|
488 | {
|
---|
489 | return fd_read_hunk (fd, response_head_terminator, 512,
|
---|
490 | HTTP_RESPONSE_MAX_SIZE);
|
---|
491 | }
|
---|
492 |
|
---|
/* A parsed HTTP response head.  */
struct response {
  /* The response text the header pointers refer to.  It is not freed
     by resp_free.  */
  const char *data;

  /* NULL-terminated array of pointers marking where each header line
     starts; left NULL for a headerless (HTTP/0.9) response.  For
     example, given this HTTP response:

       HTTP/1.0 200 Ok
       Description: some
        text
       Etag: x

     the pointers are placed like this:

     "HTTP/1.0 200 Ok\r\nDescription: some\r\n text\r\nEtag: x\r\n\r\n"
      ^                  ^                             ^          ^
      headers[0]         headers[1]                    headers[2] headers[3]

     I.e. headers[0] points to the beginning of the response (the
     status line), headers[1] to the end of the status line and the
     beginning of the first header, and so on.  A continuation line
     belongs to the header it continues.  */

  const char **headers;
};
517 |
|
---|
518 | /* Create a new response object from the text of the HTTP response,
|
---|
519 | available in HEAD. That text is automatically split into
|
---|
520 | constituent header lines for fast retrieval using
|
---|
521 | resp_header_*. */
|
---|
522 |
|
---|
523 | static struct response *
|
---|
524 | resp_new (const char *head)
|
---|
525 | {
|
---|
526 | const char *hdr;
|
---|
527 | int count, size;
|
---|
528 |
|
---|
529 | struct response *resp = xnew0 (struct response);
|
---|
530 | resp->data = head;
|
---|
531 |
|
---|
532 | if (*head == '\0')
|
---|
533 | {
|
---|
534 | /* Empty head means that we're dealing with a headerless
|
---|
535 | (HTTP/0.9) response. In that case, don't set HEADERS at
|
---|
536 | all. */
|
---|
537 | return resp;
|
---|
538 | }
|
---|
539 |
|
---|
540 | /* Split HEAD into header lines, so that resp_header_* functions
|
---|
541 | don't need to do this over and over again. */
|
---|
542 |
|
---|
543 | size = count = 0;
|
---|
544 | hdr = head;
|
---|
545 | while (1)
|
---|
546 | {
|
---|
547 | DO_REALLOC (resp->headers, size, count + 1, const char *);
|
---|
548 | resp->headers[count++] = hdr;
|
---|
549 |
|
---|
550 | /* Break upon encountering an empty line. */
|
---|
551 | if (!hdr[0] || (hdr[0] == '\r' && hdr[1] == '\n') || hdr[0] == '\n')
|
---|
552 | break;
|
---|
553 |
|
---|
554 | /* Find the end of HDR, including continuations. */
|
---|
555 | do
|
---|
556 | {
|
---|
557 | const char *end = strchr (hdr, '\n');
|
---|
558 | if (end)
|
---|
559 | hdr = end + 1;
|
---|
560 | else
|
---|
561 | hdr += strlen (hdr);
|
---|
562 | }
|
---|
563 | while (*hdr == ' ' || *hdr == '\t');
|
---|
564 | }
|
---|
565 | DO_REALLOC (resp->headers, size, count + 1, const char *);
|
---|
566 | resp->headers[count] = NULL;
|
---|
567 |
|
---|
568 | return resp;
|
---|
569 | }
|
---|
570 |
|
---|
571 | /* Locate the header named NAME in the request data, starting with
|
---|
572 | position START. This allows the code to loop through the request
|
---|
573 | data, filtering for all requests of a given name. Returns the
|
---|
574 | found position, or -1 for failure. The code that uses this
|
---|
575 | function typically looks like this:
|
---|
576 |
|
---|
577 | for (pos = 0; (pos = resp_header_locate (...)) != -1; pos++)
|
---|
578 | ... do something with header ...
|
---|
579 |
|
---|
580 | If you only care about one header, use resp_header_get instead of
|
---|
581 | this function. */
|
---|
582 |
|
---|
583 | static int
|
---|
584 | resp_header_locate (const struct response *resp, const char *name, int start,
|
---|
585 | const char **begptr, const char **endptr)
|
---|
586 | {
|
---|
587 | int i;
|
---|
588 | const char **headers = resp->headers;
|
---|
589 | int name_len;
|
---|
590 |
|
---|
591 | if (!headers || !headers[1])
|
---|
592 | return -1;
|
---|
593 |
|
---|
594 | name_len = strlen (name);
|
---|
595 | if (start > 0)
|
---|
596 | i = start;
|
---|
597 | else
|
---|
598 | i = 1;
|
---|
599 |
|
---|
600 | for (; headers[i + 1]; i++)
|
---|
601 | {
|
---|
602 | const char *b = headers[i];
|
---|
603 | const char *e = headers[i + 1];
|
---|
604 | if (e - b > name_len
|
---|
605 | && b[name_len] == ':'
|
---|
606 | && 0 == strncasecmp (b, name, name_len))
|
---|
607 | {
|
---|
608 | b += name_len + 1;
|
---|
609 | while (b < e && ISSPACE (*b))
|
---|
610 | ++b;
|
---|
611 | while (b < e && ISSPACE (e[-1]))
|
---|
612 | --e;
|
---|
613 | *begptr = b;
|
---|
614 | *endptr = e;
|
---|
615 | return i;
|
---|
616 | }
|
---|
617 | }
|
---|
618 | return -1;
|
---|
619 | }
|
---|
620 |
|
---|
/* Look up the first header named NAME in RESP.  If it exists, store
   the start and end of its value in *BEGPTR and *ENDPTR and return 1;
   otherwise return 0.

   resp_header_copy and resp_header_strdup are built on top of this
   function.  */

static int
resp_header_get (const struct response *resp, const char *name,
                 const char **begptr, const char **endptr)
{
  return resp_header_locate (resp, name, 0, begptr, endptr) != -1;
}
635 |
|
---|
/* Copy the value of the response header NAME into BUF, storing at
   most BUFSIZE bytes including the terminating \0; longer values are
   truncated.  Return 1 if the header exists, 0 if it does not.  When
   the value must not be truncated, use resp_header_strdup instead.

   With a BUFSIZE of 0 nothing is copied, but the return value still
   tells whether the header is present.  */

static int
resp_header_copy (const struct response *resp, const char *name,
                  char *buf, int bufsize)
{
  const char *b, *e;
  int len;

  if (!resp_header_get (resp, name, &b, &e))
    return 0;
  if (bufsize == 0)
    return 1;

  len = MIN (e - b, bufsize - 1);
  memcpy (buf, b, len);
  buf[len] = '\0';
  return 1;
}
659 |
|
---|
/* Return a freshly allocated copy of the value of the header NAME in
   RESP, or NULL if RESP has no such header.  */

static char *
resp_header_strdup (const struct response *resp, const char *name)
{
  const char *b, *e;

  if (resp_header_get (resp, name, &b, &e))
    return strdupdelim (b, e);
  return NULL;
}
671 |
|
---|
672 | /* Parse the HTTP status line, which is of format:
|
---|
673 |
|
---|
674 | HTTP-Version SP Status-Code SP Reason-Phrase
|
---|
675 |
|
---|
676 | The function returns the status-code, or -1 if the status line
|
---|
677 | appears malformed. The pointer to "reason-phrase" message is
|
---|
678 | returned in *MESSAGE. */
|
---|
679 |
|
---|
680 | static int
|
---|
681 | resp_status (const struct response *resp, char **message)
|
---|
682 | {
|
---|
683 | int status;
|
---|
684 | const char *p, *end;
|
---|
685 |
|
---|
686 | if (!resp->headers)
|
---|
687 | {
|
---|
688 | /* For a HTTP/0.9 response, assume status 200. */
|
---|
689 | if (message)
|
---|
690 | *message = xstrdup (_("No headers, assuming HTTP/0.9"));
|
---|
691 | return 200;
|
---|
692 | }
|
---|
693 |
|
---|
694 | p = resp->headers[0];
|
---|
695 | end = resp->headers[1];
|
---|
696 |
|
---|
697 | if (!end)
|
---|
698 | return -1;
|
---|
699 |
|
---|
700 | /* "HTTP" */
|
---|
701 | if (end - p < 4 || 0 != strncmp (p, "HTTP", 4))
|
---|
702 | return -1;
|
---|
703 | p += 4;
|
---|
704 |
|
---|
705 | /* Match the HTTP version. This is optional because Gnutella
|
---|
706 | servers have been reported to not specify HTTP version. */
|
---|
707 | if (p < end && *p == '/')
|
---|
708 | {
|
---|
709 | ++p;
|
---|
710 | while (p < end && ISDIGIT (*p))
|
---|
711 | ++p;
|
---|
712 | if (p < end && *p == '.')
|
---|
713 | ++p;
|
---|
714 | while (p < end && ISDIGIT (*p))
|
---|
715 | ++p;
|
---|
716 | }
|
---|
717 |
|
---|
718 | while (p < end && ISSPACE (*p))
|
---|
719 | ++p;
|
---|
720 | if (end - p < 3 || !ISDIGIT (p[0]) || !ISDIGIT (p[1]) || !ISDIGIT (p[2]))
|
---|
721 | return -1;
|
---|
722 |
|
---|
723 | status = 100 * (p[0] - '0') + 10 * (p[1] - '0') + (p[2] - '0');
|
---|
724 | p += 3;
|
---|
725 |
|
---|
726 | if (message)
|
---|
727 | {
|
---|
728 | while (p < end && ISSPACE (*p))
|
---|
729 | ++p;
|
---|
730 | while (p < end && ISSPACE (end[-1]))
|
---|
731 | --end;
|
---|
732 | *message = strdupdelim (p, end);
|
---|
733 | }
|
---|
734 |
|
---|
735 | return status;
|
---|
736 | }
|
---|
737 |
|
---|
738 | /* Release the resources used by RESP. */
|
---|
739 |
|
---|
740 | static void
|
---|
741 | resp_free (struct response *resp)
|
---|
742 | {
|
---|
743 | xfree_null (resp->headers);
|
---|
744 | xfree (resp);
|
---|
745 | }
|
---|
746 |
|
---|
747 | /* Print the server response, line by line, omitting the trailing CRLF
|
---|
748 | from individual header lines, and prefixed with PREFIX. */
|
---|
749 |
|
---|
750 | static void
|
---|
751 | print_server_response (const struct response *resp, const char *prefix)
|
---|
752 | {
|
---|
753 | int i;
|
---|
754 | if (!resp->headers)
|
---|
755 | return;
|
---|
756 | for (i = 0; resp->headers[i + 1]; i++)
|
---|
757 | {
|
---|
758 | const char *b = resp->headers[i];
|
---|
759 | const char *e = resp->headers[i + 1];
|
---|
760 | /* Skip CRLF */
|
---|
761 | if (b < e && e[-1] == '\n')
|
---|
762 | --e;
|
---|
763 | if (b < e && e[-1] == '\r')
|
---|
764 | --e;
|
---|
765 | /* This is safe even on printfs with broken handling of "%.<n>s"
|
---|
766 | because resp->headers ends with \0. */
|
---|
767 | logprintf (LOG_VERBOSE, "%s%.*s\n", prefix, e - b, b);
|
---|
768 | }
|
---|
769 | }
|
---|
770 |
|
---|
771 | /* Parse the `Content-Range' header and extract the information it
|
---|
772 | contains. Returns 1 if successful, -1 otherwise. */
|
---|
773 | static int
|
---|
774 | parse_content_range (const char *hdr, wgint *first_byte_ptr,
|
---|
775 | wgint *last_byte_ptr, wgint *entity_length_ptr)
|
---|
776 | {
|
---|
777 | wgint num;
|
---|
778 |
|
---|
779 | /* Ancient versions of Netscape proxy server, presumably predating
|
---|
780 | rfc2068, sent out `Content-Range' without the "bytes"
|
---|
781 | specifier. */
|
---|
782 | if (!strncasecmp (hdr, "bytes", 5))
|
---|
783 | {
|
---|
784 | hdr += 5;
|
---|
785 | /* "JavaWebServer/1.1.1" sends "bytes: x-y/z", contrary to the
|
---|
786 | HTTP spec. */
|
---|
787 | if (*hdr == ':')
|
---|
788 | ++hdr;
|
---|
789 | while (ISSPACE (*hdr))
|
---|
790 | ++hdr;
|
---|
791 | if (!*hdr)
|
---|
792 | return 0;
|
---|
793 | }
|
---|
794 | if (!ISDIGIT (*hdr))
|
---|
795 | return 0;
|
---|
796 | for (num = 0; ISDIGIT (*hdr); hdr++)
|
---|
797 | num = 10 * num + (*hdr - '0');
|
---|
798 | if (*hdr != '-' || !ISDIGIT (*(hdr + 1)))
|
---|
799 | return 0;
|
---|
800 | *first_byte_ptr = num;
|
---|
801 | ++hdr;
|
---|
802 | for (num = 0; ISDIGIT (*hdr); hdr++)
|
---|
803 | num = 10 * num + (*hdr - '0');
|
---|
804 | if (*hdr != '/' || !ISDIGIT (*(hdr + 1)))
|
---|
805 | return 0;
|
---|
806 | *last_byte_ptr = num;
|
---|
807 | ++hdr;
|
---|
808 | for (num = 0; ISDIGIT (*hdr); hdr++)
|
---|
809 | num = 10 * num + (*hdr - '0');
|
---|
810 | *entity_length_ptr = num;
|
---|
811 | return 1;
|
---|
812 | }
|
---|
813 |
|
---|
814 | /* Read the body of the request, but don't store it anywhere and don't
|
---|
815 | display a progress gauge. This is useful for reading the bodies of
|
---|
816 | administrative responses to which we will soon issue another
|
---|
817 | request. The response is not useful to the user, but reading it
|
---|
818 | allows us to continue using the same connection to the server.
|
---|
819 |
|
---|
820 | If reading fails, 0 is returned, non-zero otherwise. In debug
|
---|
821 | mode, the body is displayed for debugging purposes. */
|
---|
822 |
|
---|
823 | static int
|
---|
824 | skip_short_body (int fd, wgint contlen)
|
---|
825 | {
|
---|
826 | enum {
|
---|
827 | SKIP_SIZE = 512, /* size of the download buffer */
|
---|
828 | SKIP_THRESHOLD = 4096 /* the largest size we read */
|
---|
829 | };
|
---|
830 | char dlbuf[SKIP_SIZE + 1];
|
---|
831 | dlbuf[SKIP_SIZE] = '\0'; /* so DEBUGP can safely print it */
|
---|
832 |
|
---|
833 | /* We shouldn't get here with unknown contlen. (This will change
|
---|
834 | with HTTP/1.1, which supports "chunked" transfer.) */
|
---|
835 | assert (contlen != -1);
|
---|
836 |
|
---|
837 | /* If the body is too large, it makes more sense to simply close the
|
---|
838 | connection than to try to read the body. */
|
---|
839 | if (contlen > SKIP_THRESHOLD)
|
---|
840 | return 0;
|
---|
841 |
|
---|
842 | DEBUGP (("Skipping %s bytes of body: [", number_to_static_string (contlen)));
|
---|
843 |
|
---|
844 | while (contlen > 0)
|
---|
845 | {
|
---|
846 | int ret = fd_read (fd, dlbuf, MIN (contlen, SKIP_SIZE), -1.0);
|
---|
847 | if (ret <= 0)
|
---|
848 | {
|
---|
849 | /* Don't normally report the error since this is an
|
---|
850 | optimization that should be invisible to the user. */
|
---|
851 | DEBUGP (("] aborting (%s).\n",
|
---|
852 | ret < 0 ? strerror (errno) : "EOF received"));
|
---|
853 | return 0;
|
---|
854 | }
|
---|
855 | contlen -= ret;
|
---|
856 | /* Safe even if %.*s bogusly expects terminating \0 because
|
---|
857 | we've zero-terminated dlbuf above. */
|
---|
858 | DEBUGP (("%.*s", ret, dlbuf));
|
---|
859 | }
|
---|
860 |
|
---|
861 | DEBUGP (("] done.\n"));
|
---|
862 | return 1;
|
---|
863 | }
|
---|
864 | |
---|
865 |
|
---|
866 | /* Persistent connections. Currently, we cache the most recently used
|
---|
867 | connection as persistent, provided that the HTTP server agrees to
|
---|
868 | make it such. The persistence data is stored in the variables
|
---|
869 | below. Ideally, it should be possible to cache an arbitrary fixed
|
---|
870 | number of these connections. */
|
---|
871 |
|
---|
/* Non-zero while the PCONN record below describes a registered
   persistent connection.  */
static int pconn_active;

/* The one persistent connection that is currently remembered.  */
static struct {
  /* Socket descriptor of the connection.  */
  int socket;

  /* Host name and port the connection was registered under.  */
  char *host;
  int port;

  /* Whether an SSL handshake has occurred on this connection.  */
  int ssl;

  /* Whether the connection itself has been authorized.  Only NTLM
     does this, because it authorizes *connections* rather than
     individual requests.  (That practice is peculiar for HTTP, but it
     is a useful optimization.)  */
  int authorized;

#ifdef ENABLE_NTLM
  /* NTLM data belonging to the current connection.  */
  struct ntlmdata ntlm;
#endif
} pconn;
897 |
|
---|
898 | /* Mark the persistent connection as invalid and free the resources it
|
---|
899 | uses. This is used by the CLOSE_* macros after they forcefully
|
---|
900 | close a registered persistent connection. */
|
---|
901 |
|
---|
902 | static void
|
---|
903 | invalidate_persistent (void)
|
---|
904 | {
|
---|
905 | DEBUGP (("Disabling further reuse of socket %d.\n", pconn.socket));
|
---|
906 | pconn_active = 0;
|
---|
907 | fd_close (pconn.socket);
|
---|
908 | xfree (pconn.host);
|
---|
909 | xzero (pconn);
|
---|
910 | }
|
---|
911 |
|
---|
912 | /* Register FD, which should be a TCP/IP connection to HOST:PORT, as
|
---|
913 | persistent. This will enable someone to use the same connection
|
---|
914 | later. In the context of HTTP, this must be called only AFTER the
|
---|
915 | response has been received and the server has promised that the
|
---|
916 | connection will remain alive.
|
---|
917 |
|
---|
918 | If a previous connection was persistent, it is closed. */
|
---|
919 |
|
---|
920 | static void
|
---|
921 | register_persistent (const char *host, int port, int fd, int ssl)
|
---|
922 | {
|
---|
923 | if (pconn_active)
|
---|
924 | {
|
---|
925 | if (pconn.socket == fd)
|
---|
926 | {
|
---|
927 | /* The connection FD is already registered. */
|
---|
928 | return;
|
---|
929 | }
|
---|
930 | else
|
---|
931 | {
|
---|
932 | /* The old persistent connection is still active; close it
|
---|
933 | first. This situation arises whenever a persistent
|
---|
934 | connection exists, but we then connect to a different
|
---|
935 | host, and try to register a persistent connection to that
|
---|
936 | one. */
|
---|
937 | invalidate_persistent ();
|
---|
938 | }
|
---|
939 | }
|
---|
940 |
|
---|
941 | pconn_active = 1;
|
---|
942 | pconn.socket = fd;
|
---|
943 | pconn.host = xstrdup (host);
|
---|
944 | pconn.port = port;
|
---|
945 | pconn.ssl = ssl;
|
---|
946 | pconn.authorized = 0;
|
---|
947 |
|
---|
948 | DEBUGP (("Registered socket %d for persistent reuse.\n", fd));
|
---|
949 | }
|
---|
950 |
|
---|
951 | /* Return non-zero if a persistent connection is available for
|
---|
952 | connecting to HOST:PORT. */
|
---|
953 |
|
---|
954 | static int
|
---|
955 | persistent_available_p (const char *host, int port, int ssl,
|
---|
956 | int *host_lookup_failed)
|
---|
957 | {
|
---|
958 | /* First, check whether a persistent connection is active at all. */
|
---|
959 | if (!pconn_active)
|
---|
960 | return 0;
|
---|
961 |
|
---|
962 | /* If we want SSL and the last connection wasn't or vice versa,
|
---|
963 | don't use it. Checking for host and port is not enough because
|
---|
964 | HTTP and HTTPS can apparently coexist on the same port. */
|
---|
965 | if (ssl != pconn.ssl)
|
---|
966 | return 0;
|
---|
967 |
|
---|
968 | /* If we're not connecting to the same port, we're not interested. */
|
---|
969 | if (port != pconn.port)
|
---|
970 | return 0;
|
---|
971 |
|
---|
972 | /* If the host is the same, we're in business. If not, there is
|
---|
973 | still hope -- read below. */
|
---|
974 | if (0 != strcasecmp (host, pconn.host))
|
---|
975 | {
|
---|
976 | /* Check if pconn.socket is talking to HOST under another name.
|
---|
977 | This happens often when both sites are virtual hosts
|
---|
978 | distinguished only by name and served by the same network
|
---|
979 | interface, and hence the same web server (possibly set up by
|
---|
980 | the ISP and serving many different web sites). This
|
---|
981 | admittedly unconventional optimization does not contradict
|
---|
982 | HTTP and works well with popular server software. */
|
---|
983 |
|
---|
984 | int found;
|
---|
985 | ip_address ip;
|
---|
986 | struct address_list *al;
|
---|
987 |
|
---|
988 | if (ssl)
|
---|
989 | /* Don't try to talk to two different SSL sites over the same
|
---|
990 | secure connection! (Besides, it's not clear that
|
---|
991 | name-based virtual hosting is even possible with SSL.) */
|
---|
992 | return 0;
|
---|
993 |
|
---|
994 | /* If pconn.socket's peer is one of the IP addresses HOST
|
---|
995 | resolves to, pconn.socket is for all intents and purposes
|
---|
996 | already talking to HOST. */
|
---|
997 |
|
---|
998 | if (!socket_ip_address (pconn.socket, &ip, ENDPOINT_PEER))
|
---|
999 | {
|
---|
1000 | /* Can't get the peer's address -- something must be very
|
---|
1001 | wrong with the connection. */
|
---|
1002 | invalidate_persistent ();
|
---|
1003 | return 0;
|
---|
1004 | }
|
---|
1005 | al = lookup_host (host, 0);
|
---|
1006 | if (!al)
|
---|
1007 | {
|
---|
1008 | *host_lookup_failed = 1;
|
---|
1009 | return 0;
|
---|
1010 | }
|
---|
1011 |
|
---|
1012 | found = address_list_contains (al, &ip);
|
---|
1013 | address_list_release (al);
|
---|
1014 |
|
---|
1015 | if (!found)
|
---|
1016 | return 0;
|
---|
1017 |
|
---|
1018 | /* The persistent connection's peer address was found among the
|
---|
1019 | addresses HOST resolved to; therefore, pconn.sock is in fact
|
---|
1020 | already talking to HOST -- no need to reconnect. */
|
---|
1021 | }
|
---|
1022 |
|
---|
1023 | /* Finally, check whether the connection is still open. This is
|
---|
1024 | important because most server implement a liberal (short) timeout
|
---|
1025 | on persistent connections. Wget can of course always reconnect
|
---|
1026 | if the connection doesn't work out, but it's nicer to know in
|
---|
1027 | advance. This test is a logical followup of the first test, but
|
---|
1028 | is "expensive" and therefore placed at the end of the list. */
|
---|
1029 |
|
---|
1030 | if (!test_socket_open (pconn.socket))
|
---|
1031 | {
|
---|
1032 | /* Oops, the socket is no longer open. Now that we know that,
|
---|
1033 | let's invalidate the persistent connection before returning
|
---|
1034 | 0. */
|
---|
1035 | invalidate_persistent ();
|
---|
1036 | return 0;
|
---|
1037 | }
|
---|
1038 |
|
---|
1039 | return 1;
|
---|
1040 | }
|
---|
1041 |
|
---|
/* The idea behind these two CLOSE macros is to distinguish between
   two cases: one when the job we've been doing is finished, and we
   want to close the connection and leave, and two when something is
   seriously wrong and we're closing the connection as part of
   cleanup.

   In case of keep_alive, CLOSE_FINISH should leave the connection
   open, while CLOSE_INVALIDATE should still close it.

   Note that the semantics of the flag `keep_alive' is "this
   connection *will* be reused (the server has promised not to close
   the connection once we're done)", while the semantics of
   `pconn_active && (fd) == pconn.socket' is "we're *now* using an
   active, registered connection".

   CLOSE_FINISH reads a variable named `keep_alive' that must be in
   scope at the point of use.  FD must be a modifiable lvalue and is
   evaluated more than once.  Whenever either macro closes the
   descriptor -- directly with fd_close, or through
   invalidate_persistent when FD is the registered persistent
   connection -- it also sets FD to -1, so the caller is never left
   holding a closed descriptor.  */

#define CLOSE_FINISH(fd) do {                   \
  if (!keep_alive)                              \
    {                                           \
      if (pconn_active && (fd) == pconn.socket) \
        invalidate_persistent ();               \
      else                                      \
        fd_close (fd);                          \
      (fd) = -1;                                \
    }                                           \
} while (0)

#define CLOSE_INVALIDATE(fd) do {               \
  if (pconn_active && (fd) == pconn.socket)     \
    invalidate_persistent ();                   \
  else                                          \
    fd_close (fd);                              \
  (fd) = -1;                                    \
} while (0)
1077 | |
---|
1078 |
|
---|
1079 | struct http_stat
|
---|
1080 | {
|
---|
1081 | wgint len; /* received length */
|
---|
1082 | wgint contlen; /* expected length */
|
---|
1083 | wgint restval; /* the restart value */
|
---|
1084 | int res; /* the result of last read */
|
---|
1085 | char *newloc; /* new location (redirection) */
|
---|
1086 | char *remote_time; /* remote time-stamp string */
|
---|
1087 | char *error; /* textual HTTP error */
|
---|
1088 | int statcode; /* status code */
|
---|
1089 | wgint rd_size; /* amount of data read from socket */
|
---|
1090 | double dltime; /* time it took to download the data */
|
---|
1091 | const char *referer; /* value of the referer header. */
|
---|
1092 | char **local_file; /* local file. */
|
---|
1093 | };
|
---|
1094 |
|
---|
1095 | static void
|
---|
1096 | free_hstat (struct http_stat *hs)
|
---|
1097 | {
|
---|
1098 | xfree_null (hs->newloc);
|
---|
1099 | xfree_null (hs->remote_time);
|
---|
1100 | xfree_null (hs->error);
|
---|
1101 |
|
---|
1102 | /* Guard against being called twice. */
|
---|
1103 | hs->newloc = NULL;
|
---|
1104 | hs->remote_time = NULL;
|
---|
1105 | hs->error = NULL;
|
---|
1106 | }
|
---|
1107 |
|
---|
1108 | static char *create_authorization_line PARAMS ((const char *, const char *,
|
---|
1109 | const char *, const char *,
|
---|
1110 | const char *, int *));
|
---|
1111 | static char *basic_authentication_encode PARAMS ((const char *, const char *));
|
---|
1112 | static int known_authentication_scheme_p PARAMS ((const char *, const char *));
|
---|
1113 |
|
---|
1114 | time_t http_atotm PARAMS ((const char *));
|
---|
1115 |
|
---|
/* Non-zero if LINE begins, ignoring case, with STRING_CONSTANT and
   the match is followed either by whitespace or by the end of LINE.

   STRING_CONSTANT must be a string literal (or a char array), because
   its length is taken with sizeof.  LINE may be any pointer
   expression, e.g. `p + 1'; it is evaluated more than once, so it
   must not have side effects.  */
#define BEGINS_WITH(line, string_constant)                                \
  (!strncasecmp ((line), (string_constant), sizeof (string_constant) - 1) \
   && (ISSPACE ((line)[sizeof (string_constant) - 1])                     \
       || !(line)[sizeof (string_constant) - 1]))
1120 |
|
---|
/* Add a User-Agent header to REQ according to opt.useragent: when it
   is NULL, a freshly allocated "Wget/<version_string>" value is used;
   when it is a non-empty string, that string is used as-is; when it
   is the empty string, no User-Agent header is set.  */
#define SET_USER_AGENT(req) do {                                          \
  if (opt.useragent == NULL)                                              \
    request_set_header (req, "User-Agent",                                \
                        aprintf ("Wget/%s", version_string), rel_value);  \
  else if (opt.useragent[0] != '\0')                                      \
    request_set_header (req, "User-Agent", opt.useragent, rel_none);      \
} while (0)
1128 |
|
---|
/* Non-zero when any of the options below is set.  These are the
   flags that allow clobbering the file (opening it with "wb"); the
   test is spelled out once here so that it need not be repeated
   later.  #### This will require rework.  */
#define ALLOW_CLOBBER                                           \
  (opt.noclobber || opt.always_rest || opt.timestamping         \
   || opt.dirstruct || opt.output_document)
1134 |
|
---|
1135 | /* Retrieve a document through HTTP protocol. It recognizes status
|
---|
1136 | code, and correctly handles redirections. It closes the network
|
---|
1137 | socket. If it receives an error from the functions below it, it
|
---|
1138 | will print it if there is enough information to do so (almost
|
---|
1139 | always), returning the error to the caller (i.e. http_loop).
|
---|
1140 |
|
---|
1141 | Various HTTP parameters are stored to hs.
|
---|
1142 |
|
---|
1143 | If PROXY is non-NULL, the connection will be made to the proxy
|
---|
1144 | server, and u->url will be requested. */
|
---|
1145 | static uerr_t
|
---|
1146 | gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
|
---|
1147 | {
|
---|
1148 | struct request *req;
|
---|
1149 |
|
---|
1150 | char *type;
|
---|
1151 | char *user, *passwd;
|
---|
1152 | char *proxyauth;
|
---|
1153 | int statcode;
|
---|
1154 | int write_error;
|
---|
1155 | wgint contlen, contrange;
|
---|
1156 | struct url *conn;
|
---|
1157 | FILE *fp;
|
---|
1158 |
|
---|
1159 | int sock = -1;
|
---|
1160 | int flags;
|
---|
1161 |
|
---|
1162 | /* Set to 1 when the authorization has failed permanently and should
|
---|
1163 | not be tried again. */
|
---|
1164 | int auth_finished = 0;
|
---|
1165 |
|
---|
1166 | /* Whether NTLM authentication is used for this request. */
|
---|
1167 | int ntlm_seen = 0;
|
---|
1168 |
|
---|
1169 | /* Whether our connection to the remote host is through SSL. */
|
---|
1170 | int using_ssl = 0;
|
---|
1171 |
|
---|
1172 | /* Whether a HEAD request will be issued (as opposed to GET or
|
---|
1173 | POST). */
|
---|
1174 | int head_only = *dt & HEAD_ONLY;
|
---|
1175 |
|
---|
1176 | char *head;
|
---|
1177 | struct response *resp;
|
---|
1178 | char hdrval[256];
|
---|
1179 | char *message;
|
---|
1180 |
|
---|
1181 | /* Whether this connection will be kept alive after the HTTP request
|
---|
1182 | is done. */
|
---|
1183 | int keep_alive;
|
---|
1184 |
|
---|
1185 | /* Whether keep-alive should be inhibited.
|
---|
1186 |
|
---|
1187 | RFC 2068 requests that 1.0 clients not send keep-alive requests
|
---|
1188 | to proxies. This is because many 1.0 proxies do not interpret
|
---|
1189 | the Connection header and transfer it to the remote server,
|
---|
1190 | causing it to not close the connection and leave both the proxy
|
---|
1191 | and the client hanging. */
|
---|
1192 | int inhibit_keep_alive =
|
---|
1193 | !opt.http_keep_alive || opt.ignore_length || proxy != NULL;
|
---|
1194 |
|
---|
1195 | /* Headers sent when using POST. */
|
---|
1196 | wgint post_data_size = 0;
|
---|
1197 |
|
---|
1198 | int host_lookup_failed = 0;
|
---|
1199 |
|
---|
1200 | #ifdef HAVE_SSL
|
---|
1201 | if (u->scheme == SCHEME_HTTPS)
|
---|
1202 | {
|
---|
1203 | /* Initialize the SSL context. After this has once been done,
|
---|
1204 | it becomes a no-op. */
|
---|
1205 | if (!ssl_init ())
|
---|
1206 | {
|
---|
1207 | scheme_disable (SCHEME_HTTPS);
|
---|
1208 | logprintf (LOG_NOTQUIET,
|
---|
1209 | _("Disabling SSL due to encountered errors.\n"));
|
---|
1210 | return SSLINITFAILED;
|
---|
1211 | }
|
---|
1212 | }
|
---|
1213 | #endif /* HAVE_SSL */
|
---|
1214 |
|
---|
1215 | if (!head_only)
|
---|
1216 | /* If we're doing a GET on the URL, as opposed to just a HEAD, we need to
|
---|
1217 | know the local filename so we can save to it. */
|
---|
1218 | assert (*hs->local_file != NULL);
|
---|
1219 |
|
---|
1220 | /* Initialize certain elements of struct http_stat. */
|
---|
1221 | hs->len = 0;
|
---|
1222 | hs->contlen = -1;
|
---|
1223 | hs->res = -1;
|
---|
1224 | hs->newloc = NULL;
|
---|
1225 | hs->remote_time = NULL;
|
---|
1226 | hs->error = NULL;
|
---|
1227 |
|
---|
1228 | conn = u;
|
---|
1229 |
|
---|
1230 | /* Prepare the request to send. */
|
---|
1231 |
|
---|
1232 | req = request_new ();
|
---|
1233 | {
|
---|
1234 | char *meth_arg;
|
---|
1235 | const char *meth = "GET";
|
---|
1236 | if (head_only)
|
---|
1237 | meth = "HEAD";
|
---|
1238 | else if (opt.post_file_name || opt.post_data)
|
---|
1239 | meth = "POST";
|
---|
1240 | /* Use the full path, i.e. one that includes the leading slash and
|
---|
1241 | the query string. E.g. if u->path is "foo/bar" and u->query is
|
---|
1242 | "param=value", full_path will be "/foo/bar?param=value". */
|
---|
1243 | if (proxy
|
---|
1244 | #ifdef HAVE_SSL
|
---|
1245 | /* When using SSL over proxy, CONNECT establishes a direct
|
---|
1246 | connection to the HTTPS server. Therefore use the same
|
---|
1247 | argument as when talking to the server directly. */
|
---|
1248 | && u->scheme != SCHEME_HTTPS
|
---|
1249 | #endif
|
---|
1250 | )
|
---|
1251 | meth_arg = xstrdup (u->url);
|
---|
1252 | else
|
---|
1253 | meth_arg = url_full_path (u);
|
---|
1254 | request_set_method (req, meth, meth_arg);
|
---|
1255 | }
|
---|
1256 |
|
---|
1257 | request_set_header (req, "Referer", (char *) hs->referer, rel_none);
|
---|
1258 | if (*dt & SEND_NOCACHE)
|
---|
1259 | request_set_header (req, "Pragma", "no-cache", rel_none);
|
---|
1260 | if (hs->restval)
|
---|
1261 | request_set_header (req, "Range",
|
---|
1262 | aprintf ("bytes=%s-",
|
---|
1263 | number_to_static_string (hs->restval)),
|
---|
1264 | rel_value);
|
---|
1265 | SET_USER_AGENT (req);
|
---|
1266 | request_set_header (req, "Accept", "*/*", rel_none);
|
---|
1267 |
|
---|
1268 | /* Find the username and password for authentication. */
|
---|
1269 | user = u->user;
|
---|
1270 | passwd = u->passwd;
|
---|
1271 | search_netrc (u->host, (const char **)&user, (const char **)&passwd, 0);
|
---|
1272 | user = user ? user : (opt.http_user ? opt.http_user : opt.user);
|
---|
1273 | passwd = passwd ? passwd : (opt.http_passwd ? opt.http_passwd : opt.passwd);
|
---|
1274 |
|
---|
1275 | if (user && passwd)
|
---|
1276 | {
|
---|
1277 | /* We have the username and the password, but haven't tried
|
---|
1278 | any authorization yet. Let's see if the "Basic" method
|
---|
1279 | works. If not, we'll come back here and construct a
|
---|
1280 | proper authorization method with the right challenges.
|
---|
1281 |
|
---|
1282 | If we didn't employ this kind of logic, every URL that
|
---|
1283 | requires authorization would have to be processed twice,
|
---|
1284 | which is very suboptimal and generates a bunch of false
|
---|
1285 | "unauthorized" errors in the server log.
|
---|
1286 |
|
---|
1287 | #### But this logic also has a serious problem when used
|
---|
1288 | with stronger authentications: we *first* transmit the
|
---|
1289 | username and the password in clear text, and *then* attempt a
|
---|
1290 | stronger authentication scheme. That cannot be right! We
|
---|
1291 | are only fortunate that almost everyone still uses the
|
---|
1292 | `Basic' scheme anyway.
|
---|
1293 |
|
---|
1294 | There should be an option to prevent this from happening, for
|
---|
1295 | those who use strong authentication schemes and value their
|
---|
1296 | passwords. */
|
---|
1297 | request_set_header (req, "Authorization",
|
---|
1298 | basic_authentication_encode (user, passwd),
|
---|
1299 | rel_value);
|
---|
1300 | }
|
---|
1301 |
|
---|
1302 | proxyauth = NULL;
|
---|
1303 | if (proxy)
|
---|
1304 | {
|
---|
1305 | char *proxy_user, *proxy_passwd;
|
---|
1306 | /* For normal username and password, URL components override
|
---|
1307 | command-line/wgetrc parameters. With proxy
|
---|
1308 | authentication, it's the reverse, because proxy URLs are
|
---|
1309 | normally the "permanent" ones, so command-line args
|
---|
1310 | should take precedence. */
|
---|
1311 | if (opt.proxy_user && opt.proxy_passwd)
|
---|
1312 | {
|
---|
1313 | proxy_user = opt.proxy_user;
|
---|
1314 | proxy_passwd = opt.proxy_passwd;
|
---|
1315 | }
|
---|
1316 | else
|
---|
1317 | {
|
---|
1318 | proxy_user = proxy->user;
|
---|
1319 | proxy_passwd = proxy->passwd;
|
---|
1320 | }
|
---|
1321 | /* #### This does not appear right. Can't the proxy request,
|
---|
1322 | say, `Digest' authentication? */
|
---|
1323 | if (proxy_user && proxy_passwd)
|
---|
1324 | proxyauth = basic_authentication_encode (proxy_user, proxy_passwd);
|
---|
1325 |
|
---|
1326 | /* If we're using a proxy, we will be connecting to the proxy
|
---|
1327 | server. */
|
---|
1328 | conn = proxy;
|
---|
1329 |
|
---|
1330 | /* Proxy authorization over SSL is handled below. */
|
---|
1331 | #ifdef HAVE_SSL
|
---|
1332 | if (u->scheme != SCHEME_HTTPS)
|
---|
1333 | #endif
|
---|
1334 | request_set_header (req, "Proxy-Authorization", proxyauth, rel_value);
|
---|
1335 | }
|
---|
1336 |
|
---|
1337 | {
|
---|
1338 | /* Whether we need to print the host header with braces around
|
---|
1339 | host, e.g. "Host: [3ffe:8100:200:2::2]:1234" instead of the
|
---|
1340 | usual "Host: symbolic-name:1234". */
|
---|
1341 | int squares = strchr (u->host, ':') != NULL;
|
---|
1342 | if (u->port == scheme_default_port (u->scheme))
|
---|
1343 | request_set_header (req, "Host",
|
---|
1344 | aprintf (squares ? "[%s]" : "%s", u->host),
|
---|
1345 | rel_value);
|
---|
1346 | else
|
---|
1347 | request_set_header (req, "Host",
|
---|
1348 | aprintf (squares ? "[%s]:%d" : "%s:%d",
|
---|
1349 | u->host, u->port),
|
---|
1350 | rel_value);
|
---|
1351 | }
|
---|
1352 |
|
---|
1353 | if (!inhibit_keep_alive)
|
---|
1354 | request_set_header (req, "Connection", "Keep-Alive", rel_none);
|
---|
1355 |
|
---|
1356 | if (opt.cookies)
|
---|
1357 | request_set_header (req, "Cookie",
|
---|
1358 | cookie_header (wget_cookie_jar,
|
---|
1359 | u->host, u->port, u->path,
|
---|
1360 | #ifdef HAVE_SSL
|
---|
1361 | u->scheme == SCHEME_HTTPS
|
---|
1362 | #else
|
---|
1363 | 0
|
---|
1364 | #endif
|
---|
1365 | ),
|
---|
1366 | rel_value);
|
---|
1367 |
|
---|
1368 | if (opt.post_data || opt.post_file_name)
|
---|
1369 | {
|
---|
1370 | request_set_header (req, "Content-Type",
|
---|
1371 | "application/x-www-form-urlencoded", rel_none);
|
---|
1372 | if (opt.post_data)
|
---|
1373 | post_data_size = strlen (opt.post_data);
|
---|
1374 | else
|
---|
1375 | {
|
---|
1376 | post_data_size = file_size (opt.post_file_name);
|
---|
1377 | if (post_data_size == -1)
|
---|
1378 | {
|
---|
1379 | logprintf (LOG_NOTQUIET, _("POST data file `%s' missing: %s\n"),
|
---|
1380 | opt.post_file_name, strerror (errno));
|
---|
1381 | post_data_size = 0;
|
---|
1382 | }
|
---|
1383 | }
|
---|
1384 | request_set_header (req, "Content-Length",
|
---|
1385 | xstrdup (number_to_static_string (post_data_size)),
|
---|
1386 | rel_value);
|
---|
1387 | }
|
---|
1388 |
|
---|
1389 | /* Add the user headers. */
|
---|
1390 | if (opt.user_headers)
|
---|
1391 | {
|
---|
1392 | int i;
|
---|
1393 | for (i = 0; opt.user_headers[i]; i++)
|
---|
1394 | request_set_user_header (req, opt.user_headers[i]);
|
---|
1395 | }
|
---|
1396 |
|
---|
1397 | retry_with_auth:
|
---|
1398 | /* We need to come back here when the initial attempt to retrieve
|
---|
1399 | without authorization header fails. (Expected to happen at least
|
---|
1400 | for the Digest authorization scheme.) */
|
---|
1401 |
|
---|
1402 | keep_alive = 0;
|
---|
1403 |
|
---|
1404 | /* Establish the connection. */
|
---|
1405 |
|
---|
1406 | if (!inhibit_keep_alive)
|
---|
1407 | {
|
---|
1408 | /* Look for a persistent connection to target host, unless a
|
---|
1409 | proxy is used. The exception is when SSL is in use, in which
|
---|
1410 | case the proxy is nothing but a passthrough to the target
|
---|
1411 | host, registered as a connection to the latter. */
|
---|
1412 | struct url *relevant = conn;
|
---|
1413 | #ifdef HAVE_SSL
|
---|
1414 | if (u->scheme == SCHEME_HTTPS)
|
---|
1415 | relevant = u;
|
---|
1416 | #endif
|
---|
1417 |
|
---|
1418 | if (persistent_available_p (relevant->host, relevant->port,
|
---|
1419 | #ifdef HAVE_SSL
|
---|
1420 | relevant->scheme == SCHEME_HTTPS,
|
---|
1421 | #else
|
---|
1422 | 0,
|
---|
1423 | #endif
|
---|
1424 | &host_lookup_failed))
|
---|
1425 | {
|
---|
1426 | sock = pconn.socket;
|
---|
1427 | using_ssl = pconn.ssl;
|
---|
1428 | logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"),
|
---|
1429 | escnonprint (pconn.host), pconn.port);
|
---|
1430 | DEBUGP (("Reusing fd %d.\n", sock));
|
---|
1431 | if (pconn.authorized)
|
---|
1432 | /* If the connection is already authorized, the "Basic"
|
---|
1433 | authorization added by code above is unnecessary and
|
---|
1434 | only hurts us. */
|
---|
1435 | request_remove_header (req, "Authorization");
|
---|
1436 | }
|
---|
1437 | }
|
---|
1438 |
|
---|
1439 | if (sock < 0)
|
---|
1440 | {
|
---|
1441 | /* In its current implementation, persistent_available_p will
|
---|
1442 | look up conn->host in some cases. If that lookup failed, we
|
---|
1443 | don't need to bother with connect_to_host. */
|
---|
1444 | if (host_lookup_failed)
|
---|
1445 | {
|
---|
1446 | request_free (req);
|
---|
1447 | return HOSTERR;
|
---|
1448 | }
|
---|
1449 |
|
---|
1450 | sock = connect_to_host (conn->host, conn->port);
|
---|
1451 | if (sock == E_HOST)
|
---|
1452 | {
|
---|
1453 | request_free (req);
|
---|
1454 | return HOSTERR;
|
---|
1455 | }
|
---|
1456 | else if (sock < 0)
|
---|
1457 | {
|
---|
1458 | request_free (req);
|
---|
1459 | return (retryable_socket_connect_error (errno)
|
---|
1460 | ? CONERROR : CONIMPOSSIBLE);
|
---|
1461 | }
|
---|
1462 |
|
---|
1463 | #ifdef HAVE_SSL
|
---|
1464 | if (proxy && u->scheme == SCHEME_HTTPS)
|
---|
1465 | {
|
---|
1466 | /* When requesting SSL URLs through proxies, use the
|
---|
1467 | CONNECT method to request passthrough. */
|
---|
1468 | struct request *connreq = request_new ();
|
---|
1469 | request_set_method (connreq, "CONNECT",
|
---|
1470 | aprintf ("%s:%d", u->host, u->port));
|
---|
1471 | SET_USER_AGENT (connreq);
|
---|
1472 | if (proxyauth)
|
---|
1473 | {
|
---|
1474 | request_set_header (connreq, "Proxy-Authorization",
|
---|
1475 | proxyauth, rel_value);
|
---|
1476 | /* Now that PROXYAUTH is part of the CONNECT request,
|
---|
1477 | zero it out so we don't send proxy authorization with
|
---|
1478 | the regular request below. */
|
---|
1479 | proxyauth = NULL;
|
---|
1480 | }
|
---|
1481 | /* Examples in rfc2817 use the Host header in CONNECT
|
---|
1482 | requests. I don't see how that gains anything, given
|
---|
1483 | that the contents of Host would be exactly the same as
|
---|
1484 | the contents of CONNECT. */
|
---|
1485 |
|
---|
1486 | write_error = request_send (connreq, sock);
|
---|
1487 | request_free (connreq);
|
---|
1488 | if (write_error < 0)
|
---|
1489 | {
|
---|
1490 | CLOSE_INVALIDATE (sock);
|
---|
1491 | return WRITEFAILED;
|
---|
1492 | }
|
---|
1493 |
|
---|
1494 | head = read_http_response_head (sock);
|
---|
1495 | if (!head)
|
---|
1496 | {
|
---|
1497 | logprintf (LOG_VERBOSE, _("Failed reading proxy response: %s\n"),
|
---|
1498 | strerror (errno));
|
---|
1499 | CLOSE_INVALIDATE (sock);
|
---|
1500 | return HERR;
|
---|
1501 | }
|
---|
1502 | message = NULL;
|
---|
1503 | if (!*head)
|
---|
1504 | {
|
---|
1505 | xfree (head);
|
---|
1506 | goto failed_tunnel;
|
---|
1507 | }
|
---|
1508 | DEBUGP (("proxy responded with: [%s]\n", head));
|
---|
1509 |
|
---|
1510 | resp = resp_new (head);
|
---|
1511 | statcode = resp_status (resp, &message);
|
---|
1512 | resp_free (resp);
|
---|
1513 | xfree (head);
|
---|
1514 | if (statcode != 200)
|
---|
1515 | {
|
---|
1516 | failed_tunnel:
|
---|
1517 | logprintf (LOG_NOTQUIET, _("Proxy tunneling failed: %s"),
|
---|
1518 | message ? escnonprint (message) : "?");
|
---|
1519 | xfree_null (message);
|
---|
1520 | return CONSSLERR;
|
---|
1521 | }
|
---|
1522 | xfree_null (message);
|
---|
1523 |
|
---|
1524 | /* SOCK is now *really* connected to u->host, so update CONN
|
---|
1525 | to reflect this. That way register_persistent will
|
---|
1526 | register SOCK as being connected to u->host:u->port. */
|
---|
1527 | conn = u;
|
---|
1528 | }
|
---|
1529 |
|
---|
1530 | if (conn->scheme == SCHEME_HTTPS)
|
---|
1531 | {
|
---|
1532 | if (!ssl_connect (sock) || !ssl_check_certificate (sock, u->host))
|
---|
1533 | {
|
---|
1534 | fd_close (sock);
|
---|
1535 | return CONSSLERR;
|
---|
1536 | }
|
---|
1537 | using_ssl = 1;
|
---|
1538 | }
|
---|
1539 | #endif /* HAVE_SSL */
|
---|
1540 | }
|
---|
1541 |
|
---|
1542 | /* Send the request to server. */
|
---|
1543 | write_error = request_send (req, sock);
|
---|
1544 |
|
---|
1545 | if (write_error >= 0)
|
---|
1546 | {
|
---|
1547 | if (opt.post_data)
|
---|
1548 | {
|
---|
1549 | DEBUGP (("[POST data: %s]\n", opt.post_data));
|
---|
1550 | write_error = fd_write (sock, opt.post_data, post_data_size, -1.0);
|
---|
1551 | }
|
---|
1552 | else if (opt.post_file_name && post_data_size != 0)
|
---|
1553 | write_error = post_file (sock, opt.post_file_name, post_data_size);
|
---|
1554 | }
|
---|
1555 |
|
---|
1556 | if (write_error < 0)
|
---|
1557 | {
|
---|
1558 | CLOSE_INVALIDATE (sock);
|
---|
1559 | request_free (req);
|
---|
1560 | return WRITEFAILED;
|
---|
1561 | }
|
---|
1562 | logprintf (LOG_VERBOSE, _("%s request sent, awaiting response... "),
|
---|
1563 | proxy ? "Proxy" : "HTTP");
|
---|
1564 | contlen = -1;
|
---|
1565 | contrange = 0;
|
---|
1566 | *dt &= ~RETROKF;
|
---|
1567 |
|
---|
1568 | head = read_http_response_head (sock);
|
---|
1569 | if (!head)
|
---|
1570 | {
|
---|
1571 | if (errno == 0)
|
---|
1572 | {
|
---|
1573 | logputs (LOG_NOTQUIET, _("No data received.\n"));
|
---|
1574 | CLOSE_INVALIDATE (sock);
|
---|
1575 | request_free (req);
|
---|
1576 | return HEOF;
|
---|
1577 | }
|
---|
1578 | else
|
---|
1579 | {
|
---|
1580 | logprintf (LOG_NOTQUIET, _("Read error (%s) in headers.\n"),
|
---|
1581 | strerror (errno));
|
---|
1582 | CLOSE_INVALIDATE (sock);
|
---|
1583 | request_free (req);
|
---|
1584 | return HERR;
|
---|
1585 | }
|
---|
1586 | }
|
---|
1587 | DEBUGP (("\n---response begin---\n%s---response end---\n", head));
|
---|
1588 |
|
---|
1589 | resp = resp_new (head);
|
---|
1590 |
|
---|
1591 | /* Check for status line. */
|
---|
1592 | message = NULL;
|
---|
1593 | statcode = resp_status (resp, &message);
|
---|
1594 | if (!opt.server_response)
|
---|
1595 | logprintf (LOG_VERBOSE, "%2d %s\n", statcode,
|
---|
1596 | message ? escnonprint (message) : "");
|
---|
1597 | else
|
---|
1598 | {
|
---|
1599 | logprintf (LOG_VERBOSE, "\n");
|
---|
1600 | print_server_response (resp, " ");
|
---|
1601 | }
|
---|
1602 |
|
---|
1603 | if (!opt.ignore_length
|
---|
1604 | && resp_header_copy (resp, "Content-Length", hdrval, sizeof (hdrval)))
|
---|
1605 | {
|
---|
1606 | wgint parsed;
|
---|
1607 | errno = 0;
|
---|
1608 | parsed = str_to_wgint (hdrval, NULL, 10);
|
---|
1609 | if (parsed == WGINT_MAX && errno == ERANGE)
|
---|
1610 | /* Out of range.
|
---|
1611 | #### If Content-Length is out of range, it most likely
|
---|
1612 | means that the file is larger than 2G and that we're
|
---|
1613 | compiled without LFS. In that case we should probably
|
---|
1614 | refuse to even attempt to download the file. */
|
---|
1615 | contlen = -1;
|
---|
1616 | else
|
---|
1617 | contlen = parsed;
|
---|
1618 | }
|
---|
1619 |
|
---|
1620 | /* Check for keep-alive related responses. */
|
---|
1621 | if (!inhibit_keep_alive && contlen != -1)
|
---|
1622 | {
|
---|
1623 | if (resp_header_copy (resp, "Keep-Alive", NULL, 0))
|
---|
1624 | keep_alive = 1;
|
---|
1625 | else if (resp_header_copy (resp, "Connection", hdrval, sizeof (hdrval)))
|
---|
1626 | {
|
---|
1627 | if (0 == strcasecmp (hdrval, "Keep-Alive"))
|
---|
1628 | keep_alive = 1;
|
---|
1629 | }
|
---|
1630 | }
|
---|
1631 | if (keep_alive)
|
---|
1632 | /* The server has promised that it will not close the connection
|
---|
1633 | when we're done. This means that we can register it. */
|
---|
1634 | register_persistent (conn->host, conn->port, sock, using_ssl);
|
---|
1635 |
|
---|
1636 | if (statcode == HTTP_STATUS_UNAUTHORIZED)
|
---|
1637 | {
|
---|
1638 | /* Authorization is required. */
|
---|
1639 | if (keep_alive && !head_only && skip_short_body (sock, contlen))
|
---|
1640 | CLOSE_FINISH (sock);
|
---|
1641 | else
|
---|
1642 | CLOSE_INVALIDATE (sock);
|
---|
1643 | pconn.authorized = 0;
|
---|
1644 | if (!auth_finished && (user && passwd))
|
---|
1645 | {
|
---|
1646 | /* IIS sends multiple copies of WWW-Authenticate, one with
|
---|
1647 | the value "negotiate", and other(s) with data. Loop over
|
---|
1648 | all the occurrences and pick the one we recognize. */
|
---|
1649 | int wapos;
|
---|
1650 | const char *wabeg, *waend;
|
---|
1651 | char *www_authenticate = NULL;
|
---|
1652 | for (wapos = 0;
|
---|
1653 | (wapos = resp_header_locate (resp, "WWW-Authenticate", wapos,
|
---|
1654 | &wabeg, &waend)) != -1;
|
---|
1655 | ++wapos)
|
---|
1656 | if (known_authentication_scheme_p (wabeg, waend))
|
---|
1657 | {
|
---|
1658 | BOUNDED_TO_ALLOCA (wabeg, waend, www_authenticate);
|
---|
1659 | break;
|
---|
1660 | }
|
---|
1661 |
|
---|
1662 | if (!www_authenticate)
|
---|
1663 | /* If the authentication header is missing or
|
---|
1664 | unrecognized, there's no sense in retrying. */
|
---|
1665 | logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n"));
|
---|
1666 | else if (BEGINS_WITH (www_authenticate, "Basic"))
|
---|
1667 | /* If the authentication scheme is "Basic", which we send
|
---|
1668 | by default, there's no sense in retrying either. (This
|
---|
1669 | should be changed when we stop sending "Basic" data by
|
---|
1670 | default.) */
|
---|
1671 | ;
|
---|
1672 | else
|
---|
1673 | {
|
---|
1674 | char *pth;
|
---|
1675 | pth = url_full_path (u);
|
---|
1676 | request_set_header (req, "Authorization",
|
---|
1677 | create_authorization_line (www_authenticate,
|
---|
1678 | user, passwd,
|
---|
1679 | request_method (req),
|
---|
1680 | pth,
|
---|
1681 | &auth_finished),
|
---|
1682 | rel_value);
|
---|
1683 | if (BEGINS_WITH (www_authenticate, "NTLM"))
|
---|
1684 | ntlm_seen = 1;
|
---|
1685 | xfree (pth);
|
---|
1686 | goto retry_with_auth;
|
---|
1687 | }
|
---|
1688 | }
|
---|
1689 | logputs (LOG_NOTQUIET, _("Authorization failed.\n"));
|
---|
1690 | request_free (req);
|
---|
1691 | return AUTHFAILED;
|
---|
1692 | }
|
---|
1693 | else /* statcode != HTTP_STATUS_UNAUTHORIZED */
|
---|
1694 | {
|
---|
1695 | /* Kludge: if NTLM is used, mark the TCP connection as authorized. */
|
---|
1696 | if (ntlm_seen)
|
---|
1697 | pconn.authorized = 1;
|
---|
1698 | }
|
---|
1699 | request_free (req);
|
---|
1700 |
|
---|
1701 | hs->statcode = statcode;
|
---|
1702 | if (statcode == -1)
|
---|
1703 | hs->error = xstrdup (_("Malformed status line"));
|
---|
1704 | else if (!*message)
|
---|
1705 | hs->error = xstrdup (_("(no description)"));
|
---|
1706 | else
|
---|
1707 | hs->error = xstrdup (message);
|
---|
1708 | xfree_null (message);
|
---|
1709 |
|
---|
1710 | type = resp_header_strdup (resp, "Content-Type");
|
---|
1711 | if (type)
|
---|
1712 | {
|
---|
1713 | char *tmp = strchr (type, ';');
|
---|
1714 | if (tmp)
|
---|
1715 | {
|
---|
1716 | while (tmp > type && ISSPACE (tmp[-1]))
|
---|
1717 | --tmp;
|
---|
1718 | *tmp = '\0';
|
---|
1719 | }
|
---|
1720 | }
|
---|
1721 | hs->newloc = resp_header_strdup (resp, "Location");
|
---|
1722 | hs->remote_time = resp_header_strdup (resp, "Last-Modified");
|
---|
1723 |
|
---|
1724 | /* Handle (possibly multiple instances of) the Set-Cookie header. */
|
---|
1725 | if (opt.cookies)
|
---|
1726 | {
|
---|
1727 | int scpos;
|
---|
1728 | const char *scbeg, *scend;
|
---|
1729 | /* The jar should have been created by now. */
|
---|
1730 | assert (wget_cookie_jar != NULL);
|
---|
1731 | for (scpos = 0;
|
---|
1732 | (scpos = resp_header_locate (resp, "Set-Cookie", scpos,
|
---|
1733 | &scbeg, &scend)) != -1;
|
---|
1734 | ++scpos)
|
---|
1735 | {
|
---|
1736 | char *set_cookie; BOUNDED_TO_ALLOCA (scbeg, scend, set_cookie);
|
---|
1737 | cookie_handle_set_cookie (wget_cookie_jar, u->host, u->port,
|
---|
1738 | u->path, set_cookie);
|
---|
1739 | }
|
---|
1740 | }
|
---|
1741 |
|
---|
1742 | if (resp_header_copy (resp, "Content-Range", hdrval, sizeof (hdrval)))
|
---|
1743 | {
|
---|
1744 | wgint first_byte_pos, last_byte_pos, entity_length;
|
---|
1745 | if (parse_content_range (hdrval, &first_byte_pos, &last_byte_pos,
|
---|
1746 | &entity_length))
|
---|
1747 | contrange = first_byte_pos;
|
---|
1748 | }
|
---|
1749 | resp_free (resp);
|
---|
1750 |
|
---|
1751 | /* 20x responses are counted among successful by default. */
|
---|
1752 | if (H_20X (statcode))
|
---|
1753 | *dt |= RETROKF;
|
---|
1754 |
|
---|
1755 | /* Return if redirected. */
|
---|
1756 | if (H_REDIRECTED (statcode) || statcode == HTTP_STATUS_MULTIPLE_CHOICES)
|
---|
1757 | {
|
---|
1758 | /* RFC2068 says that in case of the 300 (multiple choices)
|
---|
1759 | response, the server can output a preferred URL through
|
---|
1760 | `Location' header; otherwise, the request should be treated
|
---|
1761 | like GET. So, if the location is set, it will be a
|
---|
1762 | redirection; otherwise, just proceed normally. */
|
---|
1763 | if (statcode == HTTP_STATUS_MULTIPLE_CHOICES && !hs->newloc)
|
---|
1764 | *dt |= RETROKF;
|
---|
1765 | else
|
---|
1766 | {
|
---|
1767 | logprintf (LOG_VERBOSE,
|
---|
1768 | _("Location: %s%s\n"),
|
---|
1769 | hs->newloc ? escnonprint_uri (hs->newloc) : _("unspecified"),
|
---|
1770 | hs->newloc ? _(" [following]") : "");
|
---|
1771 | if (keep_alive && !head_only && skip_short_body (sock, contlen))
|
---|
1772 | CLOSE_FINISH (sock);
|
---|
1773 | else
|
---|
1774 | CLOSE_INVALIDATE (sock);
|
---|
1775 | xfree_null (type);
|
---|
1776 | return NEWLOCATION;
|
---|
1777 | }
|
---|
1778 | }
|
---|
1779 |
|
---|
1780 | /* If content-type is not given, assume text/html. This is because
|
---|
1781 | of the multitude of broken CGI's that "forget" to generate the
|
---|
1782 | content-type. */
|
---|
1783 | if (!type ||
|
---|
1784 | 0 == strncasecmp (type, TEXTHTML_S, strlen (TEXTHTML_S)) ||
|
---|
1785 | 0 == strncasecmp (type, TEXTXHTML_S, strlen (TEXTXHTML_S)))
|
---|
1786 | *dt |= TEXTHTML;
|
---|
1787 | else
|
---|
1788 | *dt &= ~TEXTHTML;
|
---|
1789 |
|
---|
1790 | if (opt.html_extension && (*dt & TEXTHTML))
|
---|
1791 | /* -E / --html-extension / html_extension = on was specified, and this is a
|
---|
1792 | text/html file. If some case-insensitive variation on ".htm[l]" isn't
|
---|
1793 | already the file's suffix, tack on ".html". */
|
---|
1794 | {
|
---|
1795 | char *last_period_in_local_filename = strrchr (*hs->local_file, '.');
|
---|
1796 |
|
---|
1797 | if (last_period_in_local_filename == NULL
|
---|
1798 | || !(0 == strcasecmp (last_period_in_local_filename, ".htm")
|
---|
1799 | || 0 == strcasecmp (last_period_in_local_filename, ".html")))
|
---|
1800 | {
|
---|
1801 | int local_filename_len = strlen (*hs->local_file);
|
---|
1802 | /* Resize the local file, allowing for ".html" preceded by
|
---|
1803 | optional ".NUMBER". */
|
---|
1804 | *hs->local_file = xrealloc (*hs->local_file,
|
---|
1805 | local_filename_len + 24 + sizeof (".html"));
|
---|
1806 | strcpy(*hs->local_file + local_filename_len, ".html");
|
---|
1807 | /* If clobbering is not allowed and the file, as named,
|
---|
1808 | exists, tack on ".NUMBER.html" instead. */
|
---|
1809 | if (!ALLOW_CLOBBER)
|
---|
1810 | {
|
---|
1811 | int ext_num = 1;
|
---|
1812 | do
|
---|
1813 | sprintf (*hs->local_file + local_filename_len,
|
---|
1814 | ".%d.html", ext_num++);
|
---|
1815 | while (file_exists_p (*hs->local_file));
|
---|
1816 | }
|
---|
1817 | *dt |= ADDED_HTML_EXTENSION;
|
---|
1818 | }
|
---|
1819 | }
|
---|
1820 |
|
---|
1821 | if (statcode == HTTP_STATUS_RANGE_NOT_SATISFIABLE)
|
---|
1822 | {
|
---|
1823 | /* If `-c' is in use and the file has been fully downloaded (or
|
---|
1824 | the remote file has shrunk), Wget effectively requests bytes
|
---|
1825 | after the end of file and the server response with 416. */
|
---|
1826 | logputs (LOG_VERBOSE, _("\
|
---|
1827 | \n The file is already fully retrieved; nothing to do.\n\n"));
|
---|
1828 | /* In case the caller inspects. */
|
---|
1829 | hs->len = contlen;
|
---|
1830 | hs->res = 0;
|
---|
1831 | /* Mark as successfully retrieved. */
|
---|
1832 | *dt |= RETROKF;
|
---|
1833 | xfree_null (type);
|
---|
1834 | CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
|
---|
1835 | might be more bytes in the body. */
|
---|
1836 | return RETRUNNEEDED;
|
---|
1837 | }
|
---|
1838 | if ((contrange != 0 && contrange != hs->restval)
|
---|
1839 | || (H_PARTIAL (statcode) && !contrange))
|
---|
1840 | {
|
---|
1841 | /* The Range request was somehow misunderstood by the server.
|
---|
1842 | Bail out. */
|
---|
1843 | xfree_null (type);
|
---|
1844 | CLOSE_INVALIDATE (sock);
|
---|
1845 | return RANGEERR;
|
---|
1846 | }
|
---|
1847 | hs->contlen = contlen + contrange;
|
---|
1848 |
|
---|
1849 | if (opt.verbose)
|
---|
1850 | {
|
---|
1851 | if (*dt & RETROKF)
|
---|
1852 | {
|
---|
1853 | /* No need to print this output if the body won't be
|
---|
1854 | downloaded at all, or if the original server response is
|
---|
1855 | printed. */
|
---|
1856 | logputs (LOG_VERBOSE, _("Length: "));
|
---|
1857 | if (contlen != -1)
|
---|
1858 | {
|
---|
1859 | logputs (LOG_VERBOSE, with_thousand_seps (contlen + contrange));
|
---|
1860 | if (contlen + contrange >= 1024)
|
---|
1861 | logprintf (LOG_VERBOSE, " (%s)",
|
---|
1862 | human_readable (contlen + contrange));
|
---|
1863 | if (contrange)
|
---|
1864 | {
|
---|
1865 | if (contlen >= 1024)
|
---|
1866 | logprintf (LOG_VERBOSE, _(", %s (%s) remaining"),
|
---|
1867 | with_thousand_seps (contlen),
|
---|
1868 | human_readable (contlen));
|
---|
1869 | else
|
---|
1870 | logprintf (LOG_VERBOSE, _(", %s remaining"),
|
---|
1871 | with_thousand_seps (contlen));
|
---|
1872 | }
|
---|
1873 | }
|
---|
1874 | else
|
---|
1875 | logputs (LOG_VERBOSE,
|
---|
1876 | opt.ignore_length ? _("ignored") : _("unspecified"));
|
---|
1877 | if (type)
|
---|
1878 | logprintf (LOG_VERBOSE, " [%s]\n", escnonprint (type));
|
---|
1879 | else
|
---|
1880 | logputs (LOG_VERBOSE, "\n");
|
---|
1881 | }
|
---|
1882 | }
|
---|
1883 | xfree_null (type);
|
---|
1884 | type = NULL; /* We don't need it any more. */
|
---|
1885 |
|
---|
1886 | /* Return if we have no intention of further downloading. */
|
---|
1887 | if (!(*dt & RETROKF) || head_only)
|
---|
1888 | {
|
---|
1889 | /* In case the caller cares to look... */
|
---|
1890 | hs->len = 0;
|
---|
1891 | hs->res = 0;
|
---|
1892 | xfree_null (type);
|
---|
1893 | if (head_only)
|
---|
1894 | /* Pre-1.10 Wget used CLOSE_INVALIDATE here. Now we trust the
|
---|
1895 | servers not to send body in response to a HEAD request. If
|
---|
1896 | you encounter such a server (more likely a broken CGI), use
|
---|
1897 | `--no-http-keep-alive'. */
|
---|
1898 | CLOSE_FINISH (sock);
|
---|
1899 | else if (keep_alive && skip_short_body (sock, contlen))
|
---|
1900 | /* Successfully skipped the body; also keep using the socket. */
|
---|
1901 | CLOSE_FINISH (sock);
|
---|
1902 | else
|
---|
1903 | CLOSE_INVALIDATE (sock);
|
---|
1904 | return RETRFINISHED;
|
---|
1905 | }
|
---|
1906 |
|
---|
1907 | /* Open the local file. */
|
---|
1908 | if (!output_stream)
|
---|
1909 | {
|
---|
1910 | mkalldirs (*hs->local_file);
|
---|
1911 | if (opt.backups)
|
---|
1912 | rotate_backups (*hs->local_file);
|
---|
1913 | if (hs->restval)
|
---|
1914 | fp = fopen (*hs->local_file, "ab");
|
---|
1915 | else if (ALLOW_CLOBBER)
|
---|
1916 | fp = fopen (*hs->local_file, "wb");
|
---|
1917 | else
|
---|
1918 | {
|
---|
1919 | fp = fopen_excl (*hs->local_file, 1);
|
---|
1920 | if (!fp && errno == EEXIST)
|
---|
1921 | {
|
---|
1922 | /* We cannot just invent a new name and use it (which is
|
---|
1923 | what functions like unique_create typically do)
|
---|
1924 | because we told the user we'd use this name.
|
---|
1925 | Instead, return and retry the download. */
|
---|
1926 | logprintf (LOG_NOTQUIET,
|
---|
1927 | _("%s has sprung into existence.\n"),
|
---|
1928 | *hs->local_file);
|
---|
1929 | CLOSE_INVALIDATE (sock);
|
---|
1930 | return FOPEN_EXCL_ERR;
|
---|
1931 | }
|
---|
1932 | }
|
---|
1933 | if (!fp)
|
---|
1934 | {
|
---|
1935 | logprintf (LOG_NOTQUIET, "%s: %s\n", *hs->local_file, strerror (errno));
|
---|
1936 | CLOSE_INVALIDATE (sock);
|
---|
1937 | return FOPENERR;
|
---|
1938 | }
|
---|
1939 | }
|
---|
1940 | else
|
---|
1941 | fp = output_stream;
|
---|
1942 |
|
---|
1943 | /* #### This confuses the timestamping code that checks for file
|
---|
1944 | size. Maybe we should save some additional information? */
|
---|
1945 | if (opt.save_headers)
|
---|
1946 | fwrite (head, 1, strlen (head), fp);
|
---|
1947 |
|
---|
1948 | /* Now we no longer need to store the response header. */
|
---|
1949 | xfree (head);
|
---|
1950 |
|
---|
1951 | /* Download the request body. */
|
---|
1952 | flags = 0;
|
---|
1953 | if (contlen != -1)
|
---|
1954 | /* If content-length is present, read that much; otherwise, read
|
---|
1955 | until EOF. The HTTP spec doesn't require the server to
|
---|
1956 | actually close the connection when it's done sending data. */
|
---|
1957 | flags |= rb_read_exactly;
|
---|
1958 | if (hs->restval > 0 && contrange == 0)
|
---|
1959 | /* If the server ignored our range request, instruct fd_read_body
|
---|
1960 | to skip the first RESTVAL bytes of body. */
|
---|
1961 | flags |= rb_skip_startpos;
|
---|
1962 | hs->len = hs->restval;
|
---|
1963 | hs->rd_size = 0;
|
---|
1964 | hs->res = fd_read_body (sock, fp, contlen != -1 ? contlen : 0,
|
---|
1965 | hs->restval, &hs->rd_size, &hs->len, &hs->dltime,
|
---|
1966 | flags);
|
---|
1967 |
|
---|
1968 | if (hs->res >= 0)
|
---|
1969 | CLOSE_FINISH (sock);
|
---|
1970 | else
|
---|
1971 | CLOSE_INVALIDATE (sock);
|
---|
1972 |
|
---|
1973 | {
|
---|
1974 | /* Close or flush the file. We have to be careful to check for
|
---|
1975 | error here. Checking the result of fwrite() is not enough --
|
---|
1976 | errors could go unnoticed! */
|
---|
1977 | int flush_res;
|
---|
1978 | if (!output_stream)
|
---|
1979 | flush_res = fclose (fp);
|
---|
1980 | else
|
---|
1981 | flush_res = fflush (fp);
|
---|
1982 | if (flush_res == EOF)
|
---|
1983 | hs->res = -2;
|
---|
1984 | }
|
---|
1985 | if (hs->res == -2)
|
---|
1986 | return FWRITEERR;
|
---|
1987 | return RETRFINISHED;
|
---|
1988 | }
|
---|
1989 |
|
---|
1990 | /* The genuine HTTP loop! This is the part where the retrieval is
|
---|
1991 | retried, and retried, and retried, and... */
|
---|
1992 | uerr_t
|
---|
1993 | http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
|
---|
1994 | int *dt, struct url *proxy)
|
---|
1995 | {
|
---|
1996 | int count;
|
---|
1997 | int use_ts, got_head = 0; /* time-stamping info */
|
---|
1998 | char *filename_plus_orig_suffix;
|
---|
1999 | char *local_filename = NULL;
|
---|
2000 | char *tms, *locf, *tmrate;
|
---|
2001 | uerr_t err;
|
---|
2002 | time_t tml = -1, tmr = -1; /* local and remote time-stamps */
|
---|
2003 | wgint local_size = 0; /* the size of the local file */
|
---|
2004 | size_t filename_len;
|
---|
2005 | struct http_stat hstat; /* HTTP status */
|
---|
2006 | struct_stat st;
|
---|
2007 | char *dummy = NULL;
|
---|
2008 |
|
---|
2009 | /* This used to be done in main(), but it's a better idea to do it
|
---|
2010 | here so that we don't go through the hoops if we're just using
|
---|
2011 | FTP or whatever. */
|
---|
2012 | if (opt.cookies)
|
---|
2013 | {
|
---|
2014 | if (!wget_cookie_jar)
|
---|
2015 | wget_cookie_jar = cookie_jar_new ();
|
---|
2016 | if (opt.cookies_input && !cookies_loaded_p)
|
---|
2017 | {
|
---|
2018 | cookie_jar_load (wget_cookie_jar, opt.cookies_input);
|
---|
2019 | cookies_loaded_p = 1;
|
---|
2020 | }
|
---|
2021 | }
|
---|
2022 |
|
---|
2023 | *newloc = NULL;
|
---|
2024 |
|
---|
2025 | /* Warn on (likely bogus) wildcard usage in HTTP. */
|
---|
2026 | if (opt.ftp_glob && has_wildcards_p (u->path))
|
---|
2027 | logputs (LOG_VERBOSE, _("Warning: wildcards not supported in HTTP.\n"));
|
---|
2028 |
|
---|
2029 | xzero (hstat);
|
---|
2030 |
|
---|
2031 | /* Determine the local filename. */
|
---|
2032 | if (local_file && *local_file)
|
---|
2033 | hstat.local_file = local_file;
|
---|
2034 | else if (local_file && !opt.output_document)
|
---|
2035 | {
|
---|
2036 | *local_file = url_file_name (u);
|
---|
2037 | hstat.local_file = local_file;
|
---|
2038 | }
|
---|
2039 | else
|
---|
2040 | {
|
---|
2041 | dummy = url_file_name (u);
|
---|
2042 | hstat.local_file = &dummy;
|
---|
2043 | /* be honest about where we will save the file */
|
---|
2044 | if (local_file && opt.output_document)
|
---|
2045 | *local_file = HYPHENP (opt.output_document) ? NULL : xstrdup (opt.output_document);
|
---|
2046 | }
|
---|
2047 |
|
---|
2048 | if (!opt.output_document)
|
---|
2049 | locf = *hstat.local_file;
|
---|
2050 | else
|
---|
2051 | locf = opt.output_document;
|
---|
2052 |
|
---|
2053 | hstat.referer = referer;
|
---|
2054 |
|
---|
2055 | filename_len = strlen (*hstat.local_file);
|
---|
2056 | filename_plus_orig_suffix = alloca (filename_len + sizeof (".orig"));
|
---|
2057 |
|
---|
2058 | if (opt.noclobber && file_exists_p (*hstat.local_file))
|
---|
2059 | {
|
---|
2060 | /* If opt.noclobber is turned on and file already exists, do not
|
---|
2061 | retrieve the file */
|
---|
2062 | logprintf (LOG_VERBOSE, _("\
|
---|
2063 | File `%s' already there; not retrieving.\n\n"), *hstat.local_file);
|
---|
2064 | /* If the file is there, we suppose it's retrieved OK. */
|
---|
2065 | *dt |= RETROKF;
|
---|
2066 |
|
---|
2067 | /* #### Bogusness alert. */
|
---|
2068 | /* If its suffix is "html" or "htm" or similar, assume text/html. */
|
---|
2069 | if (has_html_suffix_p (*hstat.local_file))
|
---|
2070 | *dt |= TEXTHTML;
|
---|
2071 |
|
---|
2072 | xfree_null (dummy);
|
---|
2073 | return RETROK;
|
---|
2074 | }
|
---|
2075 |
|
---|
2076 | use_ts = 0;
|
---|
2077 | if (opt.timestamping)
|
---|
2078 | {
|
---|
2079 | int local_dot_orig_file_exists = 0;
|
---|
2080 |
|
---|
2081 | if (opt.backup_converted)
|
---|
2082 | /* If -K is specified, we'll act on the assumption that it was specified
|
---|
2083 | last time these files were downloaded as well, and instead of just
|
---|
2084 | comparing local file X against server file X, we'll compare local
|
---|
2085 | file X.orig (if extant, else X) against server file X. If -K
|
---|
2086 | _wasn't_ specified last time, or the server contains files called
|
---|
2087 | *.orig, -N will be back to not operating correctly with -k. */
|
---|
2088 | {
|
---|
2089 | /* Would a single s[n]printf() call be faster? --dan
|
---|
2090 |
|
---|
2091 | Definitely not. sprintf() is horribly slow. It's a
|
---|
2092 | different question whether the difference between the two
|
---|
2093 | affects a program. Usually I'd say "no", but at one
|
---|
2094 | point I profiled Wget, and found that a measurable and
|
---|
2095 | non-negligible amount of time was lost calling sprintf()
|
---|
2096 | in url.c. Replacing sprintf with inline calls to
|
---|
2097 | strcpy() and number_to_string() made a difference.
|
---|
2098 | --hniksic */
|
---|
2099 | memcpy (filename_plus_orig_suffix, *hstat.local_file, filename_len);
|
---|
2100 | memcpy (filename_plus_orig_suffix + filename_len,
|
---|
2101 | ".orig", sizeof (".orig"));
|
---|
2102 |
|
---|
2103 | /* Try to stat() the .orig file. */
|
---|
2104 | if (stat (filename_plus_orig_suffix, &st) == 0)
|
---|
2105 | {
|
---|
2106 | local_dot_orig_file_exists = 1;
|
---|
2107 | local_filename = filename_plus_orig_suffix;
|
---|
2108 | }
|
---|
2109 | }
|
---|
2110 |
|
---|
2111 | if (!local_dot_orig_file_exists)
|
---|
2112 | /* Couldn't stat() <file>.orig, so try to stat() <file>. */
|
---|
2113 | if (stat (*hstat.local_file, &st) == 0)
|
---|
2114 | local_filename = *hstat.local_file;
|
---|
2115 |
|
---|
2116 | if (local_filename != NULL)
|
---|
2117 | /* There was a local file, so we'll check later to see if the version
|
---|
2118 | the server has is the same version we already have, allowing us to
|
---|
2119 | skip a download. */
|
---|
2120 | {
|
---|
2121 | use_ts = 1;
|
---|
2122 | tml = st.st_mtime;
|
---|
2123 | #ifdef WINDOWS
|
---|
2124 | /* Modification time granularity is 2 seconds for Windows, so
|
---|
2125 | increase local time by 1 second for later comparison. */
|
---|
2126 | tml++;
|
---|
2127 | #endif
|
---|
2128 | local_size = st.st_size;
|
---|
2129 | got_head = 0;
|
---|
2130 | }
|
---|
2131 | }
|
---|
2132 | /* Reset the counter. */
|
---|
2133 | count = 0;
|
---|
2134 | *dt = 0;
|
---|
2135 | /* THE loop */
|
---|
2136 | do
|
---|
2137 | {
|
---|
2138 | /* Increment the pass counter. */
|
---|
2139 | ++count;
|
---|
2140 | sleep_between_retrievals (count);
|
---|
2141 | /* Get the current time string. */
|
---|
2142 | tms = time_str (NULL);
|
---|
2143 | /* Print fetch message, if opt.verbose. */
|
---|
2144 | if (opt.verbose)
|
---|
2145 | {
|
---|
2146 | char *hurl = url_string (u, 1);
|
---|
2147 | char tmp[256];
|
---|
2148 | strcpy (tmp, " ");
|
---|
2149 | if (count > 1)
|
---|
2150 | sprintf (tmp, _("(try:%2d)"), count);
|
---|
2151 | logprintf (LOG_VERBOSE, "--%s-- %s\n %s => `%s'\n",
|
---|
2152 | tms, hurl, tmp, locf);
|
---|
2153 | #ifdef WINDOWS
|
---|
2154 | ws_changetitle (hurl);
|
---|
2155 | #endif
|
---|
2156 | xfree (hurl);
|
---|
2157 | }
|
---|
2158 |
|
---|
2159 | /* Default document type is empty. However, if spider mode is
|
---|
2160 | on or time-stamping is employed, HEAD_ONLY commands is
|
---|
2161 | encoded within *dt. */
|
---|
2162 | if (opt.spider || (use_ts && !got_head))
|
---|
2163 | *dt |= HEAD_ONLY;
|
---|
2164 | else
|
---|
2165 | *dt &= ~HEAD_ONLY;
|
---|
2166 |
|
---|
2167 | /* Decide whether or not to restart. */
|
---|
2168 | if (opt.always_rest
|
---|
2169 | && stat (locf, &st) == 0
|
---|
2170 | && S_ISREG (st.st_mode))
|
---|
2171 | /* When -c is used, continue from on-disk size. (Can't use
|
---|
2172 | hstat.len even if count>1 because we don't want a failed
|
---|
2173 | first attempt to clobber existing data.) */
|
---|
2174 | hstat.restval = st.st_size;
|
---|
2175 | else if (count > 1)
|
---|
2176 | /* otherwise, continue where the previous try left off */
|
---|
2177 | hstat.restval = hstat.len;
|
---|
2178 | else
|
---|
2179 | hstat.restval = 0;
|
---|
2180 |
|
---|
2181 | /* Decide whether to send the no-cache directive. We send it in
|
---|
2182 | two cases:
|
---|
2183 | a) we're using a proxy, and we're past our first retrieval.
|
---|
2184 | Some proxies are notorious for caching incomplete data, so
|
---|
2185 | we require a fresh get.
|
---|
2186 | b) caching is explicitly inhibited. */
|
---|
2187 | if ((proxy && count > 1) /* a */
|
---|
2188 | || !opt.allow_cache /* b */
|
---|
2189 | )
|
---|
2190 | *dt |= SEND_NOCACHE;
|
---|
2191 | else
|
---|
2192 | *dt &= ~SEND_NOCACHE;
|
---|
2193 |
|
---|
2194 | /* Try fetching the document, or at least its head. */
|
---|
2195 | err = gethttp (u, &hstat, dt, proxy);
|
---|
2196 |
|
---|
2197 | /* It's unfortunate that wget determines the local filename before finding
|
---|
2198 | out the Content-Type of the file. Barring a major restructuring of the
|
---|
2199 | code, we need to re-set locf here, since gethttp() may have xrealloc()d
|
---|
2200 | *hstat.local_file to tack on ".html". */
|
---|
2201 | if (!opt.output_document)
|
---|
2202 | locf = *hstat.local_file;
|
---|
2203 |
|
---|
2204 | /* Time? */
|
---|
2205 | tms = time_str (NULL);
|
---|
2206 | /* Get the new location (with or without the redirection). */
|
---|
2207 | if (hstat.newloc)
|
---|
2208 | *newloc = xstrdup (hstat.newloc);
|
---|
2209 | switch (err)
|
---|
2210 | {
|
---|
2211 | case HERR: case HEOF: case CONSOCKERR: case CONCLOSED:
|
---|
2212 | case CONERROR: case READERR: case WRITEFAILED:
|
---|
2213 | case RANGEERR: case FOPEN_EXCL_ERR:
|
---|
2214 | /* Non-fatal errors continue executing the loop, which will
|
---|
2215 | bring them to "while" statement at the end, to judge
|
---|
2216 | whether the number of tries was exceeded. */
|
---|
2217 | free_hstat (&hstat);
|
---|
2218 | printwhat (count, opt.ntry);
|
---|
2219 | if (err == FOPEN_EXCL_ERR)
|
---|
2220 | {
|
---|
2221 | /* Re-determine the file name. */
|
---|
2222 | if (local_file && *local_file)
|
---|
2223 | {
|
---|
2224 | xfree (*local_file);
|
---|
2225 | *local_file = url_file_name (u);
|
---|
2226 | hstat.local_file = local_file;
|
---|
2227 | }
|
---|
2228 | else
|
---|
2229 | {
|
---|
2230 | xfree (dummy);
|
---|
2231 | dummy = url_file_name (u);
|
---|
2232 | hstat.local_file = &dummy;
|
---|
2233 | }
|
---|
2234 | /* be honest about where we will save the file */
|
---|
2235 | if (local_file && opt.output_document)
|
---|
2236 | *local_file = HYPHENP (opt.output_document) ? NULL : xstrdup (opt.output_document);
|
---|
2237 | if (!opt.output_document)
|
---|
2238 | locf = *hstat.local_file;
|
---|
2239 | else
|
---|
2240 | locf = opt.output_document;
|
---|
2241 | }
|
---|
2242 | continue;
|
---|
2243 | case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED:
|
---|
2244 | case SSLINITFAILED: case CONTNOTSUPPORTED:
|
---|
2245 | /* Fatal errors just return from the function. */
|
---|
2246 | free_hstat (&hstat);
|
---|
2247 | xfree_null (dummy);
|
---|
2248 | return err;
|
---|
2249 | case FWRITEERR: case FOPENERR:
|
---|
2250 | /* Another fatal error. */
|
---|
2251 | logputs (LOG_VERBOSE, "\n");
|
---|
2252 | logprintf (LOG_NOTQUIET, _("Cannot write to `%s' (%s).\n"),
|
---|
2253 | *hstat.local_file, strerror (errno));
|
---|
2254 | free_hstat (&hstat);
|
---|
2255 | xfree_null (dummy);
|
---|
2256 | return err;
|
---|
2257 | case CONSSLERR:
|
---|
2258 | /* Another fatal error. */
|
---|
2259 | logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n"));
|
---|
2260 | free_hstat (&hstat);
|
---|
2261 | xfree_null (dummy);
|
---|
2262 | return err;
|
---|
2263 | case NEWLOCATION:
|
---|
2264 | /* Return the new location to the caller. */
|
---|
2265 | if (!hstat.newloc)
|
---|
2266 | {
|
---|
2267 | logprintf (LOG_NOTQUIET,
|
---|
2268 | _("ERROR: Redirection (%d) without location.\n"),
|
---|
2269 | hstat.statcode);
|
---|
2270 | free_hstat (&hstat);
|
---|
2271 | xfree_null (dummy);
|
---|
2272 | return WRONGCODE;
|
---|
2273 | }
|
---|
2274 | free_hstat (&hstat);
|
---|
2275 | xfree_null (dummy);
|
---|
2276 | return NEWLOCATION;
|
---|
2277 | case RETRUNNEEDED:
|
---|
2278 | /* The file was already fully retrieved. */
|
---|
2279 | free_hstat (&hstat);
|
---|
2280 | xfree_null (dummy);
|
---|
2281 | return RETROK;
|
---|
2282 | case RETRFINISHED:
|
---|
2283 | /* Deal with you later. */
|
---|
2284 | break;
|
---|
2285 | default:
|
---|
2286 | /* All possibilities should have been exhausted. */
|
---|
2287 | abort ();
|
---|
2288 | }
|
---|
2289 | if (!(*dt & RETROKF))
|
---|
2290 | {
|
---|
2291 | if (!opt.verbose)
|
---|
2292 | {
|
---|
2293 | /* #### Ugly ugly ugly! */
|
---|
2294 | char *hurl = url_string (u, 1);
|
---|
2295 | logprintf (LOG_NONVERBOSE, "%s:\n", hurl);
|
---|
2296 | xfree (hurl);
|
---|
2297 | }
|
---|
2298 | logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
|
---|
2299 | tms, hstat.statcode, escnonprint (hstat.error));
|
---|
2300 | logputs (LOG_VERBOSE, "\n");
|
---|
2301 | free_hstat (&hstat);
|
---|
2302 | xfree_null (dummy);
|
---|
2303 | return WRONGCODE;
|
---|
2304 | }
|
---|
2305 |
|
---|
2306 | /* Did we get the time-stamp? */
|
---|
2307 | if (!got_head)
|
---|
2308 | {
|
---|
2309 | if (opt.timestamping && !hstat.remote_time)
|
---|
2310 | {
|
---|
2311 | logputs (LOG_NOTQUIET, _("\
|
---|
2312 | Last-modified header missing -- time-stamps turned off.\n"));
|
---|
2313 | }
|
---|
2314 | else if (hstat.remote_time)
|
---|
2315 | {
|
---|
2316 | /* Convert the date-string into struct tm. */
|
---|
2317 | tmr = http_atotm (hstat.remote_time);
|
---|
2318 | if (tmr == (time_t) (-1))
|
---|
2319 | logputs (LOG_VERBOSE, _("\
|
---|
2320 | Last-modified header invalid -- time-stamp ignored.\n"));
|
---|
2321 | }
|
---|
2322 | }
|
---|
2323 |
|
---|
2324 | /* The time-stamping section. */
|
---|
2325 | if (use_ts)
|
---|
2326 | {
|
---|
2327 | got_head = 1;
|
---|
2328 | *dt &= ~HEAD_ONLY;
|
---|
2329 | use_ts = 0; /* no more time-stamping */
|
---|
2330 | count = 0; /* the retrieve count for HEAD is
|
---|
2331 | reset */
|
---|
2332 | if (hstat.remote_time && tmr != (time_t) (-1))
|
---|
2333 | {
|
---|
2334 | /* Now time-stamping can be used validly. Time-stamping
|
---|
2335 | means that if the sizes of the local and remote file
|
---|
2336 | match, and local file is newer than the remote file,
|
---|
2337 | it will not be retrieved. Otherwise, the normal
|
---|
2338 | download procedure is resumed. */
|
---|
2339 | if (tml >= tmr &&
|
---|
2340 | (hstat.contlen == -1 || local_size == hstat.contlen))
|
---|
2341 | {
|
---|
2342 | logprintf (LOG_VERBOSE, _("\
|
---|
2343 | Server file no newer than local file `%s' -- not retrieving.\n\n"),
|
---|
2344 | local_filename);
|
---|
2345 | free_hstat (&hstat);
|
---|
2346 | xfree_null (dummy);
|
---|
2347 | return RETROK;
|
---|
2348 | }
|
---|
2349 | else if (tml >= tmr)
|
---|
2350 | logprintf (LOG_VERBOSE, _("\
|
---|
2351 | The sizes do not match (local %s) -- retrieving.\n"),
|
---|
2352 | number_to_static_string (local_size));
|
---|
2353 | else
|
---|
2354 | logputs (LOG_VERBOSE,
|
---|
2355 | _("Remote file is newer, retrieving.\n"));
|
---|
2356 | }
|
---|
2357 | free_hstat (&hstat);
|
---|
2358 | continue;
|
---|
2359 | }
|
---|
2360 | if ((tmr != (time_t) (-1))
|
---|
2361 | && !opt.spider
|
---|
2362 | && ((hstat.len == hstat.contlen) ||
|
---|
2363 | ((hstat.res == 0) && (hstat.contlen == -1))))
|
---|
2364 | {
|
---|
2365 | /* #### This code repeats in http.c and ftp.c. Move it to a
|
---|
2366 | function! */
|
---|
2367 | const char *fl = NULL;
|
---|
2368 | if (opt.output_document)
|
---|
2369 | {
|
---|
2370 | if (output_stream_regular)
|
---|
2371 | fl = opt.output_document;
|
---|
2372 | }
|
---|
2373 | else
|
---|
2374 | fl = *hstat.local_file;
|
---|
2375 | if (fl)
|
---|
2376 | touch (fl, tmr);
|
---|
2377 | }
|
---|
2378 | /* End of time-stamping section. */
|
---|
2379 |
|
---|
2380 | if (opt.spider)
|
---|
2381 | {
|
---|
2382 | logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode,
|
---|
2383 | escnonprint (hstat.error));
|
---|
2384 | xfree_null (dummy);
|
---|
2385 | return RETROK;
|
---|
2386 | }
|
---|
2387 |
|
---|
2388 | tmrate = retr_rate (hstat.rd_size, hstat.dltime, 0);
|
---|
2389 |
|
---|
2390 | if (hstat.len == hstat.contlen)
|
---|
2391 | {
|
---|
2392 | if (*dt & RETROKF)
|
---|
2393 | {
|
---|
2394 | logprintf (LOG_VERBOSE,
|
---|
2395 | _("%s (%s) - `%s' saved [%s/%s]\n\n"),
|
---|
2396 | tms, tmrate, locf,
|
---|
2397 | number_to_static_string (hstat.len),
|
---|
2398 | number_to_static_string (hstat.contlen));
|
---|
2399 | logprintf (LOG_NONVERBOSE,
|
---|
2400 | "%s URL:%s [%s/%s] -> \"%s\" [%d]\n",
|
---|
2401 | tms, u->url,
|
---|
2402 | number_to_static_string (hstat.len),
|
---|
2403 | number_to_static_string (hstat.contlen),
|
---|
2404 | locf, count);
|
---|
2405 | }
|
---|
2406 | ++opt.numurls;
|
---|
2407 | total_downloaded_bytes += hstat.len;
|
---|
2408 |
|
---|
2409 | /* Remember that we downloaded the file for later ".orig" code. */
|
---|
2410 | if (*dt & ADDED_HTML_EXTENSION)
|
---|
2411 | downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
|
---|
2412 | else
|
---|
2413 | downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
|
---|
2414 |
|
---|
2415 | free_hstat (&hstat);
|
---|
2416 | xfree_null (dummy);
|
---|
2417 | return RETROK;
|
---|
2418 | }
|
---|
2419 | else if (hstat.res == 0) /* No read error */
|
---|
2420 | {
|
---|
2421 | if (hstat.contlen == -1) /* We don't know how much we were supposed
|
---|
2422 | to get, so assume we succeeded. */
|
---|
2423 | {
|
---|
2424 | if (*dt & RETROKF)
|
---|
2425 | {
|
---|
2426 | logprintf (LOG_VERBOSE,
|
---|
2427 | _("%s (%s) - `%s' saved [%s]\n\n"),
|
---|
2428 | tms, tmrate, locf,
|
---|
2429 | number_to_static_string (hstat.len));
|
---|
2430 | logprintf (LOG_NONVERBOSE,
|
---|
2431 | "%s URL:%s [%s] -> \"%s\" [%d]\n",
|
---|
2432 | tms, u->url, number_to_static_string (hstat.len),
|
---|
2433 | locf, count);
|
---|
2434 | }
|
---|
2435 | ++opt.numurls;
|
---|
2436 | total_downloaded_bytes += hstat.len;
|
---|
2437 |
|
---|
2438 | /* Remember that we downloaded the file for later ".orig" code. */
|
---|
2439 | if (*dt & ADDED_HTML_EXTENSION)
|
---|
2440 | downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
|
---|
2441 | else
|
---|
2442 | downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
|
---|
2443 |
|
---|
2444 | free_hstat (&hstat);
|
---|
2445 | xfree_null (dummy);
|
---|
2446 | return RETROK;
|
---|
2447 | }
|
---|
2448 | else if (hstat.len < hstat.contlen) /* meaning we lost the
|
---|
2449 | connection too soon */
|
---|
2450 | {
|
---|
2451 | logprintf (LOG_VERBOSE,
|
---|
2452 | _("%s (%s) - Connection closed at byte %s. "),
|
---|
2453 | tms, tmrate, number_to_static_string (hstat.len));
|
---|
2454 | printwhat (count, opt.ntry);
|
---|
2455 | free_hstat (&hstat);
|
---|
2456 | continue;
|
---|
2457 | }
|
---|
2458 | else
|
---|
2459 | /* Getting here would mean reading more data than
|
---|
2460 | requested with content-length, which we never do. */
|
---|
2461 | abort ();
|
---|
2462 | }
|
---|
2463 | else /* now hstat.res can only be -1 */
|
---|
2464 | {
|
---|
2465 | if (hstat.contlen == -1)
|
---|
2466 | {
|
---|
2467 | logprintf (LOG_VERBOSE,
|
---|
2468 | _("%s (%s) - Read error at byte %s (%s)."),
|
---|
2469 | tms, tmrate, number_to_static_string (hstat.len),
|
---|
2470 | strerror (errno));
|
---|
2471 | printwhat (count, opt.ntry);
|
---|
2472 | free_hstat (&hstat);
|
---|
2473 | continue;
|
---|
2474 | }
|
---|
2475 | else /* hstat.res == -1 and contlen is given */
|
---|
2476 | {
|
---|
2477 | logprintf (LOG_VERBOSE,
|
---|
2478 | _("%s (%s) - Read error at byte %s/%s (%s). "),
|
---|
2479 | tms, tmrate,
|
---|
2480 | number_to_static_string (hstat.len),
|
---|
2481 | number_to_static_string (hstat.contlen),
|
---|
2482 | strerror (errno));
|
---|
2483 | printwhat (count, opt.ntry);
|
---|
2484 | free_hstat (&hstat);
|
---|
2485 | continue;
|
---|
2486 | }
|
---|
2487 | }
|
---|
2488 | /* not reached */
|
---|
2489 | }
|
---|
2490 | while (!opt.ntry || (count < opt.ntry));
|
---|
2491 | return TRYLIMEXC;
|
---|
2492 | }
|
---|
2493 | |
---|
2494 |
|
---|
2495 | /* Check whether the result of strptime() indicates success.
|
---|
2496 | strptime() returns the pointer to how far it got to in the string.
|
---|
2497 | The processing has been successful if the string is at `GMT' or
|
---|
2498 | `+X', or at the end of the string.
|
---|
2499 |
|
---|
2500 | In extended regexp parlance, the function returns 1 if P matches
|
---|
2501 | "^ *(GMT|[+-][0-9]|$)", 0 otherwise. P being NULL (which strptime
|
---|
2502 | can return) is considered a failure and 0 is returned. */
|
---|
2503 | static int
|
---|
2504 | check_end (const char *p)
|
---|
2505 | {
|
---|
2506 | if (!p)
|
---|
2507 | return 0;
|
---|
2508 | while (ISSPACE (*p))
|
---|
2509 | ++p;
|
---|
2510 | if (!*p
|
---|
2511 | || (p[0] == 'G' && p[1] == 'M' && p[2] == 'T')
|
---|
2512 | || ((p[0] == '+' || p[0] == '-') && ISDIGIT (p[1])))
|
---|
2513 | return 1;
|
---|
2514 | else
|
---|
2515 | return 0;
|
---|
2516 | }
|
---|
2517 |
|
---|
/* Turn the textual time specification TIME_STRING into the number of
   seconds since the Epoch.

   TIME_STRING may use any of the three formats RFC2616 lets HTTP
   servers emit -- RFC1123-date, RFC850-date or asctime-date -- or the
   format used in the Set-Cookie header.  Timezones are ignored, and
   should be GMT.

   Returns the computed time_t value, or -1 if no format matched.

   The parser is built on strptime, tried with one format string after
   another.  That makes it less lenient than would be ideal: thanks to
   strptime it does accept shortened months, one-digit days, etc., but
   given the multitude of ways a time can be written, an ideal HTTP
   time parser would be even more forgiving.  It should completely
   ignore things like week days and look only at the various ways of
   writing years, months, days, hours, minutes, and seconds.  For
   example, it would be nice if it accepted ISO 8601 out of the box.

   Historical note from the original author: free and PD code was
   investigated for this purpose, but none was usable.  getdate was
   big and unwieldy, and reportedly had potential copyright issues.
   Dr. Marcus Hennecke's atotm(), distributed with phttpd, is
   excellent, but cannot be used because it is not assigned to the
   FSF.  Hence strptime.  */

time_t
http_atotm (const char *time_string)
{
  /* NOTE: the Solaris strptime man page claims that %n and %t match
     white space, but that is not universally available.  A plain ` '
     is used instead to mean "skip all WS", which works under every
     strptime implementation that was tested.  */

  static const char *time_formats[] = {
    "%a, %d %b %Y %T",          /* rfc1123: Thu, 29 Jan 1998 22:12:57 */
    "%A, %d-%b-%y %T",          /* rfc850:  Thursday, 29-Jan-98 22:12:57 */
    "%a %b %d %T %Y",           /* asctime: Thu Jan 29 22:12:57 1998 */
    "%a, %d-%b-%Y %T"           /* cookies: Thu, 29-Jan-1998 22:12:57
                                   (used in Set-Cookie, defined in the
                                   Netscape cookie specification.) */
  };
  int fmt;

  for (fmt = 0; fmt < countof (time_formats); fmt++)
    {
      struct tm broken_down;
      const char *parsed_upto;

      /* Some versions of strptime use the existing contents of struct
         tm to recalculate the date according to format.  Start from
         all zeros so that stack garbage cannot influence the result.  */
      xzero (broken_down);

      /* Solaris strptime fails to recognize English month names in
         non-English locales, which is worked around by not setting
         the LC_TIME category.  Temporarily switching the locale to C
         around the call would be another way, but that is slow and
         messy.  GNU strptime does not have this problem because it
         recognizes English month names along with the local ones.  */
      parsed_upto = strptime (time_string, time_formats[fmt], &broken_down);

      if (!check_end (parsed_upto))
        continue;               /* this format did not fit; try the next */

      return timegm (&broken_down);
    }

  /* All formats have failed.  */
  return -1;
}
|
---|
2587 | |
---|
2588 |
|
---|
2589 | /* Authorization support: We support three authorization schemes:
|
---|
2590 |
|
---|
2591 | * `Basic' scheme, consisting of base64-ing USER:PASSWORD string;
|
---|
2592 |
|
---|
2593 | * `Digest' scheme, added by Junio Hamano <junio@twinsun.com>,
|
---|
2594 | consisting of answering to the server's challenge with the proper
|
---|
2595 | MD5 digests.
|
---|
2596 |
|
---|
2597 | * `NTLM' ("NT Lan Manager") scheme, based on code written by Daniel
|
---|
2598 | Stenberg for libcurl. Like digest, NTLM is based on a
|
---|
2599 | challenge-response mechanism, but unlike digest, it is non-standard
|
---|
2600 | (authenticates TCP connections rather than requests), undocumented
|
---|
2601 | and Microsoft-specific. */
|
---|
2602 |
|
---|
/* Build the authentication header contents for the `Basic' scheme:
   the string "USER:PASSWD" is base64-encoded and the result is
   appended to the literal "Basic ".  The two intermediate buffers
   are obtained with alloca; the returned string comes from
   concat_strings.  */

static char *
basic_authentication_encode (const char *user, const char *passwd)
{
  /* Length of "USER:PASSWD", not counting the terminating \0.  */
  int plain_len = strlen (user) + 1 + strlen (passwd);
  char *plain = (char *) alloca (plain_len + 1);
  char *encoded = (char *) alloca (BASE64_LENGTH (plain_len) + 1);

  sprintf (plain, "%s:%s", user, passwd);
  base64_encode (plain, plain_len, encoded);

  return concat_strings ("Basic ", encoded, (char *) 0);
}
|
---|
2621 |
|
---|
/* Advance X past every leading character for which ISSPACE is true.
   X must be a modifiable pointer lvalue; it is evaluated more than
   once, so it must not have side effects.  */
#define SKIP_WS(x) do {                         \
  for (; ISSPACE (*(x)); ++(x))                 \
    ;                                           \
} while (0)
|
---|
2626 |
|
---|
2627 | #ifdef ENABLE_DIGEST
|
---|
/* Parse one field of an HTTP `WWW-Authenticate:' header.  AU points
   to the beginning of the field.  If the field starts with ATTR_NAME
   (the digest code uses "realm", "opaque", and "nonce"), its quoted
   value is copied into a fresh string stored in *RET; whatever *RET
   held before is released first.

   Return value:
     0   the field does not start with ATTR_NAME; *RET is untouched;
    <0   the field starts with ATTR_NAME but is malformed (no `=', no
         opening or closing double quote, or premature end of string);
    >0   number of bytes consumed, counted from AU through the closing
         double quote.

   The name is compared with strncmp over strlen (ATTR_NAME) bytes
   only, i.e. it is a prefix match.  */
static int
extract_header_attr (const char *au, const char *attr_name, char **ret)
{
  const char *value_beg;
  const char *value_end;
  const char *cp;
  size_t name_len = strlen (attr_name);

  if (strncmp (au, attr_name, name_len) != 0)
    return 0;

  /* Expect `=' after the name, optionally preceded by whitespace.  */
  cp = au + name_len;
  if (*cp == '\0')
    return -1;
  SKIP_WS (cp);
  if (*cp != '=')
    return -1;

  /* Expect the opening quote, optionally preceded by whitespace.  */
  ++cp;
  if (*cp == '\0')
    return -1;
  SKIP_WS (cp);
  if (*cp != '\"')
    return -1;

  /* The value runs up to, but not including, the closing quote.  */
  value_beg = cp + 1;
  if (*value_beg == '\0')
    return -1;
  value_end = strchr (value_beg, '\"');
  if (value_end == NULL)
    return -1;

  xfree_null (*ret);
  *ret = strdupdelim (value_beg, value_end);
  return value_end - au + 1;
}
|
---|
2666 |
|
---|
2667 | /* Dump the hexadecimal representation of HASH to BUF. HASH should be
|
---|
2668 | an array of 16 bytes containing the hash keys, and BUF should be a
|
---|
2669 | buffer of 33 writable characters (32 for hex digits plus one for
|
---|
2670 | zero termination). */
|
---|
2671 | static void
|
---|
2672 | dump_hash (unsigned char *buf, const unsigned char *hash)
|
---|
2673 | {
|
---|
2674 | int i;
|
---|
2675 |
|
---|
2676 | for (i = 0; i < MD5_HASHLEN; i++, hash++)
|
---|
2677 | {
|
---|
2678 | *buf++ = XNUM_TO_digit (*hash >> 4);
|
---|
2679 | *buf++ = XNUM_TO_digit (*hash & 0xf);
|
---|
2680 | }
|
---|
2681 | *buf = '\0';
|
---|
2682 | }
|
---|
2683 |
|
---|
2684 | /* Take the line apart to find the challenge, and compose a digest
|
---|
2685 | authorization header. See RFC2069 section 2.1.2. */
|
---|
2686 | static char *
|
---|
2687 | digest_authentication_encode (const char *au, const char *user,
|
---|
2688 | const char *passwd, const char *method,
|
---|
2689 | const char *path)
|
---|
2690 | {
|
---|
2691 | static char *realm, *opaque, *nonce;
|
---|
2692 | static struct {
|
---|
2693 | const char *name;
|
---|
2694 | char **variable;
|
---|
2695 | } options[] = {
|
---|
2696 | { "realm", &realm },
|
---|
2697 | { "opaque", &opaque },
|
---|
2698 | { "nonce", &nonce }
|
---|
2699 | };
|
---|
2700 | char *res;
|
---|
2701 |
|
---|
2702 | realm = opaque = nonce = NULL;
|
---|
2703 |
|
---|
2704 | au += 6; /* skip over `Digest' */
|
---|
2705 | while (*au)
|
---|
2706 | {
|
---|
2707 | int i;
|
---|
2708 |
|
---|
2709 | SKIP_WS (au);
|
---|
2710 | for (i = 0; i < countof (options); i++)
|
---|
2711 | {
|
---|
2712 | int skip = extract_header_attr (au, options[i].name,
|
---|
2713 | options[i].variable);
|
---|
2714 | if (skip < 0)
|
---|
2715 | {
|
---|
2716 | xfree_null (realm);
|
---|
2717 | xfree_null (opaque);
|
---|
2718 | xfree_null (nonce);
|
---|
2719 | return NULL;
|
---|
2720 | }
|
---|
2721 | else if (skip)
|
---|
2722 | {
|
---|
2723 | au += skip;
|
---|
2724 | break;
|
---|
2725 | }
|
---|
2726 | }
|
---|
2727 | if (i == countof (options))
|
---|
2728 | {
|
---|
2729 | while (*au && *au != '=')
|
---|
2730 | au++;
|
---|
2731 | if (*au && *++au)
|
---|
2732 | {
|
---|
2733 | SKIP_WS (au);
|
---|
2734 | if (*au == '\"')
|
---|
2735 | {
|
---|
2736 | au++;
|
---|
2737 | while (*au && *au != '\"')
|
---|
2738 | au++;
|
---|
2739 | if (*au)
|
---|
2740 | au++;
|
---|
2741 | }
|
---|
2742 | }
|
---|
2743 | }
|
---|
2744 | while (*au && *au != ',')
|
---|
2745 | au++;
|
---|
2746 | if (*au)
|
---|
2747 | au++;
|
---|
2748 | }
|
---|
2749 | if (!realm || !nonce || !user || !passwd || !path || !method)
|
---|
2750 | {
|
---|
2751 | xfree_null (realm);
|
---|
2752 | xfree_null (opaque);
|
---|
2753 | xfree_null (nonce);
|
---|
2754 | return NULL;
|
---|
2755 | }
|
---|
2756 |
|
---|
2757 | /* Calculate the digest value. */
|
---|
2758 | {
|
---|
2759 | ALLOCA_MD5_CONTEXT (ctx);
|
---|
2760 | unsigned char hash[MD5_HASHLEN];
|
---|
2761 | unsigned char a1buf[MD5_HASHLEN * 2 + 1], a2buf[MD5_HASHLEN * 2 + 1];
|
---|
2762 | unsigned char response_digest[MD5_HASHLEN * 2 + 1];
|
---|
2763 |
|
---|
2764 | /* A1BUF = H(user ":" realm ":" password) */
|
---|
2765 | gen_md5_init (ctx);
|
---|
2766 | gen_md5_update ((unsigned char *)user, strlen (user), ctx);
|
---|
2767 | gen_md5_update ((unsigned char *)":", 1, ctx);
|
---|
2768 | gen_md5_update ((unsigned char *)realm, strlen (realm), ctx);
|
---|
2769 | gen_md5_update ((unsigned char *)":", 1, ctx);
|
---|
2770 | gen_md5_update ((unsigned char *)passwd, strlen (passwd), ctx);
|
---|
2771 | gen_md5_finish (ctx, hash);
|
---|
2772 | dump_hash (a1buf, hash);
|
---|
2773 |
|
---|
2774 | /* A2BUF = H(method ":" path) */
|
---|
2775 | gen_md5_init (ctx);
|
---|
2776 | gen_md5_update ((unsigned char *)method, strlen (method), ctx);
|
---|
2777 | gen_md5_update ((unsigned char *)":", 1, ctx);
|
---|
2778 | gen_md5_update ((unsigned char *)path, strlen (path), ctx);
|
---|
2779 | gen_md5_finish (ctx, hash);
|
---|
2780 | dump_hash (a2buf, hash);
|
---|
2781 |
|
---|
2782 | /* RESPONSE_DIGEST = H(A1BUF ":" nonce ":" A2BUF) */
|
---|
2783 | gen_md5_init (ctx);
|
---|
2784 | gen_md5_update (a1buf, MD5_HASHLEN * 2, ctx);
|
---|
2785 | gen_md5_update ((unsigned char *)":", 1, ctx);
|
---|
2786 | gen_md5_update ((unsigned char *)nonce, strlen (nonce), ctx);
|
---|
2787 | gen_md5_update ((unsigned char *)":", 1, ctx);
|
---|
2788 | gen_md5_update (a2buf, MD5_HASHLEN * 2, ctx);
|
---|
2789 | gen_md5_finish (ctx, hash);
|
---|
2790 | dump_hash (response_digest, hash);
|
---|
2791 |
|
---|
2792 | res = (char*) xmalloc (strlen (user)
|
---|
2793 | + strlen (user)
|
---|
2794 | + strlen (realm)
|
---|
2795 | + strlen (nonce)
|
---|
2796 | + strlen (path)
|
---|
2797 | + 2 * MD5_HASHLEN /*strlen (response_digest)*/
|
---|
2798 | + (opaque ? strlen (opaque) : 0)
|
---|
2799 | + 128);
|
---|
2800 | sprintf (res, "Digest \
|
---|
2801 | username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\", response=\"%s\"",
|
---|
2802 | user, realm, nonce, path, response_digest);
|
---|
2803 | if (opaque)
|
---|
2804 | {
|
---|
2805 | char *p = res + strlen (res);
|
---|
2806 | strcat (p, ", opaque=\"");
|
---|
2807 | strcat (p, opaque);
|
---|
2808 | strcat (p, "\"");
|
---|
2809 | }
|
---|
2810 | }
|
---|
2811 | return res;
|
---|
2812 | }
|
---|
2813 | #endif /* ENABLE_DIGEST */
|
---|
2814 |
|
---|
/* Length of a string literal, not counting its terminating \0.
   sizeof a literal includes the terminator, so the size of the empty
   literal (exactly one byte) is subtracted.  */
#define STRSIZE(literal) (sizeof (literal) - sizeof (""))
|
---|
2818 |
|
---|
/* Whether the chars in [b, e) begin with the string literal given as
   first argument, and that prefix is followed either by the end of
   the range (E itself, not necessarily a \0) or by a character for
   which ISSPACE is true.  The comparison is case-insensitive.

   LITERAL must be a string literal, because its length is taken with
   STRSIZE.  B and E are evaluated more than once, so they must not
   have side effects; both are parenthesized so that expression
   arguments such as `p + 1' expand correctly.  */
#define STARTS(literal, b, e)                                   \
  ((e) - (b) >= STRSIZE (literal)                               \
   && 0 == strncasecmp ((b), literal, STRSIZE (literal))        \
   && ((e) - (b) == STRSIZE (literal)                           \
       || ISSPACE ((b)[STRSIZE (literal)])))
|
---|
2827 |
|
---|
/* Return 1 if the header value in [HDRBEG, HDREND) starts with the
   name of an authentication scheme compiled into this build, and 0
   otherwise.  `Basic' is always checked; `Digest' and `NTLM' are
   checked only when ENABLE_DIGEST and ENABLE_NTLM, respectively, are
   defined.  */
static int
known_authentication_scheme_p (const char *hdrbeg, const char *hdrend)
{
  if (STARTS ("Basic", hdrbeg, hdrend))
    return 1;
#ifdef ENABLE_DIGEST
  if (STARTS ("Digest", hdrbeg, hdrend))
    return 1;
#endif
#ifdef ENABLE_NTLM
  if (STARTS ("NTLM", hdrbeg, hdrend))
    return 1;
#endif
  return 0;
}
|
---|
2840 |
|
---|
2841 | #undef STARTS
|
---|
2842 |
|
---|
/* Create the HTTP authorization request header.  AU is the value of
   the `WWW-Authenticate' response header; the scheme it names selects
   which encoder produces the result.  `Basic' is always handled;
   `Digest' and `NTLM' are handled when ENABLE_DIGEST and ENABLE_NTLM,
   respectively, are defined.

   *FINISHED is set to 1 for Basic and Digest, and for NTLM when
   ntlm_input rejects AU (in which case NULL is returned); otherwise
   ntlm_output is given FINISHED to fill in.

   The scheme is chosen by the upper-cased first character of AU only,
   which is sufficient because callers pass only values approved by
   known_authentication_scheme_p.  Anything else aborts.  */
static char *
create_authorization_line (const char *au, const char *user,
                           const char *passwd, const char *method,
                           const char *path, int *finished)
{
  const int scheme = TOUPPER (*au);

  if (scheme == 'B')            /* Basic */
    {
      *finished = 1;
      return basic_authentication_encode (user, passwd);
    }
#ifdef ENABLE_DIGEST
  if (scheme == 'D')            /* Digest */
    {
      *finished = 1;
      return digest_authentication_encode (au, user, passwd, method, path);
    }
#endif
#ifdef ENABLE_NTLM
  if (scheme == 'N')            /* NTLM */
    {
      if (ntlm_input (&pconn.ntlm, au))
        return ntlm_output (&pconn.ntlm, user, passwd, finished);

      *finished = 1;
      return NULL;
    }
#endif

  /* We shouldn't get here -- this function should be only called
     with values approved by known_authentication_scheme_p.  */
  abort ();
}
|
---|
2880 | |
---|
2881 |
|
---|
2882 | void
|
---|
2883 | save_cookies (void)
|
---|
2884 | {
|
---|
2885 | if (wget_cookie_jar)
|
---|
2886 | cookie_jar_save (wget_cookie_jar, opt.cookies_output);
|
---|
2887 | }
|
---|
2888 |
|
---|
2889 | void
|
---|
2890 | http_cleanup (void)
|
---|
2891 | {
|
---|
2892 | xfree_null (pconn.host);
|
---|
2893 | if (wget_cookie_jar)
|
---|
2894 | cookie_jar_delete (wget_cookie_jar);
|
---|
2895 | }
|
---|