| 1 | /* HTTP support.
|
|---|
| 2 | Copyright (C) 2005 Free Software Foundation, Inc.
|
|---|
| 3 |
|
|---|
| 4 | This file is part of GNU Wget.
|
|---|
| 5 |
|
|---|
| 6 | GNU Wget is free software; you can redistribute it and/or modify
|
|---|
| 7 | it under the terms of the GNU General Public License as published by
|
|---|
| 8 | the Free Software Foundation; either version 2 of the License, or
|
|---|
| 9 | (at your option) any later version.
|
|---|
| 10 |
|
|---|
| 11 | GNU Wget is distributed in the hope that it will be useful,
|
|---|
| 12 | but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|---|
| 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|---|
| 14 | GNU General Public License for more details.
|
|---|
| 15 |
|
|---|
| 16 | You should have received a copy of the GNU General Public License
|
|---|
| 17 | along with Wget; if not, write to the Free Software
|
|---|
| 18 | Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|---|
| 19 |
|
|---|
| 20 | In addition, as a special exception, the Free Software Foundation
|
|---|
| 21 | gives permission to link the code of its release of Wget with the
|
|---|
| 22 | OpenSSL project's "OpenSSL" library (or with modified versions of it
|
|---|
| 23 | that use the same license as the "OpenSSL" library), and distribute
|
|---|
| 24 | the linked executables. You must obey the GNU General Public License
|
|---|
| 25 | in all respects for all of the code used other than "OpenSSL". If you
|
|---|
| 26 | modify this file, you may extend this exception to your version of the
|
|---|
| 27 | file, but you are not obligated to do so. If you do not wish to do
|
|---|
| 28 | so, delete this exception statement from your version. */
|
|---|
| 29 |
|
|---|
| 30 | #include <config.h>
|
|---|
| 31 |
|
|---|
| 32 | #include <stdio.h>
|
|---|
| 33 | #include <stdlib.h>
|
|---|
| 34 | #include <sys/types.h>
|
|---|
| 35 | #ifdef HAVE_STRING_H
|
|---|
| 36 | # include <string.h>
|
|---|
| 37 | #else
|
|---|
| 38 | # include <strings.h>
|
|---|
| 39 | #endif
|
|---|
| 40 | #ifdef HAVE_UNISTD_H
|
|---|
| 41 | # include <unistd.h>
|
|---|
| 42 | #endif
|
|---|
| 43 | #include <assert.h>
|
|---|
| 44 | #include <errno.h>
|
|---|
| 45 | #if TIME_WITH_SYS_TIME
|
|---|
| 46 | # include <sys/time.h>
|
|---|
| 47 | # include <time.h>
|
|---|
| 48 | #else
|
|---|
| 49 | # if HAVE_SYS_TIME_H
|
|---|
| 50 | # include <sys/time.h>
|
|---|
| 51 | # else
|
|---|
| 52 | # include <time.h>
|
|---|
| 53 | # endif
|
|---|
| 54 | #endif
|
|---|
| 55 | #ifndef errno
|
|---|
| 56 | extern int errno;
|
|---|
| 57 | #endif
|
|---|
| 58 |
|
|---|
| 59 | #include "wget.h"
|
|---|
| 60 | #include "utils.h"
|
|---|
| 61 | #include "url.h"
|
|---|
| 62 | #include "host.h"
|
|---|
| 63 | #include "retr.h"
|
|---|
| 64 | #include "connect.h"
|
|---|
| 65 | #include "netrc.h"
|
|---|
| 66 | #ifdef HAVE_SSL
|
|---|
| 67 | # include "ssl.h"
|
|---|
| 68 | #endif
|
|---|
| 69 | #ifdef ENABLE_NTLM
|
|---|
| 70 | # include "http-ntlm.h"
|
|---|
| 71 | #endif
|
|---|
| 72 | #include "cookies.h"
|
|---|
| 73 | #ifdef ENABLE_DIGEST
|
|---|
| 74 | # include "gen-md5.h"
|
|---|
| 75 | #endif
|
|---|
| 76 | #include "convert.h"
|
|---|
| 77 |
|
|---|
| 78 | extern char *version_string;
|
|---|
| 79 | extern SUM_SIZE_INT total_downloaded_bytes;
|
|---|
| 80 |
|
|---|
| 81 | extern FILE *output_stream;
|
|---|
| 82 | extern int output_stream_regular;
|
|---|
| 83 |
|
|---|
/* Smaller of X and Y.  Only defined when no earlier header supplied a
   MIN of its own.  Each argument can be evaluated twice, so callers
   must not pass expressions with side effects.  */
#ifndef MIN
# define MIN(x, y) ((y) < (x) ? (y) : (x))
#endif
|
|---|
| 87 |
|
|---|
| 88 | |
|---|
| 89 |
|
|---|
/* File-local cookie state.  NOTE(review): the code that sets and
   reads these lies outside this part of the file; going by the names
   alone, the flag presumably records whether cookies have been loaded
   and the pointer is the jar used for HTTP requests -- confirm
   against the users.  */
static int cookies_loaded_p;
static struct cookie_jar *wget_cookie_jar;
|
|---|
| 92 |
|
|---|
/* MIME type strings for HTML and XHTML documents.  */
#define TEXTHTML_S  "text/html"
#define TEXTXHTML_S "application/xhtml+xml"
|
|---|
| 95 |
|
|---|
/* HTTP/1.0 status codes from RFC1945, provided for reference.  The
   two codes marked below come from HTTP/1.1.  */

/* Successful 2xx.  */
#define HTTP_STATUS_OK                    200
#define HTTP_STATUS_CREATED               201
#define HTTP_STATUS_ACCEPTED              202
#define HTTP_STATUS_NO_CONTENT            204
#define HTTP_STATUS_PARTIAL_CONTENTS      206

/* Redirection 3xx.  */
#define HTTP_STATUS_MULTIPLE_CHOICES      300
#define HTTP_STATUS_MOVED_PERMANENTLY     301
#define HTTP_STATUS_MOVED_TEMPORARILY     302
#define HTTP_STATUS_SEE_OTHER             303 /* from HTTP/1.1 */
#define HTTP_STATUS_NOT_MODIFIED          304
#define HTTP_STATUS_TEMPORARY_REDIRECT    307 /* from HTTP/1.1 */

/* Client error 4xx.  */
#define HTTP_STATUS_BAD_REQUEST           400
#define HTTP_STATUS_UNAUTHORIZED          401
#define HTTP_STATUS_FORBIDDEN             403
#define HTTP_STATUS_NOT_FOUND             404
#define HTTP_STATUS_RANGE_NOT_SATISFIABLE 416

/* Server errors 5xx.  */
#define HTTP_STATUS_INTERNAL              500
#define HTTP_STATUS_NOT_IMPLEMENTED       501
#define HTTP_STATUS_BAD_GATEWAY           502
#define HTTP_STATUS_UNAVAILABLE           503

/* Predicates that classify a numeric status code X.  */

/* True for any status in the range 200..299.  */
#define H_20X(x) ((x) >= 200 && (x) < 300)

/* True only for 206, the reply to a range request.  */
#define H_PARTIAL(x) ((x) == HTTP_STATUS_PARTIAL_CONTENTS)

/* True for the redirect codes 301, 302, 303 and 307.  Note that 300
   and 304 are deliberately not included.  */
#define H_REDIRECTED(x) ((x) == HTTP_STATUS_MOVED_PERMANENTLY		\
			 || (x) == HTTP_STATUS_MOVED_TEMPORARILY	\
			 || (x) == HTTP_STATUS_SEE_OTHER		\
			 || (x) == HTTP_STATUS_TEMPORARY_REDIRECT)
|
|---|
| 132 | |
|---|
| 133 |
|
|---|
/* Release policy of a request header: which of its two strings are
   freed when the header is released (see release_header).  */
enum rp {
  rel_none,			/* free neither string */
  rel_name,			/* free the name only */
  rel_value,			/* free the value only */
  rel_both			/* free both name and value */
};
|
|---|
| 137 |
|
|---|
| 138 | struct request {
|
|---|
| 139 | const char *method;
|
|---|
| 140 | char *arg;
|
|---|
| 141 |
|
|---|
| 142 | struct request_header {
|
|---|
| 143 | char *name, *value;
|
|---|
| 144 | enum rp release_policy;
|
|---|
| 145 | } *headers;
|
|---|
| 146 | int hcount, hcapacity;
|
|---|
| 147 | };
|
|---|
| 148 |
|
|---|
| 149 | /* Create a new, empty request. At least request_set_method must be
|
|---|
| 150 | called before the request can be used. */
|
|---|
| 151 |
|
|---|
| 152 | static struct request *
|
|---|
| 153 | request_new (void)
|
|---|
| 154 | {
|
|---|
| 155 | struct request *req = xnew0 (struct request);
|
|---|
| 156 | req->hcapacity = 8;
|
|---|
| 157 | req->headers = xnew_array (struct request_header, req->hcapacity);
|
|---|
| 158 | return req;
|
|---|
| 159 | }
|
|---|
| 160 |
|
|---|
| 161 | /* Set the request's method and its arguments. METH should be a
|
|---|
| 162 | literal string (or it should outlive the request) because it will
|
|---|
| 163 | not be freed. ARG will be freed by request_free. */
|
|---|
| 164 |
|
|---|
| 165 | static void
|
|---|
| 166 | request_set_method (struct request *req, const char *meth, char *arg)
|
|---|
| 167 | {
|
|---|
| 168 | req->method = meth;
|
|---|
| 169 | req->arg = arg;
|
|---|
| 170 | }
|
|---|
| 171 |
|
|---|
| 172 | /* Return the method string passed with the last call to
|
|---|
| 173 | request_set_method. */
|
|---|
| 174 |
|
|---|
| 175 | static const char *
|
|---|
| 176 | request_method (const struct request *req)
|
|---|
| 177 | {
|
|---|
| 178 | return req->method;
|
|---|
| 179 | }
|
|---|
| 180 |
|
|---|
| 181 | /* Free one header according to the release policy specified with
|
|---|
| 182 | request_set_header. */
|
|---|
| 183 |
|
|---|
| 184 | static void
|
|---|
| 185 | release_header (struct request_header *hdr)
|
|---|
| 186 | {
|
|---|
| 187 | switch (hdr->release_policy)
|
|---|
| 188 | {
|
|---|
| 189 | case rel_none:
|
|---|
| 190 | break;
|
|---|
| 191 | case rel_name:
|
|---|
| 192 | xfree (hdr->name);
|
|---|
| 193 | break;
|
|---|
| 194 | case rel_value:
|
|---|
| 195 | xfree (hdr->value);
|
|---|
| 196 | break;
|
|---|
| 197 | case rel_both:
|
|---|
| 198 | xfree (hdr->name);
|
|---|
| 199 | xfree (hdr->value);
|
|---|
| 200 | break;
|
|---|
| 201 | }
|
|---|
| 202 | }
|
|---|
| 203 |
|
|---|
| 204 | /* Set the request named NAME to VALUE. Specifically, this means that
|
|---|
| 205 | a "NAME: VALUE\r\n" header line will be used in the request. If a
|
|---|
| 206 | header with the same name previously existed in the request, its
|
|---|
| 207 | value will be replaced by this one. A NULL value means do nothing.
|
|---|
| 208 |
|
|---|
| 209 | RELEASE_POLICY determines whether NAME and VALUE should be released
|
|---|
| 210 | (freed) with request_free. Allowed values are:
|
|---|
| 211 |
|
|---|
| 212 | - rel_none - don't free NAME or VALUE
|
|---|
| 213 | - rel_name - free NAME when done
|
|---|
| 214 | - rel_value - free VALUE when done
|
|---|
| 215 | - rel_both - free both NAME and VALUE when done
|
|---|
| 216 |
|
|---|
| 217 | Setting release policy is useful when arguments come from different
|
|---|
| 218 | sources. For example:
|
|---|
| 219 |
|
|---|
| 220 | // Don't free literal strings!
|
|---|
| 221 | request_set_header (req, "Pragma", "no-cache", rel_none);
|
|---|
| 222 |
|
|---|
| 223 | // Don't free a global variable, we'll need it later.
|
|---|
| 224 | request_set_header (req, "Referer", opt.referer, rel_none);
|
|---|
| 225 |
|
|---|
| 226 | // Value freshly allocated, free it when done.
|
|---|
| 227 | request_set_header (req, "Range",
|
|---|
| 228 | aprintf ("bytes=%s-", number_to_static_string (hs->restval)),
|
|---|
| 229 | rel_value);
|
|---|
| 230 | */
|
|---|
| 231 |
|
|---|
| 232 | static void
|
|---|
| 233 | request_set_header (struct request *req, char *name, char *value,
|
|---|
| 234 | enum rp release_policy)
|
|---|
| 235 | {
|
|---|
| 236 | struct request_header *hdr;
|
|---|
| 237 | int i;
|
|---|
| 238 |
|
|---|
| 239 | if (!value)
|
|---|
| 240 | {
|
|---|
| 241 | /* A NULL value is a no-op; if freeing the name is requested,
|
|---|
| 242 | free it now to avoid leaks. */
|
|---|
| 243 | if (release_policy == rel_name || release_policy == rel_both)
|
|---|
| 244 | xfree (name);
|
|---|
| 245 | return;
|
|---|
| 246 | }
|
|---|
| 247 |
|
|---|
| 248 | for (i = 0; i < req->hcount; i++)
|
|---|
| 249 | {
|
|---|
| 250 | hdr = &req->headers[i];
|
|---|
| 251 | if (0 == strcasecmp (name, hdr->name))
|
|---|
| 252 | {
|
|---|
| 253 | /* Replace existing header. */
|
|---|
| 254 | release_header (hdr);
|
|---|
| 255 | hdr->name = name;
|
|---|
| 256 | hdr->value = value;
|
|---|
| 257 | hdr->release_policy = release_policy;
|
|---|
| 258 | return;
|
|---|
| 259 | }
|
|---|
| 260 | }
|
|---|
| 261 |
|
|---|
| 262 | /* Install new header. */
|
|---|
| 263 |
|
|---|
| 264 | if (req->hcount >= req->hcapacity)
|
|---|
| 265 | {
|
|---|
| 266 | req->hcapacity <<= 1;
|
|---|
| 267 | req->headers = xrealloc (req->headers, req->hcapacity * sizeof (*hdr));
|
|---|
| 268 | }
|
|---|
| 269 | hdr = &req->headers[req->hcount++];
|
|---|
| 270 | hdr->name = name;
|
|---|
| 271 | hdr->value = value;
|
|---|
| 272 | hdr->release_policy = release_policy;
|
|---|
| 273 | }
|
|---|
| 274 |
|
|---|
| 275 | /* Like request_set_header, but sets the whole header line, as
|
|---|
| 276 | provided by the user using the `--header' option. For example,
|
|---|
| 277 | request_set_user_header (req, "Foo: bar") works just like
|
|---|
| 278 | request_set_header (req, "Foo", "bar"). */
|
|---|
| 279 |
|
|---|
| 280 | static void
|
|---|
| 281 | request_set_user_header (struct request *req, const char *header)
|
|---|
| 282 | {
|
|---|
| 283 | char *name;
|
|---|
| 284 | const char *p = strchr (header, ':');
|
|---|
| 285 | if (!p)
|
|---|
| 286 | return;
|
|---|
| 287 | BOUNDED_TO_ALLOCA (header, p, name);
|
|---|
| 288 | ++p;
|
|---|
| 289 | while (ISSPACE (*p))
|
|---|
| 290 | ++p;
|
|---|
| 291 | request_set_header (req, xstrdup (name), (char *) p, rel_name);
|
|---|
| 292 | }
|
|---|
| 293 |
|
|---|
| 294 | /* Remove the header with specified name from REQ. Returns 1 if the
|
|---|
| 295 | header was actually removed, 0 otherwise. */
|
|---|
| 296 |
|
|---|
| 297 | static int
|
|---|
| 298 | request_remove_header (struct request *req, char *name)
|
|---|
| 299 | {
|
|---|
| 300 | int i;
|
|---|
| 301 | for (i = 0; i < req->hcount; i++)
|
|---|
| 302 | {
|
|---|
| 303 | struct request_header *hdr = &req->headers[i];
|
|---|
| 304 | if (0 == strcasecmp (name, hdr->name))
|
|---|
| 305 | {
|
|---|
| 306 | release_header (hdr);
|
|---|
| 307 | /* Move the remaining headers by one. */
|
|---|
| 308 | if (i < req->hcount - 1)
|
|---|
| 309 | memmove (hdr, hdr + 1, (req->hcount - i - 1) * sizeof (*hdr));
|
|---|
| 310 | --req->hcount;
|
|---|
| 311 | return 1;
|
|---|
| 312 | }
|
|---|
| 313 | }
|
|---|
| 314 | return 0;
|
|---|
| 315 | }
|
|---|
| 316 |
|
|---|
| 317 | #define APPEND(p, str) do { \
|
|---|
| 318 | int A_len = strlen (str); \
|
|---|
| 319 | memcpy (p, str, A_len); \
|
|---|
| 320 | p += A_len; \
|
|---|
| 321 | } while (0)
|
|---|
| 322 |
|
|---|
| 323 | /* Construct the request and write it to FD using fd_write. */
|
|---|
| 324 |
|
|---|
| 325 | static int
|
|---|
| 326 | request_send (const struct request *req, int fd)
|
|---|
| 327 | {
|
|---|
| 328 | char *request_string, *p;
|
|---|
| 329 | int i, size, write_error;
|
|---|
| 330 |
|
|---|
| 331 | /* Count the request size. */
|
|---|
| 332 | size = 0;
|
|---|
| 333 |
|
|---|
| 334 | /* METHOD " " ARG " " "HTTP/1.0" "\r\n" */
|
|---|
| 335 | size += strlen (req->method) + 1 + strlen (req->arg) + 1 + 8 + 2;
|
|---|
| 336 |
|
|---|
| 337 | for (i = 0; i < req->hcount; i++)
|
|---|
| 338 | {
|
|---|
| 339 | struct request_header *hdr = &req->headers[i];
|
|---|
| 340 | /* NAME ": " VALUE "\r\n" */
|
|---|
| 341 | size += strlen (hdr->name) + 2 + strlen (hdr->value) + 2;
|
|---|
| 342 | }
|
|---|
| 343 |
|
|---|
| 344 | /* "\r\n\0" */
|
|---|
| 345 | size += 3;
|
|---|
| 346 |
|
|---|
| 347 | p = request_string = alloca_array (char, size);
|
|---|
| 348 |
|
|---|
| 349 | /* Generate the request. */
|
|---|
| 350 |
|
|---|
| 351 | APPEND (p, req->method); *p++ = ' ';
|
|---|
| 352 | APPEND (p, req->arg); *p++ = ' ';
|
|---|
| 353 | memcpy (p, "HTTP/1.0\r\n", 10); p += 10;
|
|---|
| 354 |
|
|---|
| 355 | for (i = 0; i < req->hcount; i++)
|
|---|
| 356 | {
|
|---|
| 357 | struct request_header *hdr = &req->headers[i];
|
|---|
| 358 | APPEND (p, hdr->name);
|
|---|
| 359 | *p++ = ':', *p++ = ' ';
|
|---|
| 360 | APPEND (p, hdr->value);
|
|---|
| 361 | *p++ = '\r', *p++ = '\n';
|
|---|
| 362 | }
|
|---|
| 363 |
|
|---|
| 364 | *p++ = '\r', *p++ = '\n', *p++ = '\0';
|
|---|
| 365 | assert (p - request_string == size);
|
|---|
| 366 |
|
|---|
| 367 | #undef APPEND
|
|---|
| 368 |
|
|---|
| 369 | DEBUGP (("\n---request begin---\n%s---request end---\n", request_string));
|
|---|
| 370 |
|
|---|
| 371 | /* Send the request to the server. */
|
|---|
| 372 |
|
|---|
| 373 | write_error = fd_write (fd, request_string, size - 1, -1.0);
|
|---|
| 374 | if (write_error < 0)
|
|---|
| 375 | logprintf (LOG_VERBOSE, _("Failed writing HTTP request: %s.\n"),
|
|---|
| 376 | strerror (errno));
|
|---|
| 377 | return write_error;
|
|---|
| 378 | }
|
|---|
| 379 |
|
|---|
| 380 | /* Release the resources used by REQ. */
|
|---|
| 381 |
|
|---|
| 382 | static void
|
|---|
| 383 | request_free (struct request *req)
|
|---|
| 384 | {
|
|---|
| 385 | int i;
|
|---|
| 386 | xfree_null (req->arg);
|
|---|
| 387 | for (i = 0; i < req->hcount; i++)
|
|---|
| 388 | release_header (&req->headers[i]);
|
|---|
| 389 | xfree_null (req->headers);
|
|---|
| 390 | xfree (req);
|
|---|
| 391 | }
|
|---|
| 392 |
|
|---|
| 393 | /* Send the contents of FILE_NAME to SOCK. Make sure that exactly
|
|---|
| 394 | PROMISED_SIZE bytes are sent over the wire -- if the file is
|
|---|
| 395 | longer, read only that much; if the file is shorter, report an error. */
|
|---|
| 396 |
|
|---|
| 397 | static int
|
|---|
| 398 | post_file (int sock, const char *file_name, wgint promised_size)
|
|---|
| 399 | {
|
|---|
| 400 | static char chunk[8192];
|
|---|
| 401 | wgint written = 0;
|
|---|
| 402 | int write_error;
|
|---|
| 403 | FILE *fp;
|
|---|
| 404 |
|
|---|
| 405 | DEBUGP (("[writing POST file %s ... ", file_name));
|
|---|
| 406 |
|
|---|
| 407 | fp = fopen (file_name, "rb");
|
|---|
| 408 | if (!fp)
|
|---|
| 409 | return -1;
|
|---|
| 410 | while (!feof (fp) && written < promised_size)
|
|---|
| 411 | {
|
|---|
| 412 | int towrite;
|
|---|
| 413 | int length = fread (chunk, 1, sizeof (chunk), fp);
|
|---|
| 414 | if (length == 0)
|
|---|
| 415 | break;
|
|---|
| 416 | towrite = MIN (promised_size - written, length);
|
|---|
| 417 | write_error = fd_write (sock, chunk, towrite, -1.0);
|
|---|
| 418 | if (write_error < 0)
|
|---|
| 419 | {
|
|---|
| 420 | fclose (fp);
|
|---|
| 421 | return -1;
|
|---|
| 422 | }
|
|---|
| 423 | written += towrite;
|
|---|
| 424 | }
|
|---|
| 425 | fclose (fp);
|
|---|
| 426 |
|
|---|
| 427 | /* If we've written less than was promised, report a (probably
|
|---|
| 428 | nonsensical) error rather than break the promise. */
|
|---|
| 429 | if (written < promised_size)
|
|---|
| 430 | {
|
|---|
| 431 | errno = EINVAL;
|
|---|
| 432 | return -1;
|
|---|
| 433 | }
|
|---|
| 434 |
|
|---|
| 435 | assert (written == promised_size);
|
|---|
| 436 | DEBUGP (("done]\n"));
|
|---|
| 437 | return 0;
|
|---|
| 438 | }
|
|---|
| 439 | |
|---|
| 440 |
|
|---|
/* Look for the end of an HTTP response head in HUNK, of which the
   first OLDLEN bytes were examined before and the next PEEKLEN bytes
   are new.  (It is passed to fd_read_hunk as the terminator callback
   by read_http_response_head.)

   Returns a pointer just past the empty line ("\n\r\n" or "\n\n")
   that ends the head, or NULL if no such line is present yet.  When
   called with OLDLEN of 0 on data that does not begin with "HTTP",
   returns HUNK itself, i.e. an empty head: such a response is
   HTTP/0.9 and consists of body only.  */

static const char *
response_head_terminator (const char *hunk, int oldlen, int peeklen)
{
  const char *scan, *limit;

  if (oldlen == 0)
    {
      /* Compare only as many bytes as were actually peeked.  */
      int probe = peeklen > 4 ? 4 : peeklen;
      if (memcmp (hunk, "HTTP", probe) != 0)
	return hunk;
    }

  /* Back up by four bytes so that a terminator straddling the old
     and the new data is not missed.  */
  scan = oldlen < 4 ? hunk : hunk + oldlen - 4;
  limit = hunk + oldlen + peeklen;

  while (scan < limit - 1)
    {
      if (*scan == '\n')
	{
	  if (scan < limit - 2 && scan[1] == '\r' && scan[2] == '\n')
	    return scan + 3;
	  if (scan[1] == '\n')
	    return scan + 2;
	}
      ++scan;
    }

  return NULL;
}
|
|---|
| 470 |
|
|---|
| 471 | /* The maximum size of a single HTTP response we care to read. This
|
|---|
| 472 | is not meant to impose an arbitrary limit, but to protect the user
|
|---|
| 473 | from Wget slurping up available memory upon encountering malicious
|
|---|
| 474 | or buggy server output. Define it to 0 to remove the limit. */
|
|---|
| 475 |
|
|---|
| 476 | #define HTTP_RESPONSE_MAX_SIZE 65536
|
|---|
| 477 |
|
|---|
| 478 | /* Read the HTTP request head from FD and return it. The error
|
|---|
| 479 | conditions are the same as with fd_read_hunk.
|
|---|
| 480 |
|
|---|
| 481 | To support HTTP/0.9 responses, this function tries to make sure
|
|---|
| 482 | that the data begins with "HTTP". If this is not the case, no data
|
|---|
| 483 | is read and an empty request is returned, so that the remaining
|
|---|
| 484 | data can be treated as body. */
|
|---|
| 485 |
|
|---|
| 486 | static char *
|
|---|
| 487 | read_http_response_head (int fd)
|
|---|
| 488 | {
|
|---|
| 489 | return fd_read_hunk (fd, response_head_terminator, 512,
|
|---|
| 490 | HTTP_RESPONSE_MAX_SIZE);
|
|---|
| 491 | }
|
|---|
| 492 |
|
|---|
/* A parsed HTTP response head.  */
struct response {
  /* The raw text of the response head.  It is referenced, not copied,
     and resp_free does not free it.  */
  const char *data;

  /* NULL-terminated array of pointers into DATA, one at the start of
     each header line.  It stays NULL for a headerless (HTTP/0.9)
     response.  For example, given this HTTP response:

       HTTP/1.0 200 Ok
       Description: some
        text
       Etag: x

     the pointers are placed like this:

       "HTTP/1.0 200 Ok\r\nDescription: some\r\n text\r\nEtag: x\r\n\r\n"
        ^                  ^                             ^          ^
        headers[0]         headers[1]                    headers[2] headers[3]

     I.e. headers[0] points to the status line, headers[1] to the end
     of the status line and the beginning of the first real header,
     and so on; a continuation line belongs to the header before it.
     The last non-NULL entry points to the empty line that ends the
     head.  */
  const char **headers;
};
|
|---|
| 517 |
|
|---|
| 518 | /* Create a new response object from the text of the HTTP response,
|
|---|
| 519 | available in HEAD. That text is automatically split into
|
|---|
| 520 | constituent header lines for fast retrieval using
|
|---|
| 521 | resp_header_*. */
|
|---|
| 522 |
|
|---|
| 523 | static struct response *
|
|---|
| 524 | resp_new (const char *head)
|
|---|
| 525 | {
|
|---|
| 526 | const char *hdr;
|
|---|
| 527 | int count, size;
|
|---|
| 528 |
|
|---|
| 529 | struct response *resp = xnew0 (struct response);
|
|---|
| 530 | resp->data = head;
|
|---|
| 531 |
|
|---|
| 532 | if (*head == '\0')
|
|---|
| 533 | {
|
|---|
| 534 | /* Empty head means that we're dealing with a headerless
|
|---|
| 535 | (HTTP/0.9) response. In that case, don't set HEADERS at
|
|---|
| 536 | all. */
|
|---|
| 537 | return resp;
|
|---|
| 538 | }
|
|---|
| 539 |
|
|---|
| 540 | /* Split HEAD into header lines, so that resp_header_* functions
|
|---|
| 541 | don't need to do this over and over again. */
|
|---|
| 542 |
|
|---|
| 543 | size = count = 0;
|
|---|
| 544 | hdr = head;
|
|---|
| 545 | while (1)
|
|---|
| 546 | {
|
|---|
| 547 | DO_REALLOC (resp->headers, size, count + 1, const char *);
|
|---|
| 548 | resp->headers[count++] = hdr;
|
|---|
| 549 |
|
|---|
| 550 | /* Break upon encountering an empty line. */
|
|---|
| 551 | if (!hdr[0] || (hdr[0] == '\r' && hdr[1] == '\n') || hdr[0] == '\n')
|
|---|
| 552 | break;
|
|---|
| 553 |
|
|---|
| 554 | /* Find the end of HDR, including continuations. */
|
|---|
| 555 | do
|
|---|
| 556 | {
|
|---|
| 557 | const char *end = strchr (hdr, '\n');
|
|---|
| 558 | if (end)
|
|---|
| 559 | hdr = end + 1;
|
|---|
| 560 | else
|
|---|
| 561 | hdr += strlen (hdr);
|
|---|
| 562 | }
|
|---|
| 563 | while (*hdr == ' ' || *hdr == '\t');
|
|---|
| 564 | }
|
|---|
| 565 | DO_REALLOC (resp->headers, size, count + 1, const char *);
|
|---|
| 566 | resp->headers[count] = NULL;
|
|---|
| 567 |
|
|---|
| 568 | return resp;
|
|---|
| 569 | }
|
|---|
| 570 |
|
|---|
| 571 | /* Locate the header named NAME in the request data, starting with
|
|---|
| 572 | position START. This allows the code to loop through the request
|
|---|
| 573 | data, filtering for all requests of a given name. Returns the
|
|---|
| 574 | found position, or -1 for failure. The code that uses this
|
|---|
| 575 | function typically looks like this:
|
|---|
| 576 |
|
|---|
| 577 | for (pos = 0; (pos = resp_header_locate (...)) != -1; pos++)
|
|---|
| 578 | ... do something with header ...
|
|---|
| 579 |
|
|---|
| 580 | If you only care about one header, use resp_header_get instead of
|
|---|
| 581 | this function. */
|
|---|
| 582 |
|
|---|
| 583 | static int
|
|---|
| 584 | resp_header_locate (const struct response *resp, const char *name, int start,
|
|---|
| 585 | const char **begptr, const char **endptr)
|
|---|
| 586 | {
|
|---|
| 587 | int i;
|
|---|
| 588 | const char **headers = resp->headers;
|
|---|
| 589 | int name_len;
|
|---|
| 590 |
|
|---|
| 591 | if (!headers || !headers[1])
|
|---|
| 592 | return -1;
|
|---|
| 593 |
|
|---|
| 594 | name_len = strlen (name);
|
|---|
| 595 | if (start > 0)
|
|---|
| 596 | i = start;
|
|---|
| 597 | else
|
|---|
| 598 | i = 1;
|
|---|
| 599 |
|
|---|
| 600 | for (; headers[i + 1]; i++)
|
|---|
| 601 | {
|
|---|
| 602 | const char *b = headers[i];
|
|---|
| 603 | const char *e = headers[i + 1];
|
|---|
| 604 | if (e - b > name_len
|
|---|
| 605 | && b[name_len] == ':'
|
|---|
| 606 | && 0 == strncasecmp (b, name, name_len))
|
|---|
| 607 | {
|
|---|
| 608 | b += name_len + 1;
|
|---|
| 609 | while (b < e && ISSPACE (*b))
|
|---|
| 610 | ++b;
|
|---|
| 611 | while (b < e && ISSPACE (e[-1]))
|
|---|
| 612 | --e;
|
|---|
| 613 | *begptr = b;
|
|---|
| 614 | *endptr = e;
|
|---|
| 615 | return i;
|
|---|
| 616 | }
|
|---|
| 617 | }
|
|---|
| 618 | return -1;
|
|---|
| 619 | }
|
|---|
| 620 |
|
|---|
/* Find the first header named NAME in RESP.  If it exists, set
   *BEGPTR to the start and *ENDPTR to the end of its value and return
   1.  Otherwise return 0.

   This function is used as a building block for resp_header_copy
   and resp_header_strdup.  */

static int
resp_header_get (const struct response *resp, const char *name,
		 const char **begptr, const char **endptr)
{
  return resp_header_locate (resp, name, 0, begptr, endptr) != -1;
}
|
|---|
| 635 |
|
|---|
/* Copy the value of the response header named NAME to BUF, which
   holds BUFSIZE bytes including the terminating 0; a longer value is
   truncated to fit.  Returns 1 if the header exists, 0 otherwise.  If
   there should be no limit on the size of the header, use
   resp_header_strdup instead.

   If BUFSIZE is 0, BUF is not touched, but the return value still
   tells whether the header is present.  */

static int
resp_header_copy (const struct response *resp, const char *name,
		  char *buf, int bufsize)
{
  const char *beg, *end;
  int found = resp_header_get (resp, name, &beg, &end);

  if (found && bufsize)
    {
      int len = MIN (end - beg, bufsize - 1);
      memcpy (buf, beg, len);
      buf[len] = '\0';
    }

  return found ? 1 : 0;
}
|
|---|
| 659 |
|
|---|
/* Return a freshly allocated copy of the value of the header named
   NAME in RESP, or NULL if RESP has no such header.  */

static char *
resp_header_strdup (const struct response *resp, const char *name)
{
  const char *beg, *end;

  if (resp_header_get (resp, name, &beg, &end))
    return strdupdelim (beg, end);
  return NULL;
}
|
|---|
| 671 |
|
|---|
| 672 | /* Parse the HTTP status line, which is of format:
|
|---|
| 673 |
|
|---|
| 674 | HTTP-Version SP Status-Code SP Reason-Phrase
|
|---|
| 675 |
|
|---|
| 676 | The function returns the status-code, or -1 if the status line
|
|---|
| 677 | appears malformed. The pointer to "reason-phrase" message is
|
|---|
| 678 | returned in *MESSAGE. */
|
|---|
| 679 |
|
|---|
| 680 | static int
|
|---|
| 681 | resp_status (const struct response *resp, char **message)
|
|---|
| 682 | {
|
|---|
| 683 | int status;
|
|---|
| 684 | const char *p, *end;
|
|---|
| 685 |
|
|---|
| 686 | if (!resp->headers)
|
|---|
| 687 | {
|
|---|
| 688 | /* For a HTTP/0.9 response, assume status 200. */
|
|---|
| 689 | if (message)
|
|---|
| 690 | *message = xstrdup (_("No headers, assuming HTTP/0.9"));
|
|---|
| 691 | return 200;
|
|---|
| 692 | }
|
|---|
| 693 |
|
|---|
| 694 | p = resp->headers[0];
|
|---|
| 695 | end = resp->headers[1];
|
|---|
| 696 |
|
|---|
| 697 | if (!end)
|
|---|
| 698 | return -1;
|
|---|
| 699 |
|
|---|
| 700 | /* "HTTP" */
|
|---|
| 701 | if (end - p < 4 || 0 != strncmp (p, "HTTP", 4))
|
|---|
| 702 | return -1;
|
|---|
| 703 | p += 4;
|
|---|
| 704 |
|
|---|
| 705 | /* Match the HTTP version. This is optional because Gnutella
|
|---|
| 706 | servers have been reported to not specify HTTP version. */
|
|---|
| 707 | if (p < end && *p == '/')
|
|---|
| 708 | {
|
|---|
| 709 | ++p;
|
|---|
| 710 | while (p < end && ISDIGIT (*p))
|
|---|
| 711 | ++p;
|
|---|
| 712 | if (p < end && *p == '.')
|
|---|
| 713 | ++p;
|
|---|
| 714 | while (p < end && ISDIGIT (*p))
|
|---|
| 715 | ++p;
|
|---|
| 716 | }
|
|---|
| 717 |
|
|---|
| 718 | while (p < end && ISSPACE (*p))
|
|---|
| 719 | ++p;
|
|---|
| 720 | if (end - p < 3 || !ISDIGIT (p[0]) || !ISDIGIT (p[1]) || !ISDIGIT (p[2]))
|
|---|
| 721 | return -1;
|
|---|
| 722 |
|
|---|
| 723 | status = 100 * (p[0] - '0') + 10 * (p[1] - '0') + (p[2] - '0');
|
|---|
| 724 | p += 3;
|
|---|
| 725 |
|
|---|
| 726 | if (message)
|
|---|
| 727 | {
|
|---|
| 728 | while (p < end && ISSPACE (*p))
|
|---|
| 729 | ++p;
|
|---|
| 730 | while (p < end && ISSPACE (end[-1]))
|
|---|
| 731 | --end;
|
|---|
| 732 | *message = strdupdelim (p, end);
|
|---|
| 733 | }
|
|---|
| 734 |
|
|---|
| 735 | return status;
|
|---|
| 736 | }
|
|---|
| 737 |
|
|---|
| 738 | /* Release the resources used by RESP. */
|
|---|
| 739 |
|
|---|
| 740 | static void
|
|---|
| 741 | resp_free (struct response *resp)
|
|---|
| 742 | {
|
|---|
| 743 | xfree_null (resp->headers);
|
|---|
| 744 | xfree (resp);
|
|---|
| 745 | }
|
|---|
| 746 |
|
|---|
| 747 | /* Print the server response, line by line, omitting the trailing CRLF
|
|---|
| 748 | from individual header lines, and prefixed with PREFIX. */
|
|---|
| 749 |
|
|---|
| 750 | static void
|
|---|
| 751 | print_server_response (const struct response *resp, const char *prefix)
|
|---|
| 752 | {
|
|---|
| 753 | int i;
|
|---|
| 754 | if (!resp->headers)
|
|---|
| 755 | return;
|
|---|
| 756 | for (i = 0; resp->headers[i + 1]; i++)
|
|---|
| 757 | {
|
|---|
| 758 | const char *b = resp->headers[i];
|
|---|
| 759 | const char *e = resp->headers[i + 1];
|
|---|
| 760 | /* Skip CRLF */
|
|---|
| 761 | if (b < e && e[-1] == '\n')
|
|---|
| 762 | --e;
|
|---|
| 763 | if (b < e && e[-1] == '\r')
|
|---|
| 764 | --e;
|
|---|
| 765 | /* This is safe even on printfs with broken handling of "%.<n>s"
|
|---|
| 766 | because resp->headers ends with \0. */
|
|---|
| 767 | logprintf (LOG_VERBOSE, "%s%.*s\n", prefix, e - b, b);
|
|---|
| 768 | }
|
|---|
| 769 | }
|
|---|
| 770 |
|
|---|
| 771 | /* Parse the `Content-Range' header and extract the information it
|
|---|
| 772 | contains. Returns 1 if successful, -1 otherwise. */
|
|---|
| 773 | static int
|
|---|
| 774 | parse_content_range (const char *hdr, wgint *first_byte_ptr,
|
|---|
| 775 | wgint *last_byte_ptr, wgint *entity_length_ptr)
|
|---|
| 776 | {
|
|---|
| 777 | wgint num;
|
|---|
| 778 |
|
|---|
| 779 | /* Ancient versions of Netscape proxy server, presumably predating
|
|---|
| 780 | rfc2068, sent out `Content-Range' without the "bytes"
|
|---|
| 781 | specifier. */
|
|---|
| 782 | if (!strncasecmp (hdr, "bytes", 5))
|
|---|
| 783 | {
|
|---|
| 784 | hdr += 5;
|
|---|
| 785 | /* "JavaWebServer/1.1.1" sends "bytes: x-y/z", contrary to the
|
|---|
| 786 | HTTP spec. */
|
|---|
| 787 | if (*hdr == ':')
|
|---|
| 788 | ++hdr;
|
|---|
| 789 | while (ISSPACE (*hdr))
|
|---|
| 790 | ++hdr;
|
|---|
| 791 | if (!*hdr)
|
|---|
| 792 | return 0;
|
|---|
| 793 | }
|
|---|
| 794 | if (!ISDIGIT (*hdr))
|
|---|
| 795 | return 0;
|
|---|
| 796 | for (num = 0; ISDIGIT (*hdr); hdr++)
|
|---|
| 797 | num = 10 * num + (*hdr - '0');
|
|---|
| 798 | if (*hdr != '-' || !ISDIGIT (*(hdr + 1)))
|
|---|
| 799 | return 0;
|
|---|
| 800 | *first_byte_ptr = num;
|
|---|
| 801 | ++hdr;
|
|---|
| 802 | for (num = 0; ISDIGIT (*hdr); hdr++)
|
|---|
| 803 | num = 10 * num + (*hdr - '0');
|
|---|
| 804 | if (*hdr != '/' || !ISDIGIT (*(hdr + 1)))
|
|---|
| 805 | return 0;
|
|---|
| 806 | *last_byte_ptr = num;
|
|---|
| 807 | ++hdr;
|
|---|
| 808 | for (num = 0; ISDIGIT (*hdr); hdr++)
|
|---|
| 809 | num = 10 * num + (*hdr - '0');
|
|---|
| 810 | *entity_length_ptr = num;
|
|---|
| 811 | return 1;
|
|---|
| 812 | }
|
|---|
| 813 |
|
|---|
| 814 | /* Read the body of the request, but don't store it anywhere and don't
|
|---|
| 815 | display a progress gauge. This is useful for reading the bodies of
|
|---|
| 816 | administrative responses to which we will soon issue another
|
|---|
| 817 | request. The response is not useful to the user, but reading it
|
|---|
| 818 | allows us to continue using the same connection to the server.
|
|---|
| 819 |
|
|---|
| 820 | If reading fails, 0 is returned, non-zero otherwise. In debug
|
|---|
| 821 | mode, the body is displayed for debugging purposes. */
|
|---|
| 822 |
|
|---|
| 823 | static int
|
|---|
| 824 | skip_short_body (int fd, wgint contlen)
|
|---|
| 825 | {
|
|---|
| 826 | enum {
|
|---|
| 827 | SKIP_SIZE = 512, /* size of the download buffer */
|
|---|
| 828 | SKIP_THRESHOLD = 4096 /* the largest size we read */
|
|---|
| 829 | };
|
|---|
| 830 | char dlbuf[SKIP_SIZE + 1];
|
|---|
| 831 | dlbuf[SKIP_SIZE] = '\0'; /* so DEBUGP can safely print it */
|
|---|
| 832 |
|
|---|
| 833 | /* We shouldn't get here with unknown contlen. (This will change
|
|---|
| 834 | with HTTP/1.1, which supports "chunked" transfer.) */
|
|---|
| 835 | assert (contlen != -1);
|
|---|
| 836 |
|
|---|
| 837 | /* If the body is too large, it makes more sense to simply close the
|
|---|
| 838 | connection than to try to read the body. */
|
|---|
| 839 | if (contlen > SKIP_THRESHOLD)
|
|---|
| 840 | return 0;
|
|---|
| 841 |
|
|---|
| 842 | DEBUGP (("Skipping %s bytes of body: [", number_to_static_string (contlen)));
|
|---|
| 843 |
|
|---|
| 844 | while (contlen > 0)
|
|---|
| 845 | {
|
|---|
| 846 | int ret = fd_read (fd, dlbuf, MIN (contlen, SKIP_SIZE), -1.0);
|
|---|
| 847 | if (ret <= 0)
|
|---|
| 848 | {
|
|---|
| 849 | /* Don't normally report the error since this is an
|
|---|
| 850 | optimization that should be invisible to the user. */
|
|---|
| 851 | DEBUGP (("] aborting (%s).\n",
|
|---|
| 852 | ret < 0 ? strerror (errno) : "EOF received"));
|
|---|
| 853 | return 0;
|
|---|
| 854 | }
|
|---|
| 855 | contlen -= ret;
|
|---|
| 856 | /* Safe even if %.*s bogusly expects terminating \0 because
|
|---|
| 857 | we've zero-terminated dlbuf above. */
|
|---|
| 858 | DEBUGP (("%.*s", ret, dlbuf));
|
|---|
| 859 | }
|
|---|
| 860 |
|
|---|
| 861 | DEBUGP (("] done.\n"));
|
|---|
| 862 | return 1;
|
|---|
| 863 | }
|
|---|
| 864 | |
|---|
| 865 |
|
|---|
/* Persistent connections.  Only one connection is cached: the most
   recently used one, and only if the HTTP server agreed to keep it
   open.  Everything known about it lives in the two variables below.
   Ideally, it should be possible to cache an arbitrary fixed number
   of these connections.  */

/* Non-zero while PCONN describes a registered persistent
   connection.  */
static int pconn_active;

static struct {
  /* Socket of the persistent connection.  */
  int socket;

  /* Host and port the persistent connection was registered for.
     HOST is a heap-allocated copy owned by this structure.  */
  char *host;
  int port;

  /* Whether an SSL handshake has occurred on this connection.  */
  int ssl;

  /* Whether the connection itself has been authorized.  Only NTLM
     does this: it authorizes *connections* rather than individual
     requests.  (That practice is peculiar for HTTP, but it is a
     useful optimization.)  */
  int authorized;

#ifdef ENABLE_NTLM
  /* NTLM state of the current connection.  */
  struct ntlmdata ntlm;
#endif
} pconn;
|
|---|
| 897 |
|
|---|
| 898 | /* Mark the persistent connection as invalid and free the resources it
|
|---|
| 899 | uses. This is used by the CLOSE_* macros after they forcefully
|
|---|
| 900 | close a registered persistent connection. */
|
|---|
| 901 |
|
|---|
| 902 | static void
|
|---|
| 903 | invalidate_persistent (void)
|
|---|
| 904 | {
|
|---|
| 905 | DEBUGP (("Disabling further reuse of socket %d.\n", pconn.socket));
|
|---|
| 906 | pconn_active = 0;
|
|---|
| 907 | fd_close (pconn.socket);
|
|---|
| 908 | xfree (pconn.host);
|
|---|
| 909 | xzero (pconn);
|
|---|
| 910 | }
|
|---|
| 911 |
|
|---|
| 912 | /* Register FD, which should be a TCP/IP connection to HOST:PORT, as
|
|---|
| 913 | persistent. This will enable someone to use the same connection
|
|---|
| 914 | later. In the context of HTTP, this must be called only AFTER the
|
|---|
| 915 | response has been received and the server has promised that the
|
|---|
| 916 | connection will remain alive.
|
|---|
| 917 |
|
|---|
| 918 | If a previous connection was persistent, it is closed. */
|
|---|
| 919 |
|
|---|
| 920 | static void
|
|---|
| 921 | register_persistent (const char *host, int port, int fd, int ssl)
|
|---|
| 922 | {
|
|---|
| 923 | if (pconn_active)
|
|---|
| 924 | {
|
|---|
| 925 | if (pconn.socket == fd)
|
|---|
| 926 | {
|
|---|
| 927 | /* The connection FD is already registered. */
|
|---|
| 928 | return;
|
|---|
| 929 | }
|
|---|
| 930 | else
|
|---|
| 931 | {
|
|---|
| 932 | /* The old persistent connection is still active; close it
|
|---|
| 933 | first. This situation arises whenever a persistent
|
|---|
| 934 | connection exists, but we then connect to a different
|
|---|
| 935 | host, and try to register a persistent connection to that
|
|---|
| 936 | one. */
|
|---|
| 937 | invalidate_persistent ();
|
|---|
| 938 | }
|
|---|
| 939 | }
|
|---|
| 940 |
|
|---|
| 941 | pconn_active = 1;
|
|---|
| 942 | pconn.socket = fd;
|
|---|
| 943 | pconn.host = xstrdup (host);
|
|---|
| 944 | pconn.port = port;
|
|---|
| 945 | pconn.ssl = ssl;
|
|---|
| 946 | pconn.authorized = 0;
|
|---|
| 947 |
|
|---|
| 948 | DEBUGP (("Registered socket %d for persistent reuse.\n", fd));
|
|---|
| 949 | }
|
|---|
| 950 |
|
|---|
| 951 | /* Return non-zero if a persistent connection is available for
|
|---|
| 952 | connecting to HOST:PORT. */
|
|---|
| 953 |
|
|---|
| 954 | static int
|
|---|
| 955 | persistent_available_p (const char *host, int port, int ssl,
|
|---|
| 956 | int *host_lookup_failed)
|
|---|
| 957 | {
|
|---|
| 958 | /* First, check whether a persistent connection is active at all. */
|
|---|
| 959 | if (!pconn_active)
|
|---|
| 960 | return 0;
|
|---|
| 961 |
|
|---|
| 962 | /* If we want SSL and the last connection wasn't or vice versa,
|
|---|
| 963 | don't use it. Checking for host and port is not enough because
|
|---|
| 964 | HTTP and HTTPS can apparently coexist on the same port. */
|
|---|
| 965 | if (ssl != pconn.ssl)
|
|---|
| 966 | return 0;
|
|---|
| 967 |
|
|---|
| 968 | /* If we're not connecting to the same port, we're not interested. */
|
|---|
| 969 | if (port != pconn.port)
|
|---|
| 970 | return 0;
|
|---|
| 971 |
|
|---|
| 972 | /* If the host is the same, we're in business. If not, there is
|
|---|
| 973 | still hope -- read below. */
|
|---|
| 974 | if (0 != strcasecmp (host, pconn.host))
|
|---|
| 975 | {
|
|---|
| 976 | /* Check if pconn.socket is talking to HOST under another name.
|
|---|
| 977 | This happens often when both sites are virtual hosts
|
|---|
| 978 | distinguished only by name and served by the same network
|
|---|
| 979 | interface, and hence the same web server (possibly set up by
|
|---|
| 980 | the ISP and serving many different web sites). This
|
|---|
| 981 | admittedly unconventional optimization does not contradict
|
|---|
| 982 | HTTP and works well with popular server software. */
|
|---|
| 983 |
|
|---|
| 984 | int found;
|
|---|
| 985 | ip_address ip;
|
|---|
| 986 | struct address_list *al;
|
|---|
| 987 |
|
|---|
| 988 | if (ssl)
|
|---|
| 989 | /* Don't try to talk to two different SSL sites over the same
|
|---|
| 990 | secure connection! (Besides, it's not clear that
|
|---|
| 991 | name-based virtual hosting is even possible with SSL.) */
|
|---|
| 992 | return 0;
|
|---|
| 993 |
|
|---|
| 994 | /* If pconn.socket's peer is one of the IP addresses HOST
|
|---|
| 995 | resolves to, pconn.socket is for all intents and purposes
|
|---|
| 996 | already talking to HOST. */
|
|---|
| 997 |
|
|---|
| 998 | if (!socket_ip_address (pconn.socket, &ip, ENDPOINT_PEER))
|
|---|
| 999 | {
|
|---|
| 1000 | /* Can't get the peer's address -- something must be very
|
|---|
| 1001 | wrong with the connection. */
|
|---|
| 1002 | invalidate_persistent ();
|
|---|
| 1003 | return 0;
|
|---|
| 1004 | }
|
|---|
| 1005 | al = lookup_host (host, 0);
|
|---|
| 1006 | if (!al)
|
|---|
| 1007 | {
|
|---|
| 1008 | *host_lookup_failed = 1;
|
|---|
| 1009 | return 0;
|
|---|
| 1010 | }
|
|---|
| 1011 |
|
|---|
| 1012 | found = address_list_contains (al, &ip);
|
|---|
| 1013 | address_list_release (al);
|
|---|
| 1014 |
|
|---|
| 1015 | if (!found)
|
|---|
| 1016 | return 0;
|
|---|
| 1017 |
|
|---|
| 1018 | /* The persistent connection's peer address was found among the
|
|---|
| 1019 | addresses HOST resolved to; therefore, pconn.sock is in fact
|
|---|
| 1020 | already talking to HOST -- no need to reconnect. */
|
|---|
| 1021 | }
|
|---|
| 1022 |
|
|---|
| 1023 | /* Finally, check whether the connection is still open. This is
|
|---|
| 1024 | important because most server implement a liberal (short) timeout
|
|---|
| 1025 | on persistent connections. Wget can of course always reconnect
|
|---|
| 1026 | if the connection doesn't work out, but it's nicer to know in
|
|---|
| 1027 | advance. This test is a logical followup of the first test, but
|
|---|
| 1028 | is "expensive" and therefore placed at the end of the list. */
|
|---|
| 1029 |
|
|---|
| 1030 | if (!test_socket_open (pconn.socket))
|
|---|
| 1031 | {
|
|---|
| 1032 | /* Oops, the socket is no longer open. Now that we know that,
|
|---|
| 1033 | let's invalidate the persistent connection before returning
|
|---|
| 1034 | 0. */
|
|---|
| 1035 | invalidate_persistent ();
|
|---|
| 1036 | return 0;
|
|---|
| 1037 | }
|
|---|
| 1038 |
|
|---|
| 1039 | return 1;
|
|---|
| 1040 | }
|
|---|
| 1041 |
|
|---|
/* The idea behind these two CLOSE macros is to distinguish between
   two cases: one when the job we've been doing is finished, and we
   want to close the connection and leave, and two when something is
   seriously wrong and we're closing the connection as part of
   cleanup.

   In case of keep_alive, CLOSE_FINISH should leave the connection
   open, while CLOSE_INVALIDATE should still close it.

   Note that the semantics of the flag `keep_alive' is "this
   connection *will* be reused (the server has promised not to close
   the connection once we're done)", while the semantics of
   `pconn_active && (fd) == pconn.socket' is "we're *now* using an
   active, registered connection".

   FD must be a modifiable int lvalue.  Whenever either macro closes
   the connection, FD is set to -1 afterwards, so the caller is never
   left holding a closed descriptor.  A registered persistent
   connection is closed through invalidate_persistent, which closes
   the socket itself; any other descriptor is closed with fd_close.
   CLOSE_FINISH reads a variable named `keep_alive' from the scope in
   which it is expanded.  */

#define CLOSE_FINISH(fd) do {                                   \
  if (!keep_alive)                                              \
    {                                                           \
      if (pconn_active && (fd) == pconn.socket)                 \
        invalidate_persistent ();                               \
      else                                                      \
        fd_close (fd);                                          \
      fd = -1;                                                  \
    }                                                           \
} while (0)

#define CLOSE_INVALIDATE(fd) do {                               \
  if (pconn_active && (fd) == pconn.socket)                     \
    invalidate_persistent ();                                   \
  else                                                          \
    fd_close (fd);                                              \
  fd = -1;                                                      \
} while (0)
|
|---|
| 1077 | |
|---|
| 1078 |
|
|---|
| 1079 | struct http_stat
|
|---|
| 1080 | {
|
|---|
| 1081 | wgint len; /* received length */
|
|---|
| 1082 | wgint contlen; /* expected length */
|
|---|
| 1083 | wgint restval; /* the restart value */
|
|---|
| 1084 | int res; /* the result of last read */
|
|---|
| 1085 | char *newloc; /* new location (redirection) */
|
|---|
| 1086 | char *remote_time; /* remote time-stamp string */
|
|---|
| 1087 | char *error; /* textual HTTP error */
|
|---|
| 1088 | int statcode; /* status code */
|
|---|
| 1089 | wgint rd_size; /* amount of data read from socket */
|
|---|
| 1090 | double dltime; /* time it took to download the data */
|
|---|
| 1091 | const char *referer; /* value of the referer header. */
|
|---|
| 1092 | char **local_file; /* local file. */
|
|---|
| 1093 | };
|
|---|
| 1094 |
|
|---|
| 1095 | static void
|
|---|
| 1096 | free_hstat (struct http_stat *hs)
|
|---|
| 1097 | {
|
|---|
| 1098 | xfree_null (hs->newloc);
|
|---|
| 1099 | xfree_null (hs->remote_time);
|
|---|
| 1100 | xfree_null (hs->error);
|
|---|
| 1101 |
|
|---|
| 1102 | /* Guard against being called twice. */
|
|---|
| 1103 | hs->newloc = NULL;
|
|---|
| 1104 | hs->remote_time = NULL;
|
|---|
| 1105 | hs->error = NULL;
|
|---|
| 1106 | }
|
|---|
| 1107 |
|
|---|
| 1108 | static char *create_authorization_line PARAMS ((const char *, const char *,
|
|---|
| 1109 | const char *, const char *,
|
|---|
| 1110 | const char *, int *));
|
|---|
| 1111 | static char *basic_authentication_encode PARAMS ((const char *, const char *));
|
|---|
| 1112 | static int known_authentication_scheme_p PARAMS ((const char *, const char *));
|
|---|
| 1113 |
|
|---|
| 1114 | time_t http_atotm PARAMS ((const char *));
|
|---|
| 1115 |
|
|---|
/* Non-zero if LINE begins with STRING_CONSTANT, compared without
   regard to case, and the match is followed either by whitespace or
   by the end of LINE.

   STRING_CONSTANT must be a string literal (or a char array), because
   its length is taken with sizeof.  LINE may be any expression that
   yields a pointer to a string, e.g. `p + 1'; it is evaluated more
   than once, so it must not have side effects.  */
#define BEGINS_WITH(line, string_constant)                                  \
  (!strncasecmp ((line), (string_constant), sizeof (string_constant) - 1)   \
   && (ISSPACE ((line)[sizeof (string_constant) - 1])                       \
       || !(line)[sizeof (string_constant) - 1]))
|
|---|
| 1120 |
|
|---|
/* Set the "User-Agent" header of REQ according to opt.useragent:

     - not set:      send the default, "Wget/" followed by
                     version_string (a string freshly made by aprintf,
                     passed with rel_value);
     - non-empty:    send opt.useragent itself (passed with rel_none);
     - empty string: send no "User-Agent" header at all.  */
#define SET_USER_AGENT(req) do {                                        \
  if (opt.useragent)                                                    \
    {                                                                   \
      if (*opt.useragent)                                               \
        request_set_header (req, "User-Agent", opt.useragent, rel_none); \
    }                                                                   \
  else                                                                  \
    request_set_header (req, "User-Agent",                              \
                        aprintf ("Wget/%s", version_string), rel_value); \
} while (0)
|
|---|
| 1128 |
|
|---|
/* Non-zero if any of the options that allow clobbering the file
   (opening it with "wb") is in effect.  Kept in one place so the
   list of options is not repeated later.  #### This will require
   rework.  */
#define ALLOW_CLOBBER                           \
  (opt.noclobber                                \
   || opt.always_rest                           \
   || opt.timestamping                          \
   || opt.dirstruct                             \
   || opt.output_document)
|
|---|
| 1134 |
|
|---|
| 1135 | /* Retrieve a document through HTTP protocol. It recognizes status
|
|---|
| 1136 | code, and correctly handles redirections. It closes the network
|
|---|
| 1137 | socket. If it receives an error from the functions below it, it
|
|---|
| 1138 | will print it if there is enough information to do so (almost
|
|---|
| 1139 | always), returning the error to the caller (i.e. http_loop).
|
|---|
| 1140 |
|
|---|
| 1141 | Various HTTP parameters are stored to hs.
|
|---|
| 1142 |
|
|---|
| 1143 | If PROXY is non-NULL, the connection will be made to the proxy
|
|---|
| 1144 | server, and u->url will be requested. */
|
|---|
| 1145 | static uerr_t
|
|---|
| 1146 | gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
|
|---|
| 1147 | {
|
|---|
| 1148 | struct request *req;
|
|---|
| 1149 |
|
|---|
| 1150 | char *type;
|
|---|
| 1151 | char *user, *passwd;
|
|---|
| 1152 | char *proxyauth;
|
|---|
| 1153 | int statcode;
|
|---|
| 1154 | int write_error;
|
|---|
| 1155 | wgint contlen, contrange;
|
|---|
| 1156 | struct url *conn;
|
|---|
| 1157 | FILE *fp;
|
|---|
| 1158 |
|
|---|
| 1159 | int sock = -1;
|
|---|
| 1160 | int flags;
|
|---|
| 1161 |
|
|---|
| 1162 | /* Set to 1 when the authorization has failed permanently and should
|
|---|
| 1163 | not be tried again. */
|
|---|
| 1164 | int auth_finished = 0;
|
|---|
| 1165 |
|
|---|
| 1166 | /* Whether NTLM authentication is used for this request. */
|
|---|
| 1167 | int ntlm_seen = 0;
|
|---|
| 1168 |
|
|---|
| 1169 | /* Whether our connection to the remote host is through SSL. */
|
|---|
| 1170 | int using_ssl = 0;
|
|---|
| 1171 |
|
|---|
| 1172 | /* Whether a HEAD request will be issued (as opposed to GET or
|
|---|
| 1173 | POST). */
|
|---|
| 1174 | int head_only = *dt & HEAD_ONLY;
|
|---|
| 1175 |
|
|---|
| 1176 | char *head;
|
|---|
| 1177 | struct response *resp;
|
|---|
| 1178 | char hdrval[256];
|
|---|
| 1179 | char *message;
|
|---|
| 1180 |
|
|---|
| 1181 | /* Whether this connection will be kept alive after the HTTP request
|
|---|
| 1182 | is done. */
|
|---|
| 1183 | int keep_alive;
|
|---|
| 1184 |
|
|---|
| 1185 | /* Whether keep-alive should be inhibited.
|
|---|
| 1186 |
|
|---|
| 1187 | RFC 2068 requests that 1.0 clients not send keep-alive requests
|
|---|
| 1188 | to proxies. This is because many 1.0 proxies do not interpret
|
|---|
| 1189 | the Connection header and transfer it to the remote server,
|
|---|
| 1190 | causing it to not close the connection and leave both the proxy
|
|---|
| 1191 | and the client hanging. */
|
|---|
| 1192 | int inhibit_keep_alive =
|
|---|
| 1193 | !opt.http_keep_alive || opt.ignore_length || proxy != NULL;
|
|---|
| 1194 |
|
|---|
| 1195 | /* Headers sent when using POST. */
|
|---|
| 1196 | wgint post_data_size = 0;
|
|---|
| 1197 |
|
|---|
| 1198 | int host_lookup_failed = 0;
|
|---|
| 1199 |
|
|---|
| 1200 | #ifdef HAVE_SSL
|
|---|
| 1201 | if (u->scheme == SCHEME_HTTPS)
|
|---|
| 1202 | {
|
|---|
| 1203 | /* Initialize the SSL context. After this has once been done,
|
|---|
| 1204 | it becomes a no-op. */
|
|---|
| 1205 | if (!ssl_init ())
|
|---|
| 1206 | {
|
|---|
| 1207 | scheme_disable (SCHEME_HTTPS);
|
|---|
| 1208 | logprintf (LOG_NOTQUIET,
|
|---|
| 1209 | _("Disabling SSL due to encountered errors.\n"));
|
|---|
| 1210 | return SSLINITFAILED;
|
|---|
| 1211 | }
|
|---|
| 1212 | }
|
|---|
| 1213 | #endif /* HAVE_SSL */
|
|---|
| 1214 |
|
|---|
| 1215 | if (!head_only)
|
|---|
| 1216 | /* If we're doing a GET on the URL, as opposed to just a HEAD, we need to
|
|---|
| 1217 | know the local filename so we can save to it. */
|
|---|
| 1218 | assert (*hs->local_file != NULL);
|
|---|
| 1219 |
|
|---|
| 1220 | /* Initialize certain elements of struct http_stat. */
|
|---|
| 1221 | hs->len = 0;
|
|---|
| 1222 | hs->contlen = -1;
|
|---|
| 1223 | hs->res = -1;
|
|---|
| 1224 | hs->newloc = NULL;
|
|---|
| 1225 | hs->remote_time = NULL;
|
|---|
| 1226 | hs->error = NULL;
|
|---|
| 1227 |
|
|---|
| 1228 | conn = u;
|
|---|
| 1229 |
|
|---|
| 1230 | /* Prepare the request to send. */
|
|---|
| 1231 |
|
|---|
| 1232 | req = request_new ();
|
|---|
| 1233 | {
|
|---|
| 1234 | char *meth_arg;
|
|---|
| 1235 | const char *meth = "GET";
|
|---|
| 1236 | if (head_only)
|
|---|
| 1237 | meth = "HEAD";
|
|---|
| 1238 | else if (opt.post_file_name || opt.post_data)
|
|---|
| 1239 | meth = "POST";
|
|---|
| 1240 | /* Use the full path, i.e. one that includes the leading slash and
|
|---|
| 1241 | the query string. E.g. if u->path is "foo/bar" and u->query is
|
|---|
| 1242 | "param=value", full_path will be "/foo/bar?param=value". */
|
|---|
| 1243 | if (proxy
|
|---|
| 1244 | #ifdef HAVE_SSL
|
|---|
| 1245 | /* When using SSL over proxy, CONNECT establishes a direct
|
|---|
| 1246 | connection to the HTTPS server. Therefore use the same
|
|---|
| 1247 | argument as when talking to the server directly. */
|
|---|
| 1248 | && u->scheme != SCHEME_HTTPS
|
|---|
| 1249 | #endif
|
|---|
| 1250 | )
|
|---|
| 1251 | meth_arg = xstrdup (u->url);
|
|---|
| 1252 | else
|
|---|
| 1253 | meth_arg = url_full_path (u);
|
|---|
| 1254 | request_set_method (req, meth, meth_arg);
|
|---|
| 1255 | }
|
|---|
| 1256 |
|
|---|
| 1257 | request_set_header (req, "Referer", (char *) hs->referer, rel_none);
|
|---|
| 1258 | if (*dt & SEND_NOCACHE)
|
|---|
| 1259 | request_set_header (req, "Pragma", "no-cache", rel_none);
|
|---|
| 1260 | if (hs->restval)
|
|---|
| 1261 | request_set_header (req, "Range",
|
|---|
| 1262 | aprintf ("bytes=%s-",
|
|---|
| 1263 | number_to_static_string (hs->restval)),
|
|---|
| 1264 | rel_value);
|
|---|
| 1265 | SET_USER_AGENT (req);
|
|---|
| 1266 | request_set_header (req, "Accept", "*/*", rel_none);
|
|---|
| 1267 |
|
|---|
| 1268 | /* Find the username and password for authentication. */
|
|---|
| 1269 | user = u->user;
|
|---|
| 1270 | passwd = u->passwd;
|
|---|
| 1271 | search_netrc (u->host, (const char **)&user, (const char **)&passwd, 0);
|
|---|
| 1272 | user = user ? user : (opt.http_user ? opt.http_user : opt.user);
|
|---|
| 1273 | passwd = passwd ? passwd : (opt.http_passwd ? opt.http_passwd : opt.passwd);
|
|---|
| 1274 |
|
|---|
| 1275 | if (user && passwd)
|
|---|
| 1276 | {
|
|---|
| 1277 | /* We have the username and the password, but haven't tried
|
|---|
| 1278 | any authorization yet. Let's see if the "Basic" method
|
|---|
| 1279 | works. If not, we'll come back here and construct a
|
|---|
| 1280 | proper authorization method with the right challenges.
|
|---|
| 1281 |
|
|---|
| 1282 | If we didn't employ this kind of logic, every URL that
|
|---|
| 1283 | requires authorization would have to be processed twice,
|
|---|
| 1284 | which is very suboptimal and generates a bunch of false
|
|---|
| 1285 | "unauthorized" errors in the server log.
|
|---|
| 1286 |
|
|---|
| 1287 | #### But this logic also has a serious problem when used
|
|---|
| 1288 | with stronger authentications: we *first* transmit the
|
|---|
| 1289 | username and the password in clear text, and *then* attempt a
|
|---|
| 1290 | stronger authentication scheme. That cannot be right! We
|
|---|
| 1291 | are only fortunate that almost everyone still uses the
|
|---|
| 1292 | `Basic' scheme anyway.
|
|---|
| 1293 |
|
|---|
| 1294 | There should be an option to prevent this from happening, for
|
|---|
| 1295 | those who use strong authentication schemes and value their
|
|---|
| 1296 | passwords. */
|
|---|
| 1297 | request_set_header (req, "Authorization",
|
|---|
| 1298 | basic_authentication_encode (user, passwd),
|
|---|
| 1299 | rel_value);
|
|---|
| 1300 | }
|
|---|
| 1301 |
|
|---|
| 1302 | proxyauth = NULL;
|
|---|
| 1303 | if (proxy)
|
|---|
| 1304 | {
|
|---|
| 1305 | char *proxy_user, *proxy_passwd;
|
|---|
| 1306 | /* For normal username and password, URL components override
|
|---|
| 1307 | command-line/wgetrc parameters. With proxy
|
|---|
| 1308 | authentication, it's the reverse, because proxy URLs are
|
|---|
| 1309 | normally the "permanent" ones, so command-line args
|
|---|
| 1310 | should take precedence. */
|
|---|
| 1311 | if (opt.proxy_user && opt.proxy_passwd)
|
|---|
| 1312 | {
|
|---|
| 1313 | proxy_user = opt.proxy_user;
|
|---|
| 1314 | proxy_passwd = opt.proxy_passwd;
|
|---|
| 1315 | }
|
|---|
| 1316 | else
|
|---|
| 1317 | {
|
|---|
| 1318 | proxy_user = proxy->user;
|
|---|
| 1319 | proxy_passwd = proxy->passwd;
|
|---|
| 1320 | }
|
|---|
| 1321 | /* #### This does not appear right. Can't the proxy request,
|
|---|
| 1322 | say, `Digest' authentication? */
|
|---|
| 1323 | if (proxy_user && proxy_passwd)
|
|---|
| 1324 | proxyauth = basic_authentication_encode (proxy_user, proxy_passwd);
|
|---|
| 1325 |
|
|---|
| 1326 | /* If we're using a proxy, we will be connecting to the proxy
|
|---|
| 1327 | server. */
|
|---|
| 1328 | conn = proxy;
|
|---|
| 1329 |
|
|---|
| 1330 | /* Proxy authorization over SSL is handled below. */
|
|---|
| 1331 | #ifdef HAVE_SSL
|
|---|
| 1332 | if (u->scheme != SCHEME_HTTPS)
|
|---|
| 1333 | #endif
|
|---|
| 1334 | request_set_header (req, "Proxy-Authorization", proxyauth, rel_value);
|
|---|
| 1335 | }
|
|---|
| 1336 |
|
|---|
| 1337 | {
|
|---|
| 1338 | /* Whether we need to print the host header with braces around
|
|---|
| 1339 | host, e.g. "Host: [3ffe:8100:200:2::2]:1234" instead of the
|
|---|
| 1340 | usual "Host: symbolic-name:1234". */
|
|---|
| 1341 | int squares = strchr (u->host, ':') != NULL;
|
|---|
| 1342 | if (u->port == scheme_default_port (u->scheme))
|
|---|
| 1343 | request_set_header (req, "Host",
|
|---|
| 1344 | aprintf (squares ? "[%s]" : "%s", u->host),
|
|---|
| 1345 | rel_value);
|
|---|
| 1346 | else
|
|---|
| 1347 | request_set_header (req, "Host",
|
|---|
| 1348 | aprintf (squares ? "[%s]:%d" : "%s:%d",
|
|---|
| 1349 | u->host, u->port),
|
|---|
| 1350 | rel_value);
|
|---|
| 1351 | }
|
|---|
| 1352 |
|
|---|
| 1353 | if (!inhibit_keep_alive)
|
|---|
| 1354 | request_set_header (req, "Connection", "Keep-Alive", rel_none);
|
|---|
| 1355 |
|
|---|
| 1356 | if (opt.cookies)
|
|---|
| 1357 | request_set_header (req, "Cookie",
|
|---|
| 1358 | cookie_header (wget_cookie_jar,
|
|---|
| 1359 | u->host, u->port, u->path,
|
|---|
| 1360 | #ifdef HAVE_SSL
|
|---|
| 1361 | u->scheme == SCHEME_HTTPS
|
|---|
| 1362 | #else
|
|---|
| 1363 | 0
|
|---|
| 1364 | #endif
|
|---|
| 1365 | ),
|
|---|
| 1366 | rel_value);
|
|---|
| 1367 |
|
|---|
| 1368 | if (opt.post_data || opt.post_file_name)
|
|---|
| 1369 | {
|
|---|
| 1370 | request_set_header (req, "Content-Type",
|
|---|
| 1371 | "application/x-www-form-urlencoded", rel_none);
|
|---|
| 1372 | if (opt.post_data)
|
|---|
| 1373 | post_data_size = strlen (opt.post_data);
|
|---|
| 1374 | else
|
|---|
| 1375 | {
|
|---|
| 1376 | post_data_size = file_size (opt.post_file_name);
|
|---|
| 1377 | if (post_data_size == -1)
|
|---|
| 1378 | {
|
|---|
| 1379 | logprintf (LOG_NOTQUIET, _("POST data file `%s' missing: %s\n"),
|
|---|
| 1380 | opt.post_file_name, strerror (errno));
|
|---|
| 1381 | post_data_size = 0;
|
|---|
| 1382 | }
|
|---|
| 1383 | }
|
|---|
| 1384 | request_set_header (req, "Content-Length",
|
|---|
| 1385 | xstrdup (number_to_static_string (post_data_size)),
|
|---|
| 1386 | rel_value);
|
|---|
| 1387 | }
|
|---|
| 1388 |
|
|---|
| 1389 | /* Add the user headers. */
|
|---|
| 1390 | if (opt.user_headers)
|
|---|
| 1391 | {
|
|---|
| 1392 | int i;
|
|---|
| 1393 | for (i = 0; opt.user_headers[i]; i++)
|
|---|
| 1394 | request_set_user_header (req, opt.user_headers[i]);
|
|---|
| 1395 | }
|
|---|
| 1396 |
|
|---|
| 1397 | retry_with_auth:
|
|---|
| 1398 | /* We need to come back here when the initial attempt to retrieve
|
|---|
| 1399 | without authorization header fails. (Expected to happen at least
|
|---|
| 1400 | for the Digest authorization scheme.) */
|
|---|
| 1401 |
|
|---|
| 1402 | keep_alive = 0;
|
|---|
| 1403 |
|
|---|
| 1404 | /* Establish the connection. */
|
|---|
| 1405 |
|
|---|
| 1406 | if (!inhibit_keep_alive)
|
|---|
| 1407 | {
|
|---|
| 1408 | /* Look for a persistent connection to target host, unless a
|
|---|
| 1409 | proxy is used. The exception is when SSL is in use, in which
|
|---|
| 1410 | case the proxy is nothing but a passthrough to the target
|
|---|
| 1411 | host, registered as a connection to the latter. */
|
|---|
| 1412 | struct url *relevant = conn;
|
|---|
| 1413 | #ifdef HAVE_SSL
|
|---|
| 1414 | if (u->scheme == SCHEME_HTTPS)
|
|---|
| 1415 | relevant = u;
|
|---|
| 1416 | #endif
|
|---|
| 1417 |
|
|---|
| 1418 | if (persistent_available_p (relevant->host, relevant->port,
|
|---|
| 1419 | #ifdef HAVE_SSL
|
|---|
| 1420 | relevant->scheme == SCHEME_HTTPS,
|
|---|
| 1421 | #else
|
|---|
| 1422 | 0,
|
|---|
| 1423 | #endif
|
|---|
| 1424 | &host_lookup_failed))
|
|---|
| 1425 | {
|
|---|
| 1426 | sock = pconn.socket;
|
|---|
| 1427 | using_ssl = pconn.ssl;
|
|---|
| 1428 | logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"),
|
|---|
| 1429 | escnonprint (pconn.host), pconn.port);
|
|---|
| 1430 | DEBUGP (("Reusing fd %d.\n", sock));
|
|---|
| 1431 | if (pconn.authorized)
|
|---|
| 1432 | /* If the connection is already authorized, the "Basic"
|
|---|
| 1433 | authorization added by code above is unnecessary and
|
|---|
| 1434 | only hurts us. */
|
|---|
| 1435 | request_remove_header (req, "Authorization");
|
|---|
| 1436 | }
|
|---|
| 1437 | }
|
|---|
| 1438 |
|
|---|
| 1439 | if (sock < 0)
|
|---|
| 1440 | {
|
|---|
| 1441 | /* In its current implementation, persistent_available_p will
|
|---|
| 1442 | look up conn->host in some cases. If that lookup failed, we
|
|---|
| 1443 | don't need to bother with connect_to_host. */
|
|---|
| 1444 | if (host_lookup_failed)
|
|---|
| 1445 | {
|
|---|
| 1446 | request_free (req);
|
|---|
| 1447 | return HOSTERR;
|
|---|
| 1448 | }
|
|---|
| 1449 |
|
|---|
| 1450 | sock = connect_to_host (conn->host, conn->port);
|
|---|
| 1451 | if (sock == E_HOST)
|
|---|
| 1452 | {
|
|---|
| 1453 | request_free (req);
|
|---|
| 1454 | return HOSTERR;
|
|---|
| 1455 | }
|
|---|
| 1456 | else if (sock < 0)
|
|---|
| 1457 | {
|
|---|
| 1458 | request_free (req);
|
|---|
| 1459 | return (retryable_socket_connect_error (errno)
|
|---|
| 1460 | ? CONERROR : CONIMPOSSIBLE);
|
|---|
| 1461 | }
|
|---|
| 1462 |
|
|---|
| 1463 | #ifdef HAVE_SSL
|
|---|
| 1464 | if (proxy && u->scheme == SCHEME_HTTPS)
|
|---|
| 1465 | {
|
|---|
| 1466 | /* When requesting SSL URLs through proxies, use the
|
|---|
| 1467 | CONNECT method to request passthrough. */
|
|---|
| 1468 | struct request *connreq = request_new ();
|
|---|
| 1469 | request_set_method (connreq, "CONNECT",
|
|---|
| 1470 | aprintf ("%s:%d", u->host, u->port));
|
|---|
| 1471 | SET_USER_AGENT (connreq);
|
|---|
| 1472 | if (proxyauth)
|
|---|
| 1473 | {
|
|---|
| 1474 | request_set_header (connreq, "Proxy-Authorization",
|
|---|
| 1475 | proxyauth, rel_value);
|
|---|
| 1476 | /* Now that PROXYAUTH is part of the CONNECT request,
|
|---|
| 1477 | zero it out so we don't send proxy authorization with
|
|---|
| 1478 | the regular request below. */
|
|---|
| 1479 | proxyauth = NULL;
|
|---|
| 1480 | }
|
|---|
| 1481 | /* Examples in rfc2817 use the Host header in CONNECT
|
|---|
| 1482 | requests. I don't see how that gains anything, given
|
|---|
| 1483 | that the contents of Host would be exactly the same as
|
|---|
| 1484 | the contents of CONNECT. */
|
|---|
| 1485 |
|
|---|
| 1486 | write_error = request_send (connreq, sock);
|
|---|
| 1487 | request_free (connreq);
|
|---|
| 1488 | if (write_error < 0)
|
|---|
| 1489 | {
|
|---|
| 1490 | CLOSE_INVALIDATE (sock);
|
|---|
| 1491 | return WRITEFAILED;
|
|---|
| 1492 | }
|
|---|
| 1493 |
|
|---|
| 1494 | head = read_http_response_head (sock);
|
|---|
| 1495 | if (!head)
|
|---|
| 1496 | {
|
|---|
| 1497 | logprintf (LOG_VERBOSE, _("Failed reading proxy response: %s\n"),
|
|---|
| 1498 | strerror (errno));
|
|---|
| 1499 | CLOSE_INVALIDATE (sock);
|
|---|
| 1500 | return HERR;
|
|---|
| 1501 | }
|
|---|
| 1502 | message = NULL;
|
|---|
| 1503 | if (!*head)
|
|---|
| 1504 | {
|
|---|
| 1505 | xfree (head);
|
|---|
| 1506 | goto failed_tunnel;
|
|---|
| 1507 | }
|
|---|
| 1508 | DEBUGP (("proxy responded with: [%s]\n", head));
|
|---|
| 1509 |
|
|---|
| 1510 | resp = resp_new (head);
|
|---|
| 1511 | statcode = resp_status (resp, &message);
|
|---|
| 1512 | resp_free (resp);
|
|---|
| 1513 | xfree (head);
|
|---|
| 1514 | if (statcode != 200)
|
|---|
| 1515 | {
|
|---|
| 1516 | failed_tunnel:
|
|---|
| 1517 | logprintf (LOG_NOTQUIET, _("Proxy tunneling failed: %s"),
|
|---|
| 1518 | message ? escnonprint (message) : "?");
|
|---|
| 1519 | xfree_null (message);
|
|---|
| 1520 | return CONSSLERR;
|
|---|
| 1521 | }
|
|---|
| 1522 | xfree_null (message);
|
|---|
| 1523 |
|
|---|
| 1524 | /* SOCK is now *really* connected to u->host, so update CONN
|
|---|
| 1525 | to reflect this. That way register_persistent will
|
|---|
| 1526 | register SOCK as being connected to u->host:u->port. */
|
|---|
| 1527 | conn = u;
|
|---|
| 1528 | }
|
|---|
| 1529 |
|
|---|
| 1530 | if (conn->scheme == SCHEME_HTTPS)
|
|---|
| 1531 | {
|
|---|
| 1532 | if (!ssl_connect (sock) || !ssl_check_certificate (sock, u->host))
|
|---|
| 1533 | {
|
|---|
| 1534 | fd_close (sock);
|
|---|
| 1535 | return CONSSLERR;
|
|---|
| 1536 | }
|
|---|
| 1537 | using_ssl = 1;
|
|---|
| 1538 | }
|
|---|
| 1539 | #endif /* HAVE_SSL */
|
|---|
| 1540 | }
|
|---|
| 1541 |
|
|---|
| 1542 | /* Send the request to server. */
|
|---|
| 1543 | write_error = request_send (req, sock);
|
|---|
| 1544 |
|
|---|
| 1545 | if (write_error >= 0)
|
|---|
| 1546 | {
|
|---|
| 1547 | if (opt.post_data)
|
|---|
| 1548 | {
|
|---|
| 1549 | DEBUGP (("[POST data: %s]\n", opt.post_data));
|
|---|
| 1550 | write_error = fd_write (sock, opt.post_data, post_data_size, -1.0);
|
|---|
| 1551 | }
|
|---|
| 1552 | else if (opt.post_file_name && post_data_size != 0)
|
|---|
| 1553 | write_error = post_file (sock, opt.post_file_name, post_data_size);
|
|---|
| 1554 | }
|
|---|
| 1555 |
|
|---|
| 1556 | if (write_error < 0)
|
|---|
| 1557 | {
|
|---|
| 1558 | CLOSE_INVALIDATE (sock);
|
|---|
| 1559 | request_free (req);
|
|---|
| 1560 | return WRITEFAILED;
|
|---|
| 1561 | }
|
|---|
| 1562 | logprintf (LOG_VERBOSE, _("%s request sent, awaiting response... "),
|
|---|
| 1563 | proxy ? "Proxy" : "HTTP");
|
|---|
| 1564 | contlen = -1;
|
|---|
| 1565 | contrange = 0;
|
|---|
| 1566 | *dt &= ~RETROKF;
|
|---|
| 1567 |
|
|---|
| 1568 | head = read_http_response_head (sock);
|
|---|
| 1569 | if (!head)
|
|---|
| 1570 | {
|
|---|
| 1571 | if (errno == 0)
|
|---|
| 1572 | {
|
|---|
| 1573 | logputs (LOG_NOTQUIET, _("No data received.\n"));
|
|---|
| 1574 | CLOSE_INVALIDATE (sock);
|
|---|
| 1575 | request_free (req);
|
|---|
| 1576 | return HEOF;
|
|---|
| 1577 | }
|
|---|
| 1578 | else
|
|---|
| 1579 | {
|
|---|
| 1580 | logprintf (LOG_NOTQUIET, _("Read error (%s) in headers.\n"),
|
|---|
| 1581 | strerror (errno));
|
|---|
| 1582 | CLOSE_INVALIDATE (sock);
|
|---|
| 1583 | request_free (req);
|
|---|
| 1584 | return HERR;
|
|---|
| 1585 | }
|
|---|
| 1586 | }
|
|---|
| 1587 | DEBUGP (("\n---response begin---\n%s---response end---\n", head));
|
|---|
| 1588 |
|
|---|
| 1589 | resp = resp_new (head);
|
|---|
| 1590 |
|
|---|
| 1591 | /* Check for status line. */
|
|---|
| 1592 | message = NULL;
|
|---|
| 1593 | statcode = resp_status (resp, &message);
|
|---|
| 1594 | if (!opt.server_response)
|
|---|
| 1595 | logprintf (LOG_VERBOSE, "%2d %s\n", statcode,
|
|---|
| 1596 | message ? escnonprint (message) : "");
|
|---|
| 1597 | else
|
|---|
| 1598 | {
|
|---|
| 1599 | logprintf (LOG_VERBOSE, "\n");
|
|---|
| 1600 | print_server_response (resp, " ");
|
|---|
| 1601 | }
|
|---|
| 1602 |
|
|---|
| 1603 | if (!opt.ignore_length
|
|---|
| 1604 | && resp_header_copy (resp, "Content-Length", hdrval, sizeof (hdrval)))
|
|---|
| 1605 | {
|
|---|
| 1606 | wgint parsed;
|
|---|
| 1607 | errno = 0;
|
|---|
| 1608 | parsed = str_to_wgint (hdrval, NULL, 10);
|
|---|
| 1609 | if (parsed == WGINT_MAX && errno == ERANGE)
|
|---|
| 1610 | /* Out of range.
|
|---|
| 1611 | #### If Content-Length is out of range, it most likely
|
|---|
| 1612 | means that the file is larger than 2G and that we're
|
|---|
| 1613 | compiled without LFS. In that case we should probably
|
|---|
| 1614 | refuse to even attempt to download the file. */
|
|---|
| 1615 | contlen = -1;
|
|---|
| 1616 | else
|
|---|
| 1617 | contlen = parsed;
|
|---|
| 1618 | }
|
|---|
| 1619 |
|
|---|
| 1620 | /* Check for keep-alive related responses. */
|
|---|
| 1621 | if (!inhibit_keep_alive && contlen != -1)
|
|---|
| 1622 | {
|
|---|
| 1623 | if (resp_header_copy (resp, "Keep-Alive", NULL, 0))
|
|---|
| 1624 | keep_alive = 1;
|
|---|
| 1625 | else if (resp_header_copy (resp, "Connection", hdrval, sizeof (hdrval)))
|
|---|
| 1626 | {
|
|---|
| 1627 | if (0 == strcasecmp (hdrval, "Keep-Alive"))
|
|---|
| 1628 | keep_alive = 1;
|
|---|
| 1629 | }
|
|---|
| 1630 | }
|
|---|
| 1631 | if (keep_alive)
|
|---|
| 1632 | /* The server has promised that it will not close the connection
|
|---|
| 1633 | when we're done. This means that we can register it. */
|
|---|
| 1634 | register_persistent (conn->host, conn->port, sock, using_ssl);
|
|---|
| 1635 |
|
|---|
| 1636 | if (statcode == HTTP_STATUS_UNAUTHORIZED)
|
|---|
| 1637 | {
|
|---|
| 1638 | /* Authorization is required. */
|
|---|
| 1639 | if (keep_alive && !head_only && skip_short_body (sock, contlen))
|
|---|
| 1640 | CLOSE_FINISH (sock);
|
|---|
| 1641 | else
|
|---|
| 1642 | CLOSE_INVALIDATE (sock);
|
|---|
| 1643 | pconn.authorized = 0;
|
|---|
| 1644 | if (!auth_finished && (user && passwd))
|
|---|
| 1645 | {
|
|---|
| 1646 | /* IIS sends multiple copies of WWW-Authenticate, one with
|
|---|
| 1647 | the value "negotiate", and other(s) with data. Loop over
|
|---|
| 1648 | all the occurrences and pick the one we recognize. */
|
|---|
| 1649 | int wapos;
|
|---|
| 1650 | const char *wabeg, *waend;
|
|---|
| 1651 | char *www_authenticate = NULL;
|
|---|
| 1652 | for (wapos = 0;
|
|---|
| 1653 | (wapos = resp_header_locate (resp, "WWW-Authenticate", wapos,
|
|---|
| 1654 | &wabeg, &waend)) != -1;
|
|---|
| 1655 | ++wapos)
|
|---|
| 1656 | if (known_authentication_scheme_p (wabeg, waend))
|
|---|
| 1657 | {
|
|---|
| 1658 | BOUNDED_TO_ALLOCA (wabeg, waend, www_authenticate);
|
|---|
| 1659 | break;
|
|---|
| 1660 | }
|
|---|
| 1661 |
|
|---|
| 1662 | if (!www_authenticate)
|
|---|
| 1663 | /* If the authentication header is missing or
|
|---|
| 1664 | unrecognized, there's no sense in retrying. */
|
|---|
| 1665 | logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n"));
|
|---|
| 1666 | else if (BEGINS_WITH (www_authenticate, "Basic"))
|
|---|
| 1667 | /* If the authentication scheme is "Basic", which we send
|
|---|
| 1668 | by default, there's no sense in retrying either. (This
|
|---|
| 1669 | should be changed when we stop sending "Basic" data by
|
|---|
| 1670 | default.) */
|
|---|
| 1671 | ;
|
|---|
| 1672 | else
|
|---|
| 1673 | {
|
|---|
| 1674 | char *pth;
|
|---|
| 1675 | pth = url_full_path (u);
|
|---|
| 1676 | request_set_header (req, "Authorization",
|
|---|
| 1677 | create_authorization_line (www_authenticate,
|
|---|
| 1678 | user, passwd,
|
|---|
| 1679 | request_method (req),
|
|---|
| 1680 | pth,
|
|---|
| 1681 | &auth_finished),
|
|---|
| 1682 | rel_value);
|
|---|
| 1683 | if (BEGINS_WITH (www_authenticate, "NTLM"))
|
|---|
| 1684 | ntlm_seen = 1;
|
|---|
| 1685 | xfree (pth);
|
|---|
| 1686 | goto retry_with_auth;
|
|---|
| 1687 | }
|
|---|
| 1688 | }
|
|---|
| 1689 | logputs (LOG_NOTQUIET, _("Authorization failed.\n"));
|
|---|
| 1690 | request_free (req);
|
|---|
| 1691 | return AUTHFAILED;
|
|---|
| 1692 | }
|
|---|
| 1693 | else /* statcode != HTTP_STATUS_UNAUTHORIZED */
|
|---|
| 1694 | {
|
|---|
| 1695 | /* Kludge: if NTLM is used, mark the TCP connection as authorized. */
|
|---|
| 1696 | if (ntlm_seen)
|
|---|
| 1697 | pconn.authorized = 1;
|
|---|
| 1698 | }
|
|---|
| 1699 | request_free (req);
|
|---|
| 1700 |
|
|---|
| 1701 | hs->statcode = statcode;
|
|---|
| 1702 | if (statcode == -1)
|
|---|
| 1703 | hs->error = xstrdup (_("Malformed status line"));
|
|---|
| 1704 | else if (!*message)
|
|---|
| 1705 | hs->error = xstrdup (_("(no description)"));
|
|---|
| 1706 | else
|
|---|
| 1707 | hs->error = xstrdup (message);
|
|---|
| 1708 | xfree_null (message);
|
|---|
| 1709 |
|
|---|
| 1710 | type = resp_header_strdup (resp, "Content-Type");
|
|---|
| 1711 | if (type)
|
|---|
| 1712 | {
|
|---|
| 1713 | char *tmp = strchr (type, ';');
|
|---|
| 1714 | if (tmp)
|
|---|
| 1715 | {
|
|---|
| 1716 | while (tmp > type && ISSPACE (tmp[-1]))
|
|---|
| 1717 | --tmp;
|
|---|
| 1718 | *tmp = '\0';
|
|---|
| 1719 | }
|
|---|
| 1720 | }
|
|---|
| 1721 | hs->newloc = resp_header_strdup (resp, "Location");
|
|---|
| 1722 | hs->remote_time = resp_header_strdup (resp, "Last-Modified");
|
|---|
| 1723 |
|
|---|
| 1724 | /* Handle (possibly multiple instances of) the Set-Cookie header. */
|
|---|
| 1725 | if (opt.cookies)
|
|---|
| 1726 | {
|
|---|
| 1727 | int scpos;
|
|---|
| 1728 | const char *scbeg, *scend;
|
|---|
| 1729 | /* The jar should have been created by now. */
|
|---|
| 1730 | assert (wget_cookie_jar != NULL);
|
|---|
| 1731 | for (scpos = 0;
|
|---|
| 1732 | (scpos = resp_header_locate (resp, "Set-Cookie", scpos,
|
|---|
| 1733 | &scbeg, &scend)) != -1;
|
|---|
| 1734 | ++scpos)
|
|---|
| 1735 | {
|
|---|
| 1736 | char *set_cookie; BOUNDED_TO_ALLOCA (scbeg, scend, set_cookie);
|
|---|
| 1737 | cookie_handle_set_cookie (wget_cookie_jar, u->host, u->port,
|
|---|
| 1738 | u->path, set_cookie);
|
|---|
| 1739 | }
|
|---|
| 1740 | }
|
|---|
| 1741 |
|
|---|
| 1742 | if (resp_header_copy (resp, "Content-Range", hdrval, sizeof (hdrval)))
|
|---|
| 1743 | {
|
|---|
| 1744 | wgint first_byte_pos, last_byte_pos, entity_length;
|
|---|
| 1745 | if (parse_content_range (hdrval, &first_byte_pos, &last_byte_pos,
|
|---|
| 1746 | &entity_length))
|
|---|
| 1747 | contrange = first_byte_pos;
|
|---|
| 1748 | }
|
|---|
| 1749 | resp_free (resp);
|
|---|
| 1750 |
|
|---|
| 1751 | /* 20x responses are counted among successful by default. */
|
|---|
| 1752 | if (H_20X (statcode))
|
|---|
| 1753 | *dt |= RETROKF;
|
|---|
| 1754 |
|
|---|
| 1755 | /* Return if redirected. */
|
|---|
| 1756 | if (H_REDIRECTED (statcode) || statcode == HTTP_STATUS_MULTIPLE_CHOICES)
|
|---|
| 1757 | {
|
|---|
| 1758 | /* RFC2068 says that in case of the 300 (multiple choices)
|
|---|
| 1759 | response, the server can output a preferred URL through
|
|---|
| 1760 | `Location' header; otherwise, the request should be treated
|
|---|
| 1761 | like GET. So, if the location is set, it will be a
|
|---|
| 1762 | redirection; otherwise, just proceed normally. */
|
|---|
| 1763 | if (statcode == HTTP_STATUS_MULTIPLE_CHOICES && !hs->newloc)
|
|---|
| 1764 | *dt |= RETROKF;
|
|---|
| 1765 | else
|
|---|
| 1766 | {
|
|---|
| 1767 | logprintf (LOG_VERBOSE,
|
|---|
| 1768 | _("Location: %s%s\n"),
|
|---|
| 1769 | hs->newloc ? escnonprint_uri (hs->newloc) : _("unspecified"),
|
|---|
| 1770 | hs->newloc ? _(" [following]") : "");
|
|---|
| 1771 | if (keep_alive && !head_only && skip_short_body (sock, contlen))
|
|---|
| 1772 | CLOSE_FINISH (sock);
|
|---|
| 1773 | else
|
|---|
| 1774 | CLOSE_INVALIDATE (sock);
|
|---|
| 1775 | xfree_null (type);
|
|---|
| 1776 | return NEWLOCATION;
|
|---|
| 1777 | }
|
|---|
| 1778 | }
|
|---|
| 1779 |
|
|---|
| 1780 | /* If content-type is not given, assume text/html. This is because
|
|---|
| 1781 | of the multitude of broken CGI's that "forget" to generate the
|
|---|
| 1782 | content-type. */
|
|---|
| 1783 | if (!type ||
|
|---|
| 1784 | 0 == strncasecmp (type, TEXTHTML_S, strlen (TEXTHTML_S)) ||
|
|---|
| 1785 | 0 == strncasecmp (type, TEXTXHTML_S, strlen (TEXTXHTML_S)))
|
|---|
| 1786 | *dt |= TEXTHTML;
|
|---|
| 1787 | else
|
|---|
| 1788 | *dt &= ~TEXTHTML;
|
|---|
| 1789 |
|
|---|
| 1790 | if (opt.html_extension && (*dt & TEXTHTML))
|
|---|
| 1791 | /* -E / --html-extension / html_extension = on was specified, and this is a
|
|---|
| 1792 | text/html file. If some case-insensitive variation on ".htm[l]" isn't
|
|---|
| 1793 | already the file's suffix, tack on ".html". */
|
|---|
| 1794 | {
|
|---|
| 1795 | char *last_period_in_local_filename = strrchr (*hs->local_file, '.');
|
|---|
| 1796 |
|
|---|
| 1797 | if (last_period_in_local_filename == NULL
|
|---|
| 1798 | || !(0 == strcasecmp (last_period_in_local_filename, ".htm")
|
|---|
| 1799 | || 0 == strcasecmp (last_period_in_local_filename, ".html")))
|
|---|
| 1800 | {
|
|---|
| 1801 | int local_filename_len = strlen (*hs->local_file);
|
|---|
| 1802 | /* Resize the local file, allowing for ".html" preceded by
|
|---|
| 1803 | optional ".NUMBER". */
|
|---|
| 1804 | *hs->local_file = xrealloc (*hs->local_file,
|
|---|
| 1805 | local_filename_len + 24 + sizeof (".html"));
|
|---|
| 1806 | strcpy(*hs->local_file + local_filename_len, ".html");
|
|---|
| 1807 | /* If clobbering is not allowed and the file, as named,
|
|---|
| 1808 | exists, tack on ".NUMBER.html" instead. */
|
|---|
| 1809 | if (!ALLOW_CLOBBER)
|
|---|
| 1810 | {
|
|---|
| 1811 | int ext_num = 1;
|
|---|
| 1812 | do
|
|---|
| 1813 | sprintf (*hs->local_file + local_filename_len,
|
|---|
| 1814 | ".%d.html", ext_num++);
|
|---|
| 1815 | while (file_exists_p (*hs->local_file));
|
|---|
| 1816 | }
|
|---|
| 1817 | *dt |= ADDED_HTML_EXTENSION;
|
|---|
| 1818 | }
|
|---|
| 1819 | }
|
|---|
| 1820 |
|
|---|
| 1821 | if (statcode == HTTP_STATUS_RANGE_NOT_SATISFIABLE)
|
|---|
| 1822 | {
|
|---|
| 1823 | /* If `-c' is in use and the file has been fully downloaded (or
|
|---|
| 1824 | the remote file has shrunk), Wget effectively requests bytes
|
|---|
| 1825 | after the end of file and the server responds with 416. */
|
|---|
| 1826 | logputs (LOG_VERBOSE, _("\
|
|---|
| 1827 | \n The file is already fully retrieved; nothing to do.\n\n"));
|
|---|
| 1828 | /* In case the caller inspects. */
|
|---|
| 1829 | hs->len = contlen;
|
|---|
| 1830 | hs->res = 0;
|
|---|
| 1831 | /* Mark as successfully retrieved. */
|
|---|
| 1832 | *dt |= RETROKF;
|
|---|
| 1833 | xfree_null (type);
|
|---|
| 1834 | CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
|
|---|
| 1835 | might be more bytes in the body. */
|
|---|
| 1836 | return RETRUNNEEDED;
|
|---|
| 1837 | }
|
|---|
| 1838 | if ((contrange != 0 && contrange != hs->restval)
|
|---|
| 1839 | || (H_PARTIAL (statcode) && !contrange))
|
|---|
| 1840 | {
|
|---|
| 1841 | /* The Range request was somehow misunderstood by the server.
|
|---|
| 1842 | Bail out. */
|
|---|
| 1843 | xfree_null (type);
|
|---|
| 1844 | CLOSE_INVALIDATE (sock);
|
|---|
| 1845 | return RANGEERR;
|
|---|
| 1846 | }
|
|---|
| 1847 | hs->contlen = contlen + contrange;
|
|---|
| 1848 |
|
|---|
| 1849 | if (opt.verbose)
|
|---|
| 1850 | {
|
|---|
| 1851 | if (*dt & RETROKF)
|
|---|
| 1852 | {
|
|---|
| 1853 | /* No need to print this output if the body won't be
|
|---|
| 1854 | downloaded at all, or if the original server response is
|
|---|
| 1855 | printed. */
|
|---|
| 1856 | logputs (LOG_VERBOSE, _("Length: "));
|
|---|
| 1857 | if (contlen != -1)
|
|---|
| 1858 | {
|
|---|
| 1859 | logputs (LOG_VERBOSE, with_thousand_seps (contlen + contrange));
|
|---|
| 1860 | if (contlen + contrange >= 1024)
|
|---|
| 1861 | logprintf (LOG_VERBOSE, " (%s)",
|
|---|
| 1862 | human_readable (contlen + contrange));
|
|---|
| 1863 | if (contrange)
|
|---|
| 1864 | {
|
|---|
| 1865 | if (contlen >= 1024)
|
|---|
| 1866 | logprintf (LOG_VERBOSE, _(", %s (%s) remaining"),
|
|---|
| 1867 | with_thousand_seps (contlen),
|
|---|
| 1868 | human_readable (contlen));
|
|---|
| 1869 | else
|
|---|
| 1870 | logprintf (LOG_VERBOSE, _(", %s remaining"),
|
|---|
| 1871 | with_thousand_seps (contlen));
|
|---|
| 1872 | }
|
|---|
| 1873 | }
|
|---|
| 1874 | else
|
|---|
| 1875 | logputs (LOG_VERBOSE,
|
|---|
| 1876 | opt.ignore_length ? _("ignored") : _("unspecified"));
|
|---|
| 1877 | if (type)
|
|---|
| 1878 | logprintf (LOG_VERBOSE, " [%s]\n", escnonprint (type));
|
|---|
| 1879 | else
|
|---|
| 1880 | logputs (LOG_VERBOSE, "\n");
|
|---|
| 1881 | }
|
|---|
| 1882 | }
|
|---|
| 1883 | xfree_null (type);
|
|---|
| 1884 | type = NULL; /* We don't need it any more. */
|
|---|
| 1885 |
|
|---|
| 1886 | /* Return if we have no intention of further downloading. */
|
|---|
| 1887 | if (!(*dt & RETROKF) || head_only)
|
|---|
| 1888 | {
|
|---|
| 1889 | /* In case the caller cares to look... */
|
|---|
| 1890 | hs->len = 0;
|
|---|
| 1891 | hs->res = 0;
|
|---|
| 1892 | xfree_null (type);
|
|---|
| 1893 | if (head_only)
|
|---|
| 1894 | /* Pre-1.10 Wget used CLOSE_INVALIDATE here. Now we trust the
|
|---|
| 1895 | servers not to send body in response to a HEAD request. If
|
|---|
| 1896 | you encounter such a server (more likely a broken CGI), use
|
|---|
| 1897 | `--no-http-keep-alive'. */
|
|---|
| 1898 | CLOSE_FINISH (sock);
|
|---|
| 1899 | else if (keep_alive && skip_short_body (sock, contlen))
|
|---|
| 1900 | /* Successfully skipped the body; also keep using the socket. */
|
|---|
| 1901 | CLOSE_FINISH (sock);
|
|---|
| 1902 | else
|
|---|
| 1903 | CLOSE_INVALIDATE (sock);
|
|---|
| 1904 | return RETRFINISHED;
|
|---|
| 1905 | }
|
|---|
| 1906 |
|
|---|
| 1907 | /* Open the local file. */
|
|---|
| 1908 | if (!output_stream)
|
|---|
| 1909 | {
|
|---|
| 1910 | mkalldirs (*hs->local_file);
|
|---|
| 1911 | if (opt.backups)
|
|---|
| 1912 | rotate_backups (*hs->local_file);
|
|---|
| 1913 | if (hs->restval)
|
|---|
| 1914 | fp = fopen (*hs->local_file, "ab");
|
|---|
| 1915 | else if (ALLOW_CLOBBER)
|
|---|
| 1916 | fp = fopen (*hs->local_file, "wb");
|
|---|
| 1917 | else
|
|---|
| 1918 | {
|
|---|
| 1919 | fp = fopen_excl (*hs->local_file, 1);
|
|---|
| 1920 | if (!fp && errno == EEXIST)
|
|---|
| 1921 | {
|
|---|
| 1922 | /* We cannot just invent a new name and use it (which is
|
|---|
| 1923 | what functions like unique_create typically do)
|
|---|
| 1924 | because we told the user we'd use this name.
|
|---|
| 1925 | Instead, return and retry the download. */
|
|---|
| 1926 | logprintf (LOG_NOTQUIET,
|
|---|
| 1927 | _("%s has sprung into existence.\n"),
|
|---|
| 1928 | *hs->local_file);
|
|---|
| 1929 | CLOSE_INVALIDATE (sock);
|
|---|
| 1930 | return FOPEN_EXCL_ERR;
|
|---|
| 1931 | }
|
|---|
| 1932 | }
|
|---|
| 1933 | if (!fp)
|
|---|
| 1934 | {
|
|---|
| 1935 | logprintf (LOG_NOTQUIET, "%s: %s\n", *hs->local_file, strerror (errno));
|
|---|
| 1936 | CLOSE_INVALIDATE (sock);
|
|---|
| 1937 | return FOPENERR;
|
|---|
| 1938 | }
|
|---|
| 1939 | }
|
|---|
| 1940 | else
|
|---|
| 1941 | fp = output_stream;
|
|---|
| 1942 |
|
|---|
| 1943 | /* #### This confuses the timestamping code that checks for file
|
|---|
| 1944 | size. Maybe we should save some additional information? */
|
|---|
| 1945 | if (opt.save_headers)
|
|---|
| 1946 | fwrite (head, 1, strlen (head), fp);
|
|---|
| 1947 |
|
|---|
| 1948 | /* Now we no longer need to store the response header. */
|
|---|
| 1949 | xfree (head);
|
|---|
| 1950 |
|
|---|
| 1951 | /* Download the response body. */
|
|---|
| 1952 | flags = 0;
|
|---|
| 1953 | if (contlen != -1)
|
|---|
| 1954 | /* If content-length is present, read that much; otherwise, read
|
|---|
| 1955 | until EOF. The HTTP spec doesn't require the server to
|
|---|
| 1956 | actually close the connection when it's done sending data. */
|
|---|
| 1957 | flags |= rb_read_exactly;
|
|---|
| 1958 | if (hs->restval > 0 && contrange == 0)
|
|---|
| 1959 | /* If the server ignored our range request, instruct fd_read_body
|
|---|
| 1960 | to skip the first RESTVAL bytes of body. */
|
|---|
| 1961 | flags |= rb_skip_startpos;
|
|---|
| 1962 | hs->len = hs->restval;
|
|---|
| 1963 | hs->rd_size = 0;
|
|---|
| 1964 | hs->res = fd_read_body (sock, fp, contlen != -1 ? contlen : 0,
|
|---|
| 1965 | hs->restval, &hs->rd_size, &hs->len, &hs->dltime,
|
|---|
| 1966 | flags);
|
|---|
| 1967 |
|
|---|
| 1968 | if (hs->res >= 0)
|
|---|
| 1969 | CLOSE_FINISH (sock);
|
|---|
| 1970 | else
|
|---|
| 1971 | CLOSE_INVALIDATE (sock);
|
|---|
| 1972 |
|
|---|
| 1973 | {
|
|---|
| 1974 | /* Close or flush the file. We have to be careful to check for
|
|---|
| 1975 | error here. Checking the result of fwrite() is not enough --
|
|---|
| 1976 | errors could go unnoticed! */
|
|---|
| 1977 | int flush_res;
|
|---|
| 1978 | if (!output_stream)
|
|---|
| 1979 | flush_res = fclose (fp);
|
|---|
| 1980 | else
|
|---|
| 1981 | flush_res = fflush (fp);
|
|---|
| 1982 | if (flush_res == EOF)
|
|---|
| 1983 | hs->res = -2;
|
|---|
| 1984 | }
|
|---|
| 1985 | if (hs->res == -2)
|
|---|
| 1986 | return FWRITEERR;
|
|---|
| 1987 | return RETRFINISHED;
|
|---|
| 1988 | }
|
|---|
| 1989 |
|
|---|
| 1990 | /* The genuine HTTP loop! This is the part where the retrieval is
|
|---|
| 1991 | retried, and retried, and retried, and... */
|
|---|
| 1992 | uerr_t
|
|---|
| 1993 | http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
|
|---|
| 1994 | int *dt, struct url *proxy)
|
|---|
| 1995 | {
|
|---|
| 1996 | int count;
|
|---|
| 1997 | int use_ts, got_head = 0; /* time-stamping info */
|
|---|
| 1998 | char *filename_plus_orig_suffix;
|
|---|
| 1999 | char *local_filename = NULL;
|
|---|
| 2000 | char *tms, *locf, *tmrate;
|
|---|
| 2001 | uerr_t err;
|
|---|
| 2002 | time_t tml = -1, tmr = -1; /* local and remote time-stamps */
|
|---|
| 2003 | wgint local_size = 0; /* the size of the local file */
|
|---|
| 2004 | size_t filename_len;
|
|---|
| 2005 | struct http_stat hstat; /* HTTP status */
|
|---|
| 2006 | struct_stat st;
|
|---|
| 2007 | char *dummy = NULL;
|
|---|
| 2008 |
|
|---|
| 2009 | /* This used to be done in main(), but it's a better idea to do it
|
|---|
| 2010 | here so that we don't go through the hoops if we're just using
|
|---|
| 2011 | FTP or whatever. */
|
|---|
| 2012 | if (opt.cookies)
|
|---|
| 2013 | {
|
|---|
| 2014 | if (!wget_cookie_jar)
|
|---|
| 2015 | wget_cookie_jar = cookie_jar_new ();
|
|---|
| 2016 | if (opt.cookies_input && !cookies_loaded_p)
|
|---|
| 2017 | {
|
|---|
| 2018 | cookie_jar_load (wget_cookie_jar, opt.cookies_input);
|
|---|
| 2019 | cookies_loaded_p = 1;
|
|---|
| 2020 | }
|
|---|
| 2021 | }
|
|---|
| 2022 |
|
|---|
| 2023 | *newloc = NULL;
|
|---|
| 2024 |
|
|---|
| 2025 | /* Warn on (likely bogus) wildcard usage in HTTP. */
|
|---|
| 2026 | if (opt.ftp_glob && has_wildcards_p (u->path))
|
|---|
| 2027 | logputs (LOG_VERBOSE, _("Warning: wildcards not supported in HTTP.\n"));
|
|---|
| 2028 |
|
|---|
| 2029 | xzero (hstat);
|
|---|
| 2030 |
|
|---|
| 2031 | /* Determine the local filename. */
|
|---|
| 2032 | if (local_file && *local_file)
|
|---|
| 2033 | hstat.local_file = local_file;
|
|---|
| 2034 | else if (local_file && !opt.output_document)
|
|---|
| 2035 | {
|
|---|
| 2036 | *local_file = url_file_name (u);
|
|---|
| 2037 | hstat.local_file = local_file;
|
|---|
| 2038 | }
|
|---|
| 2039 | else
|
|---|
| 2040 | {
|
|---|
| 2041 | dummy = url_file_name (u);
|
|---|
| 2042 | hstat.local_file = &dummy;
|
|---|
| 2043 | /* be honest about where we will save the file */
|
|---|
| 2044 | if (local_file && opt.output_document)
|
|---|
| 2045 | *local_file = HYPHENP (opt.output_document) ? NULL : xstrdup (opt.output_document);
|
|---|
| 2046 | }
|
|---|
| 2047 |
|
|---|
| 2048 | if (!opt.output_document)
|
|---|
| 2049 | locf = *hstat.local_file;
|
|---|
| 2050 | else
|
|---|
| 2051 | locf = opt.output_document;
|
|---|
| 2052 |
|
|---|
| 2053 | hstat.referer = referer;
|
|---|
| 2054 |
|
|---|
| 2055 | filename_len = strlen (*hstat.local_file);
|
|---|
| 2056 | filename_plus_orig_suffix = alloca (filename_len + sizeof (".orig"));
|
|---|
| 2057 |
|
|---|
| 2058 | if (opt.noclobber && file_exists_p (*hstat.local_file))
|
|---|
| 2059 | {
|
|---|
| 2060 | /* If opt.noclobber is turned on and file already exists, do not
|
|---|
| 2061 | retrieve the file */
|
|---|
| 2062 | logprintf (LOG_VERBOSE, _("\
|
|---|
| 2063 | File `%s' already there; not retrieving.\n\n"), *hstat.local_file);
|
|---|
| 2064 | /* If the file is there, we suppose it's retrieved OK. */
|
|---|
| 2065 | *dt |= RETROKF;
|
|---|
| 2066 |
|
|---|
| 2067 | /* #### Bogusness alert. */
|
|---|
| 2068 | /* If its suffix is "html" or "htm" or similar, assume text/html. */
|
|---|
| 2069 | if (has_html_suffix_p (*hstat.local_file))
|
|---|
| 2070 | *dt |= TEXTHTML;
|
|---|
| 2071 |
|
|---|
| 2072 | xfree_null (dummy);
|
|---|
| 2073 | return RETROK;
|
|---|
| 2074 | }
|
|---|
| 2075 |
|
|---|
| 2076 | use_ts = 0;
|
|---|
| 2077 | if (opt.timestamping)
|
|---|
| 2078 | {
|
|---|
| 2079 | int local_dot_orig_file_exists = 0;
|
|---|
| 2080 |
|
|---|
| 2081 | if (opt.backup_converted)
|
|---|
| 2082 | /* If -K is specified, we'll act on the assumption that it was specified
|
|---|
| 2083 | last time these files were downloaded as well, and instead of just
|
|---|
| 2084 | comparing local file X against server file X, we'll compare local
|
|---|
| 2085 | file X.orig (if extant, else X) against server file X. If -K
|
|---|
| 2086 | _wasn't_ specified last time, or the server contains files called
|
|---|
| 2087 | *.orig, -N will be back to not operating correctly with -k. */
|
|---|
| 2088 | {
|
|---|
| 2089 | /* Would a single s[n]printf() call be faster? --dan
|
|---|
| 2090 |
|
|---|
| 2091 | Definitely not. sprintf() is horribly slow. It's a
|
|---|
| 2092 | different question whether the difference between the two
|
|---|
| 2093 | affects a program. Usually I'd say "no", but at one
|
|---|
| 2094 | point I profiled Wget, and found that a measurable and
|
|---|
| 2095 | non-negligible amount of time was lost calling sprintf()
|
|---|
| 2096 | in url.c. Replacing sprintf with inline calls to
|
|---|
| 2097 | strcpy() and number_to_string() made a difference.
|
|---|
| 2098 | --hniksic */
|
|---|
| 2099 | memcpy (filename_plus_orig_suffix, *hstat.local_file, filename_len);
|
|---|
| 2100 | memcpy (filename_plus_orig_suffix + filename_len,
|
|---|
| 2101 | ".orig", sizeof (".orig"));
|
|---|
| 2102 |
|
|---|
| 2103 | /* Try to stat() the .orig file. */
|
|---|
| 2104 | if (stat (filename_plus_orig_suffix, &st) == 0)
|
|---|
| 2105 | {
|
|---|
| 2106 | local_dot_orig_file_exists = 1;
|
|---|
| 2107 | local_filename = filename_plus_orig_suffix;
|
|---|
| 2108 | }
|
|---|
| 2109 | }
|
|---|
| 2110 |
|
|---|
| 2111 | if (!local_dot_orig_file_exists)
|
|---|
| 2112 | /* Couldn't stat() <file>.orig, so try to stat() <file>. */
|
|---|
| 2113 | if (stat (*hstat.local_file, &st) == 0)
|
|---|
| 2114 | local_filename = *hstat.local_file;
|
|---|
| 2115 |
|
|---|
| 2116 | if (local_filename != NULL)
|
|---|
| 2117 | /* There was a local file, so we'll check later to see if the version
|
|---|
| 2118 | the server has is the same version we already have, allowing us to
|
|---|
| 2119 | skip a download. */
|
|---|
| 2120 | {
|
|---|
| 2121 | use_ts = 1;
|
|---|
| 2122 | tml = st.st_mtime;
|
|---|
| 2123 | #ifdef WINDOWS
|
|---|
| 2124 | /* Modification time granularity is 2 seconds for Windows, so
|
|---|
| 2125 | increase local time by 1 second for later comparison. */
|
|---|
| 2126 | tml++;
|
|---|
| 2127 | #endif
|
|---|
| 2128 | local_size = st.st_size;
|
|---|
| 2129 | got_head = 0;
|
|---|
| 2130 | }
|
|---|
| 2131 | }
|
|---|
| 2132 | /* Reset the counter. */
|
|---|
| 2133 | count = 0;
|
|---|
| 2134 | *dt = 0;
|
|---|
| 2135 | /* THE loop */
|
|---|
| 2136 | do
|
|---|
| 2137 | {
|
|---|
| 2138 | /* Increment the pass counter. */
|
|---|
| 2139 | ++count;
|
|---|
| 2140 | sleep_between_retrievals (count);
|
|---|
| 2141 | /* Get the current time string. */
|
|---|
| 2142 | tms = time_str (NULL);
|
|---|
| 2143 | /* Print fetch message, if opt.verbose. */
|
|---|
| 2144 | if (opt.verbose)
|
|---|
| 2145 | {
|
|---|
| 2146 | char *hurl = url_string (u, 1);
|
|---|
| 2147 | char tmp[256];
|
|---|
| 2148 | strcpy (tmp, " ");
|
|---|
| 2149 | if (count > 1)
|
|---|
| 2150 | sprintf (tmp, _("(try:%2d)"), count);
|
|---|
| 2151 | logprintf (LOG_VERBOSE, "--%s-- %s\n %s => `%s'\n",
|
|---|
| 2152 | tms, hurl, tmp, locf);
|
|---|
| 2153 | #ifdef WINDOWS
|
|---|
| 2154 | ws_changetitle (hurl);
|
|---|
| 2155 | #endif
|
|---|
| 2156 | xfree (hurl);
|
|---|
| 2157 | }
|
|---|
| 2158 |
|
|---|
| 2159 | /* Default document type is empty. However, if spider mode is
|
|---|
| 2160 | on or time-stamping is employed, the HEAD_ONLY command is
|
|---|
| 2161 | encoded within *dt. */
|
|---|
| 2162 | if (opt.spider || (use_ts && !got_head))
|
|---|
| 2163 | *dt |= HEAD_ONLY;
|
|---|
| 2164 | else
|
|---|
| 2165 | *dt &= ~HEAD_ONLY;
|
|---|
| 2166 |
|
|---|
| 2167 | /* Decide whether or not to restart. */
|
|---|
| 2168 | if (opt.always_rest
|
|---|
| 2169 | && stat (locf, &st) == 0
|
|---|
| 2170 | && S_ISREG (st.st_mode))
|
|---|
| 2171 | /* When -c is used, continue from on-disk size. (Can't use
|
|---|
| 2172 | hstat.len even if count>1 because we don't want a failed
|
|---|
| 2173 | first attempt to clobber existing data.) */
|
|---|
| 2174 | hstat.restval = st.st_size;
|
|---|
| 2175 | else if (count > 1)
|
|---|
| 2176 | /* otherwise, continue where the previous try left off */
|
|---|
| 2177 | hstat.restval = hstat.len;
|
|---|
| 2178 | else
|
|---|
| 2179 | hstat.restval = 0;
|
|---|
| 2180 |
|
|---|
| 2181 | /* Decide whether to send the no-cache directive. We send it in
|
|---|
| 2182 | two cases:
|
|---|
| 2183 | a) we're using a proxy, and we're past our first retrieval.
|
|---|
| 2184 | Some proxies are notorious for caching incomplete data, so
|
|---|
| 2185 | we require a fresh get.
|
|---|
| 2186 | b) caching is explicitly inhibited. */
|
|---|
| 2187 | if ((proxy && count > 1) /* a */
|
|---|
| 2188 | || !opt.allow_cache /* b */
|
|---|
| 2189 | )
|
|---|
| 2190 | *dt |= SEND_NOCACHE;
|
|---|
| 2191 | else
|
|---|
| 2192 | *dt &= ~SEND_NOCACHE;
|
|---|
| 2193 |
|
|---|
| 2194 | /* Try fetching the document, or at least its head. */
|
|---|
| 2195 | err = gethttp (u, &hstat, dt, proxy);
|
|---|
| 2196 |
|
|---|
| 2197 | /* It's unfortunate that wget determines the local filename before finding
|
|---|
| 2198 | out the Content-Type of the file. Barring a major restructuring of the
|
|---|
| 2199 | code, we need to re-set locf here, since gethttp() may have xrealloc()d
|
|---|
| 2200 | *hstat.local_file to tack on ".html". */
|
|---|
| 2201 | if (!opt.output_document)
|
|---|
| 2202 | locf = *hstat.local_file;
|
|---|
| 2203 |
|
|---|
| 2204 | /* Time? */
|
|---|
| 2205 | tms = time_str (NULL);
|
|---|
| 2206 | /* Get the new location (with or without the redirection). */
|
|---|
| 2207 | if (hstat.newloc)
|
|---|
| 2208 | *newloc = xstrdup (hstat.newloc);
|
|---|
| 2209 | switch (err)
|
|---|
| 2210 | {
|
|---|
| 2211 | case HERR: case HEOF: case CONSOCKERR: case CONCLOSED:
|
|---|
| 2212 | case CONERROR: case READERR: case WRITEFAILED:
|
|---|
| 2213 | case RANGEERR: case FOPEN_EXCL_ERR:
|
|---|
| 2214 | /* Non-fatal errors continue executing the loop, which will
|
|---|
| 2215 | bring them to the "while" statement at the end, to judge
|
|---|
| 2216 | whether the number of tries was exceeded. */
|
|---|
| 2217 | free_hstat (&hstat);
|
|---|
| 2218 | printwhat (count, opt.ntry);
|
|---|
| 2219 | if (err == FOPEN_EXCL_ERR)
|
|---|
| 2220 | {
|
|---|
| 2221 | /* Re-determine the file name. */
|
|---|
| 2222 | if (local_file && *local_file)
|
|---|
| 2223 | {
|
|---|
| 2224 | xfree (*local_file);
|
|---|
| 2225 | *local_file = url_file_name (u);
|
|---|
| 2226 | hstat.local_file = local_file;
|
|---|
| 2227 | }
|
|---|
| 2228 | else
|
|---|
| 2229 | {
|
|---|
| 2230 | xfree (dummy);
|
|---|
| 2231 | dummy = url_file_name (u);
|
|---|
| 2232 | hstat.local_file = &dummy;
|
|---|
| 2233 | }
|
|---|
| 2234 | /* be honest about where we will save the file */
|
|---|
| 2235 | if (local_file && opt.output_document)
|
|---|
| 2236 | *local_file = HYPHENP (opt.output_document) ? NULL : xstrdup (opt.output_document);
|
|---|
| 2237 | if (!opt.output_document)
|
|---|
| 2238 | locf = *hstat.local_file;
|
|---|
| 2239 | else
|
|---|
| 2240 | locf = opt.output_document;
|
|---|
| 2241 | }
|
|---|
| 2242 | continue;
|
|---|
| 2243 | case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED:
|
|---|
| 2244 | case SSLINITFAILED: case CONTNOTSUPPORTED:
|
|---|
| 2245 | /* Fatal errors just return from the function. */
|
|---|
| 2246 | free_hstat (&hstat);
|
|---|
| 2247 | xfree_null (dummy);
|
|---|
| 2248 | return err;
|
|---|
| 2249 | case FWRITEERR: case FOPENERR:
|
|---|
| 2250 | /* Another fatal error. */
|
|---|
| 2251 | logputs (LOG_VERBOSE, "\n");
|
|---|
| 2252 | logprintf (LOG_NOTQUIET, _("Cannot write to `%s' (%s).\n"),
|
|---|
| 2253 | *hstat.local_file, strerror (errno));
|
|---|
| 2254 | free_hstat (&hstat);
|
|---|
| 2255 | xfree_null (dummy);
|
|---|
| 2256 | return err;
|
|---|
| 2257 | case CONSSLERR:
|
|---|
| 2258 | /* Another fatal error. */
|
|---|
| 2259 | logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n"));
|
|---|
| 2260 | free_hstat (&hstat);
|
|---|
| 2261 | xfree_null (dummy);
|
|---|
| 2262 | return err;
|
|---|
| 2263 | case NEWLOCATION:
|
|---|
| 2264 | /* Return the new location to the caller. */
|
|---|
| 2265 | if (!hstat.newloc)
|
|---|
| 2266 | {
|
|---|
| 2267 | logprintf (LOG_NOTQUIET,
|
|---|
| 2268 | _("ERROR: Redirection (%d) without location.\n"),
|
|---|
| 2269 | hstat.statcode);
|
|---|
| 2270 | free_hstat (&hstat);
|
|---|
| 2271 | xfree_null (dummy);
|
|---|
| 2272 | return WRONGCODE;
|
|---|
| 2273 | }
|
|---|
| 2274 | free_hstat (&hstat);
|
|---|
| 2275 | xfree_null (dummy);
|
|---|
| 2276 | return NEWLOCATION;
|
|---|
| 2277 | case RETRUNNEEDED:
|
|---|
| 2278 | /* The file was already fully retrieved. */
|
|---|
| 2279 | free_hstat (&hstat);
|
|---|
| 2280 | xfree_null (dummy);
|
|---|
| 2281 | return RETROK;
|
|---|
| 2282 | case RETRFINISHED:
|
|---|
| 2283 | /* Deal with you later. */
|
|---|
| 2284 | break;
|
|---|
| 2285 | default:
|
|---|
| 2286 | /* All possibilities should have been exhausted. */
|
|---|
| 2287 | abort ();
|
|---|
| 2288 | }
|
|---|
| 2289 | if (!(*dt & RETROKF))
|
|---|
| 2290 | {
|
|---|
| 2291 | if (!opt.verbose)
|
|---|
| 2292 | {
|
|---|
| 2293 | /* #### Ugly ugly ugly! */
|
|---|
| 2294 | char *hurl = url_string (u, 1);
|
|---|
| 2295 | logprintf (LOG_NONVERBOSE, "%s:\n", hurl);
|
|---|
| 2296 | xfree (hurl);
|
|---|
| 2297 | }
|
|---|
| 2298 | logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
|
|---|
| 2299 | tms, hstat.statcode, escnonprint (hstat.error));
|
|---|
| 2300 | logputs (LOG_VERBOSE, "\n");
|
|---|
| 2301 | free_hstat (&hstat);
|
|---|
| 2302 | xfree_null (dummy);
|
|---|
| 2303 | return WRONGCODE;
|
|---|
| 2304 | }
|
|---|
| 2305 |
|
|---|
| 2306 | /* Did we get the time-stamp? */
|
|---|
| 2307 | if (!got_head)
|
|---|
| 2308 | {
|
|---|
| 2309 | if (opt.timestamping && !hstat.remote_time)
|
|---|
| 2310 | {
|
|---|
| 2311 | logputs (LOG_NOTQUIET, _("\
|
|---|
| 2312 | Last-modified header missing -- time-stamps turned off.\n"));
|
|---|
| 2313 | }
|
|---|
| 2314 | else if (hstat.remote_time)
|
|---|
| 2315 | {
|
|---|
| 2316 | /* Convert the date-string into struct tm. */
|
|---|
| 2317 | tmr = http_atotm (hstat.remote_time);
|
|---|
| 2318 | if (tmr == (time_t) (-1))
|
|---|
| 2319 | logputs (LOG_VERBOSE, _("\
|
|---|
| 2320 | Last-modified header invalid -- time-stamp ignored.\n"));
|
|---|
| 2321 | }
|
|---|
| 2322 | }
|
|---|
| 2323 |
|
|---|
| 2324 | /* The time-stamping section. */
|
|---|
| 2325 | if (use_ts)
|
|---|
| 2326 | {
|
|---|
| 2327 | got_head = 1;
|
|---|
| 2328 | *dt &= ~HEAD_ONLY;
|
|---|
| 2329 | use_ts = 0; /* no more time-stamping */
|
|---|
| 2330 | count = 0; /* the retrieve count for HEAD is
|
|---|
| 2331 | reset */
|
|---|
| 2332 | if (hstat.remote_time && tmr != (time_t) (-1))
|
|---|
| 2333 | {
|
|---|
| 2334 | /* Now time-stamping can be used validly. Time-stamping
|
|---|
| 2335 | means that if the sizes of the local and remote file
|
|---|
| 2336 | match, and local file is newer than the remote file,
|
|---|
| 2337 | it will not be retrieved. Otherwise, the normal
|
|---|
| 2338 | download procedure is resumed. */
|
|---|
| 2339 | if (tml >= tmr &&
|
|---|
| 2340 | (hstat.contlen == -1 || local_size == hstat.contlen))
|
|---|
| 2341 | {
|
|---|
| 2342 | logprintf (LOG_VERBOSE, _("\
|
|---|
| 2343 | Server file no newer than local file `%s' -- not retrieving.\n\n"),
|
|---|
| 2344 | local_filename);
|
|---|
| 2345 | free_hstat (&hstat);
|
|---|
| 2346 | xfree_null (dummy);
|
|---|
| 2347 | return RETROK;
|
|---|
| 2348 | }
|
|---|
| 2349 | else if (tml >= tmr)
|
|---|
| 2350 | logprintf (LOG_VERBOSE, _("\
|
|---|
| 2351 | The sizes do not match (local %s) -- retrieving.\n"),
|
|---|
| 2352 | number_to_static_string (local_size));
|
|---|
| 2353 | else
|
|---|
| 2354 | logputs (LOG_VERBOSE,
|
|---|
| 2355 | _("Remote file is newer, retrieving.\n"));
|
|---|
| 2356 | }
|
|---|
| 2357 | free_hstat (&hstat);
|
|---|
| 2358 | continue;
|
|---|
| 2359 | }
|
|---|
| 2360 | if ((tmr != (time_t) (-1))
|
|---|
| 2361 | && !opt.spider
|
|---|
| 2362 | && ((hstat.len == hstat.contlen) ||
|
|---|
| 2363 | ((hstat.res == 0) && (hstat.contlen == -1))))
|
|---|
| 2364 | {
|
|---|
| 2365 | /* #### This code repeats in http.c and ftp.c. Move it to a
|
|---|
| 2366 | function! */
|
|---|
| 2367 | const char *fl = NULL;
|
|---|
| 2368 | if (opt.output_document)
|
|---|
| 2369 | {
|
|---|
| 2370 | if (output_stream_regular)
|
|---|
| 2371 | fl = opt.output_document;
|
|---|
| 2372 | }
|
|---|
| 2373 | else
|
|---|
| 2374 | fl = *hstat.local_file;
|
|---|
| 2375 | if (fl)
|
|---|
| 2376 | touch (fl, tmr);
|
|---|
| 2377 | }
|
|---|
| 2378 | /* End of time-stamping section. */
|
|---|
| 2379 |
|
|---|
| 2380 | if (opt.spider)
|
|---|
| 2381 | {
|
|---|
| 2382 | logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode,
|
|---|
| 2383 | escnonprint (hstat.error));
|
|---|
| 2384 | xfree_null (dummy);
|
|---|
| 2385 | return RETROK;
|
|---|
| 2386 | }
|
|---|
| 2387 |
|
|---|
| 2388 | tmrate = retr_rate (hstat.rd_size, hstat.dltime, 0);
|
|---|
| 2389 |
|
|---|
| 2390 | if (hstat.len == hstat.contlen)
|
|---|
| 2391 | {
|
|---|
| 2392 | if (*dt & RETROKF)
|
|---|
| 2393 | {
|
|---|
| 2394 | logprintf (LOG_VERBOSE,
|
|---|
| 2395 | _("%s (%s) - `%s' saved [%s/%s]\n\n"),
|
|---|
| 2396 | tms, tmrate, locf,
|
|---|
| 2397 | number_to_static_string (hstat.len),
|
|---|
| 2398 | number_to_static_string (hstat.contlen));
|
|---|
| 2399 | logprintf (LOG_NONVERBOSE,
|
|---|
| 2400 | "%s URL:%s [%s/%s] -> \"%s\" [%d]\n",
|
|---|
| 2401 | tms, u->url,
|
|---|
| 2402 | number_to_static_string (hstat.len),
|
|---|
| 2403 | number_to_static_string (hstat.contlen),
|
|---|
| 2404 | locf, count);
|
|---|
| 2405 | }
|
|---|
| 2406 | ++opt.numurls;
|
|---|
| 2407 | total_downloaded_bytes += hstat.len;
|
|---|
| 2408 |
|
|---|
| 2409 | /* Remember that we downloaded the file for later ".orig" code. */
|
|---|
| 2410 | if (*dt & ADDED_HTML_EXTENSION)
|
|---|
| 2411 | downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
|
|---|
| 2412 | else
|
|---|
| 2413 | downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
|
|---|
| 2414 |
|
|---|
| 2415 | free_hstat (&hstat);
|
|---|
| 2416 | xfree_null (dummy);
|
|---|
| 2417 | return RETROK;
|
|---|
| 2418 | }
|
|---|
| 2419 | else if (hstat.res == 0) /* No read error */
|
|---|
| 2420 | {
|
|---|
| 2421 | if (hstat.contlen == -1) /* We don't know how much we were supposed
|
|---|
| 2422 | to get, so assume we succeeded. */
|
|---|
| 2423 | {
|
|---|
| 2424 | if (*dt & RETROKF)
|
|---|
| 2425 | {
|
|---|
| 2426 | logprintf (LOG_VERBOSE,
|
|---|
| 2427 | _("%s (%s) - `%s' saved [%s]\n\n"),
|
|---|
| 2428 | tms, tmrate, locf,
|
|---|
| 2429 | number_to_static_string (hstat.len));
|
|---|
| 2430 | logprintf (LOG_NONVERBOSE,
|
|---|
| 2431 | "%s URL:%s [%s] -> \"%s\" [%d]\n",
|
|---|
| 2432 | tms, u->url, number_to_static_string (hstat.len),
|
|---|
| 2433 | locf, count);
|
|---|
| 2434 | }
|
|---|
| 2435 | ++opt.numurls;
|
|---|
| 2436 | total_downloaded_bytes += hstat.len;
|
|---|
| 2437 |
|
|---|
| 2438 | /* Remember that we downloaded the file for later ".orig" code. */
|
|---|
| 2439 | if (*dt & ADDED_HTML_EXTENSION)
|
|---|
| 2440 | downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
|
|---|
| 2441 | else
|
|---|
| 2442 | downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
|
|---|
| 2443 |
|
|---|
| 2444 | free_hstat (&hstat);
|
|---|
| 2445 | xfree_null (dummy);
|
|---|
| 2446 | return RETROK;
|
|---|
| 2447 | }
|
|---|
| 2448 | else if (hstat.len < hstat.contlen) /* meaning we lost the
|
|---|
| 2449 | connection too soon */
|
|---|
| 2450 | {
|
|---|
| 2451 | logprintf (LOG_VERBOSE,
|
|---|
| 2452 | _("%s (%s) - Connection closed at byte %s. "),
|
|---|
| 2453 | tms, tmrate, number_to_static_string (hstat.len));
|
|---|
| 2454 | printwhat (count, opt.ntry);
|
|---|
| 2455 | free_hstat (&hstat);
|
|---|
| 2456 | continue;
|
|---|
| 2457 | }
|
|---|
| 2458 | else
|
|---|
| 2459 | /* Getting here would mean reading more data than
|
|---|
| 2460 | requested with content-length, which we never do. */
|
|---|
| 2461 | abort ();
|
|---|
| 2462 | }
|
|---|
| 2463 | else /* now hstat.res can only be -1 */
|
|---|
| 2464 | {
|
|---|
| 2465 | if (hstat.contlen == -1)
|
|---|
| 2466 | {
|
|---|
| 2467 | logprintf (LOG_VERBOSE,
|
|---|
| 2468 | _("%s (%s) - Read error at byte %s (%s)."),
|
|---|
| 2469 | tms, tmrate, number_to_static_string (hstat.len),
|
|---|
| 2470 | strerror (errno));
|
|---|
| 2471 | printwhat (count, opt.ntry);
|
|---|
| 2472 | free_hstat (&hstat);
|
|---|
| 2473 | continue;
|
|---|
| 2474 | }
|
|---|
| 2475 | else /* hstat.res == -1 and contlen is given */
|
|---|
| 2476 | {
|
|---|
| 2477 | logprintf (LOG_VERBOSE,
|
|---|
| 2478 | _("%s (%s) - Read error at byte %s/%s (%s). "),
|
|---|
| 2479 | tms, tmrate,
|
|---|
| 2480 | number_to_static_string (hstat.len),
|
|---|
| 2481 | number_to_static_string (hstat.contlen),
|
|---|
| 2482 | strerror (errno));
|
|---|
| 2483 | printwhat (count, opt.ntry);
|
|---|
| 2484 | free_hstat (&hstat);
|
|---|
| 2485 | continue;
|
|---|
| 2486 | }
|
|---|
| 2487 | }
|
|---|
| 2488 | /* not reached */
|
|---|
| 2489 | }
|
|---|
| 2490 | while (!opt.ntry || (count < opt.ntry));
|
|---|
| 2491 | return TRYLIMEXC;
|
|---|
| 2492 | }
|
|---|
| 2493 | |
|---|
| 2494 |
|
|---|
/* Decide whether P, the value handed back by strptime(), denotes a
   successful parse.  strptime() returns a pointer to where it stopped
   in the string; the parse counts as successful when, after optional
   white space, the remainder is empty, begins with `GMT', or begins
   with a sign followed by a digit.

   In extended regexp terms: return 1 if P matches
   "^ *(GMT|[+-][0-9]|$)", and 0 if it does not.  A NULL P (which
   strptime can return) is treated as failure and yields 0.  */
static int
check_end (const char *p)
{
  const char *rest;

  if (p == NULL)
    return 0;

  /* Step over any white space that follows the parsed text.  */
  for (rest = p; ISSPACE (*rest); rest++)
    ;

  if (*rest == '\0')
    return 1;                   /* nothing left over */
  if (rest[0] == 'G' && rest[1] == 'M' && rest[2] == 'T')
    return 1;                   /* literal GMT */
  if ((rest[0] == '+' || rest[0] == '-') && ISDIGIT (rest[1]))
    return 1;                   /* signed number, e.g. +0100 */

  return 0;
}
|
|---|
| 2517 |
|
|---|
/* Translate the textual time specification TIME_STRING into the
   number of seconds since the Epoch.

   TIME_STRING may use any of the three formats RFC2616 lets HTTP
   servers emit (RFC1123-date, RFC850-date, asctime-date), or the
   format used in the Set-Cookie header.  Timezones are ignored, and
   should be GMT.

   The return value is the computed time_t, or -1 if no format
   matched.

   Parsing is delegated to strptime, tried once per known format.
   That makes the parser less lenient than would be ideal: strptime
   does accept shortened months, one-digit days and the like, but a
   truly forgiving HTTP time parser would ignore things such as week
   days and look only at the various spellings of year, month, day,
   hour, minute and second -- and would, for example, accept ISO 8601
   out of the box.

   Free and PD alternatives were investigated, but none was usable:
   getdate was big and unwieldy and reportedly had potential
   copyright issues, and Dr. Marcus Hennecke's atotm(), distributed
   with phttpd, is excellent but not assigned to the FSF.  Hence
   strptime.  */

time_t
http_atotm (const char *time_string)
{
  /* NOTE: the Solaris strptime man page says %n and %t match white
     space, but that is not universally available.  A plain ` ' is
     used instead to mean "skip all WS", which worked under every
     strptime implementation tested.  */

  static const char *formats[] = {
    "%a, %d %b %Y %T",          /* rfc1123: Thu, 29 Jan 1998 22:12:57 */
    "%A, %d-%b-%y %T",          /* rfc850:  Thursday, 29-Jan-98 22:12:57 */
    "%a %b %d %T %Y",           /* asctime: Thu Jan 29 22:12:57 1998 */
    "%a, %d-%b-%Y %T"           /* cookies: Thu, 29-Jan-1998 22:12:57
                                   (used in Set-Cookie, defined in the
                                   Netscape cookie specification.) */
  };
  int i;

  for (i = 0; i < countof (formats); i++)
    {
      struct tm t;
      const char *stopped_at;

      /* Some strptime versions recalculate the date from whatever is
         already in struct tm.  Clear it so that stack garbage cannot
         influence the result.  */
      xzero (t);

      /* Solaris strptime does not recognize English month names in
         non-English locales; that is worked around by not setting the
         LC_TIME category.  Temporarily switching the locale to C
         around strptime would also work, but is slow and messy.  GNU
         strptime recognizes English month names alongside the local
         ones and so is unaffected.  */
      stopped_at = strptime (time_string, formats[i], &t);
      if (!check_end (stopped_at))
        continue;               /* this format did not fit; try the next */

      return timegm (&t);
    }

  /* None of the formats matched.  */
  return -1;
}
|
|---|
| 2587 | |
|---|
| 2588 |
|
|---|
| 2589 | /* Authorization support: We support three authorization schemes:
|
|---|
| 2590 |
|
|---|
| 2591 | * `Basic' scheme, consisting of base64-ing USER:PASSWORD string;
|
|---|
| 2592 |
|
|---|
| 2593 | * `Digest' scheme, added by Junio Hamano <junio@twinsun.com>,
|
|---|
| 2594 | consisting of answering to the server's challenge with the proper
|
|---|
| 2595 | MD5 digests.
|
|---|
| 2596 |
|
|---|
| 2597 | * `NTLM' ("NT Lan Manager") scheme, based on code written by Daniel
|
|---|
| 2598 | Stenberg for libcurl. Like digest, NTLM is based on a
|
|---|
| 2599 | challenge-response mechanism, but unlike digest, it is non-standard
|
|---|
| 2600 | (authenticates TCP connections rather than requests), undocumented
|
|---|
| 2601 | and Microsoft-specific. */
|
|---|
| 2602 |
|
|---|
/* Build the contents of the authentication header for the `Basic'
   scheme: the string "USER:PASSWD" is base64-encoded and the result
   is prefixed with "Basic ".  The returned string is produced by
   concat_strings; the scratch buffers live on the stack (alloca).  */

static char *
basic_authentication_encode (const char *user, const char *passwd)
{
  /* Length of "USER:PASSWD", not counting the terminating zero.  */
  int cred_len = strlen (user) + 1 + strlen (passwd);
  char *cred = (char *) alloca (cred_len + 1);
  char *encoded = (char *) alloca (BASE64_LENGTH (cred_len) + 1);

  sprintf (cred, "%s:%s", user, passwd);
  base64_encode (cred, cred_len, encoded);

  return concat_strings ("Basic ", encoded, (char *) 0);
}
|
|---|
| 2621 |
|
|---|
/* Advance the pointer lvalue X past every leading character for which
   ISSPACE is true.  X is evaluated more than once, so it must be free
   of side effects.  */
#define SKIP_WS(x) do {                         \
  for (; ISSPACE (*(x)); ++(x))                 \
    ;                                           \
} while (0)
|
|---|
| 2626 |
|
|---|
| 2627 | #ifdef ENABLE_DIGEST
|
|---|
/* Parse one field of an HTTP `WWW-Authenticate:' header.  AU points
   to the beginning of a field in such a header.  If the field begins
   with ATTR_NAME ("realm", "opaque", and "nonce" are used by the
   current digest authorization code), its quoted value is copied
   into the (char *) variable pointed to by RET; any string previously
   stored there is freed first.

   The name is compared case-insensitively, because HTTP
   authentication parameter names are case-insensitive: a server that
   sends `Realm="..."' must be understood just like one that sends
   `realm="..."'.  Only the first strlen (ATTR_NAME) characters of the
   field are compared.

   Returns:
     0         if the field does not begin with ATTR_NAME (*RET is
               left untouched);
     negative  if the name matched but what follows is not of the form
               [WS] = [WS] "value" (malformed header);
     positive  the number of bytes consumed, counted from AU up to and
               including the closing quote.  */
static int
extract_header_attr (const char *au, const char *attr_name, char **ret)
{
  const char *ep;
  const char *cp = au;
  size_t namelen = strlen (attr_name);

  if (strncasecmp (cp, attr_name, namelen) != 0)
    return 0;                   /* some other attribute */

  cp += namelen;
  if (!*cp)
    return -1;
  SKIP_WS (cp);
  if (*cp != '=')
    return -1;
  if (!*++cp)
    return -1;
  SKIP_WS (cp);
  if (*cp != '\"')
    return -1;
  if (!*++cp)
    return -1;

  /* CP is now at the first character of the value; look for the
     closing quote.  */
  for (ep = cp; *ep && *ep != '\"'; ep++)
    ;
  if (!*ep)
    return -1;                  /* unterminated quoted string */

  xfree_null (*ret);
  *ret = strdupdelim (cp, ep);
  return ep - au + 1;
}
|
|---|
| 2666 |
|
|---|
| 2667 | /* Dump the hexadecimal representation of HASH to BUF. HASH should be
|
|---|
| 2668 | an array of 16 bytes containing the hash keys, and BUF should be a
|
|---|
| 2669 | buffer of 33 writable characters (32 for hex digits plus one for
|
|---|
| 2670 | zero termination). */
|
|---|
| 2671 | static void
|
|---|
| 2672 | dump_hash (unsigned char *buf, const unsigned char *hash)
|
|---|
| 2673 | {
|
|---|
| 2674 | int i;
|
|---|
| 2675 |
|
|---|
| 2676 | for (i = 0; i < MD5_HASHLEN; i++, hash++)
|
|---|
| 2677 | {
|
|---|
| 2678 | *buf++ = XNUM_TO_digit (*hash >> 4);
|
|---|
| 2679 | *buf++ = XNUM_TO_digit (*hash & 0xf);
|
|---|
| 2680 | }
|
|---|
| 2681 | *buf = '\0';
|
|---|
| 2682 | }
|
|---|
| 2683 |
|
|---|
| 2684 | /* Take the line apart to find the challenge, and compose a digest
|
|---|
| 2685 | authorization header. See RFC2069 section 2.1.2. */
|
|---|
| 2686 | static char *
|
|---|
| 2687 | digest_authentication_encode (const char *au, const char *user,
|
|---|
| 2688 | const char *passwd, const char *method,
|
|---|
| 2689 | const char *path)
|
|---|
| 2690 | {
|
|---|
| 2691 | static char *realm, *opaque, *nonce;
|
|---|
| 2692 | static struct {
|
|---|
| 2693 | const char *name;
|
|---|
| 2694 | char **variable;
|
|---|
| 2695 | } options[] = {
|
|---|
| 2696 | { "realm", &realm },
|
|---|
| 2697 | { "opaque", &opaque },
|
|---|
| 2698 | { "nonce", &nonce }
|
|---|
| 2699 | };
|
|---|
| 2700 | char *res;
|
|---|
| 2701 |
|
|---|
| 2702 | realm = opaque = nonce = NULL;
|
|---|
| 2703 |
|
|---|
| 2704 | au += 6; /* skip over `Digest' */
|
|---|
| 2705 | while (*au)
|
|---|
| 2706 | {
|
|---|
| 2707 | int i;
|
|---|
| 2708 |
|
|---|
| 2709 | SKIP_WS (au);
|
|---|
| 2710 | for (i = 0; i < countof (options); i++)
|
|---|
| 2711 | {
|
|---|
| 2712 | int skip = extract_header_attr (au, options[i].name,
|
|---|
| 2713 | options[i].variable);
|
|---|
| 2714 | if (skip < 0)
|
|---|
| 2715 | {
|
|---|
| 2716 | xfree_null (realm);
|
|---|
| 2717 | xfree_null (opaque);
|
|---|
| 2718 | xfree_null (nonce);
|
|---|
| 2719 | return NULL;
|
|---|
| 2720 | }
|
|---|
| 2721 | else if (skip)
|
|---|
| 2722 | {
|
|---|
| 2723 | au += skip;
|
|---|
| 2724 | break;
|
|---|
| 2725 | }
|
|---|
| 2726 | }
|
|---|
| 2727 | if (i == countof (options))
|
|---|
| 2728 | {
|
|---|
| 2729 | while (*au && *au != '=')
|
|---|
| 2730 | au++;
|
|---|
| 2731 | if (*au && *++au)
|
|---|
| 2732 | {
|
|---|
| 2733 | SKIP_WS (au);
|
|---|
| 2734 | if (*au == '\"')
|
|---|
| 2735 | {
|
|---|
| 2736 | au++;
|
|---|
| 2737 | while (*au && *au != '\"')
|
|---|
| 2738 | au++;
|
|---|
| 2739 | if (*au)
|
|---|
| 2740 | au++;
|
|---|
| 2741 | }
|
|---|
| 2742 | }
|
|---|
| 2743 | }
|
|---|
| 2744 | while (*au && *au != ',')
|
|---|
| 2745 | au++;
|
|---|
| 2746 | if (*au)
|
|---|
| 2747 | au++;
|
|---|
| 2748 | }
|
|---|
| 2749 | if (!realm || !nonce || !user || !passwd || !path || !method)
|
|---|
| 2750 | {
|
|---|
| 2751 | xfree_null (realm);
|
|---|
| 2752 | xfree_null (opaque);
|
|---|
| 2753 | xfree_null (nonce);
|
|---|
| 2754 | return NULL;
|
|---|
| 2755 | }
|
|---|
| 2756 |
|
|---|
| 2757 | /* Calculate the digest value. */
|
|---|
| 2758 | {
|
|---|
| 2759 | ALLOCA_MD5_CONTEXT (ctx);
|
|---|
| 2760 | unsigned char hash[MD5_HASHLEN];
|
|---|
| 2761 | unsigned char a1buf[MD5_HASHLEN * 2 + 1], a2buf[MD5_HASHLEN * 2 + 1];
|
|---|
| 2762 | unsigned char response_digest[MD5_HASHLEN * 2 + 1];
|
|---|
| 2763 |
|
|---|
| 2764 | /* A1BUF = H(user ":" realm ":" password) */
|
|---|
| 2765 | gen_md5_init (ctx);
|
|---|
| 2766 | gen_md5_update ((unsigned char *)user, strlen (user), ctx);
|
|---|
| 2767 | gen_md5_update ((unsigned char *)":", 1, ctx);
|
|---|
| 2768 | gen_md5_update ((unsigned char *)realm, strlen (realm), ctx);
|
|---|
| 2769 | gen_md5_update ((unsigned char *)":", 1, ctx);
|
|---|
| 2770 | gen_md5_update ((unsigned char *)passwd, strlen (passwd), ctx);
|
|---|
| 2771 | gen_md5_finish (ctx, hash);
|
|---|
| 2772 | dump_hash (a1buf, hash);
|
|---|
| 2773 |
|
|---|
| 2774 | /* A2BUF = H(method ":" path) */
|
|---|
| 2775 | gen_md5_init (ctx);
|
|---|
| 2776 | gen_md5_update ((unsigned char *)method, strlen (method), ctx);
|
|---|
| 2777 | gen_md5_update ((unsigned char *)":", 1, ctx);
|
|---|
| 2778 | gen_md5_update ((unsigned char *)path, strlen (path), ctx);
|
|---|
| 2779 | gen_md5_finish (ctx, hash);
|
|---|
| 2780 | dump_hash (a2buf, hash);
|
|---|
| 2781 |
|
|---|
| 2782 | /* RESPONSE_DIGEST = H(A1BUF ":" nonce ":" A2BUF) */
|
|---|
| 2783 | gen_md5_init (ctx);
|
|---|
| 2784 | gen_md5_update (a1buf, MD5_HASHLEN * 2, ctx);
|
|---|
| 2785 | gen_md5_update ((unsigned char *)":", 1, ctx);
|
|---|
| 2786 | gen_md5_update ((unsigned char *)nonce, strlen (nonce), ctx);
|
|---|
| 2787 | gen_md5_update ((unsigned char *)":", 1, ctx);
|
|---|
| 2788 | gen_md5_update (a2buf, MD5_HASHLEN * 2, ctx);
|
|---|
| 2789 | gen_md5_finish (ctx, hash);
|
|---|
| 2790 | dump_hash (response_digest, hash);
|
|---|
| 2791 |
|
|---|
| 2792 | res = (char*) xmalloc (strlen (user)
|
|---|
| 2793 | + strlen (user)
|
|---|
| 2794 | + strlen (realm)
|
|---|
| 2795 | + strlen (nonce)
|
|---|
| 2796 | + strlen (path)
|
|---|
| 2797 | + 2 * MD5_HASHLEN /*strlen (response_digest)*/
|
|---|
| 2798 | + (opaque ? strlen (opaque) : 0)
|
|---|
| 2799 | + 128);
|
|---|
| 2800 | sprintf (res, "Digest \
|
|---|
| 2801 | username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\", response=\"%s\"",
|
|---|
| 2802 | user, realm, nonce, path, response_digest);
|
|---|
| 2803 | if (opaque)
|
|---|
| 2804 | {
|
|---|
| 2805 | char *p = res + strlen (res);
|
|---|
| 2806 | strcat (p, ", opaque=\"");
|
|---|
| 2807 | strcat (p, opaque);
|
|---|
| 2808 | strcat (p, "\"");
|
|---|
| 2809 | }
|
|---|
| 2810 | }
|
|---|
| 2811 | return res;
|
|---|
| 2812 | }
|
|---|
| 2813 | #endif /* ENABLE_DIGEST */
|
|---|
| 2814 |
|
|---|
/* Length of a string literal.  sizeof counts the terminating \0, so
   one is subtracted.  */
#define STRSIZE(literal) (sizeof (literal) - 1)

/* True if the characters in [b, e) begin with LITERAL (a string
   literal, compared case-insensitively) and the match is followed
   either by the end of the range or by a whitespace character.  The
   length test comes first so that nothing past E is examined.  */
#define STARTS(literal, b, e)                                   \
  ((e) - (b) >= STRSIZE (literal)                               \
   && !strncasecmp ((b), literal, STRSIZE (literal))            \
   && ((e) - (b) == STRSIZE (literal)                           \
       || ISSPACE ((b)[STRSIZE (literal)])))
|
|---|
| 2827 |
|
|---|
/* Return non-zero if the text in [HDRBEG, HDREND) starts with the
   name of an authentication scheme this build can handle: `Basic'
   always, `Digest' when ENABLE_DIGEST is defined, and `NTLM' when
   ENABLE_NTLM is defined.  Return 0 otherwise.  */
static int
known_authentication_scheme_p (const char *hdrbeg, const char *hdrend)
{
  if (STARTS ("Basic", hdrbeg, hdrend))
    return 1;
#ifdef ENABLE_DIGEST
  if (STARTS ("Digest", hdrbeg, hdrend))
    return 1;
#endif
#ifdef ENABLE_NTLM
  if (STARTS ("NTLM", hdrbeg, hdrend))
    return 1;
#endif
  return 0;
}

#undef STARTS
|
|---|
| 2842 |
|
|---|
/* Produce the value of the HTTP authorization request header that
   answers the `WWW-Authenticate' response header AU.  The scheme
   named at the start of AU selects the encoder: `Basic' always,
   `Digest' when ENABLE_DIGEST is defined, `NTLM' when ENABLE_NTLM is
   defined.

   *FINISHED is set to 1 for Basic and Digest, and when ntlm_input
   rejects the NTLM challenge (in which case NULL is returned);
   otherwise it is left for ntlm_output to set.

   This must be called only with a scheme accepted by
   known_authentication_scheme_p, which is why looking at the first
   letter alone is sufficient.  Anything else aborts.  */
static char *
create_authorization_line (const char *au, const char *user,
                           const char *passwd, const char *method,
                           const char *path, int *finished)
{
  const int scheme_letter = TOUPPER (*au);

  if (scheme_letter == 'B')     /* Basic */
    {
      *finished = 1;
      return basic_authentication_encode (user, passwd);
    }
#ifdef ENABLE_DIGEST
  if (scheme_letter == 'D')     /* Digest */
    {
      *finished = 1;
      return digest_authentication_encode (au, user, passwd, method, path);
    }
#endif
#ifdef ENABLE_NTLM
  if (scheme_letter == 'N')     /* NTLM */
    {
      if (ntlm_input (&pconn.ntlm, au))
        return ntlm_output (&pconn.ntlm, user, passwd, finished);
      *finished = 1;
      return NULL;
    }
#endif

  /* We shouldn't get here -- the caller is supposed to pass only
     values approved by known_authentication_scheme_p.  */
  abort ();
}
|
|---|
| 2880 | |
|---|
| 2881 |
|
|---|
| 2882 | void
|
|---|
| 2883 | save_cookies (void)
|
|---|
| 2884 | {
|
|---|
| 2885 | if (wget_cookie_jar)
|
|---|
| 2886 | cookie_jar_save (wget_cookie_jar, opt.cookies_output);
|
|---|
| 2887 | }
|
|---|
| 2888 |
|
|---|
| 2889 | void
|
|---|
| 2890 | http_cleanup (void)
|
|---|
| 2891 | {
|
|---|
| 2892 | xfree_null (pconn.host);
|
|---|
| 2893 | if (wget_cookie_jar)
|
|---|
| 2894 | cookie_jar_delete (wget_cookie_jar);
|
|---|
| 2895 | }
|
|---|