| 1 | /* Support for cookies.
|
|---|
| 2 | Copyright (C) 2001, 2002 Free Software Foundation, Inc.
|
|---|
| 3 |
|
|---|
| 4 | This file is part of GNU Wget.
|
|---|
| 5 |
|
|---|
| 6 | GNU Wget is free software; you can redistribute it and/or modify
|
|---|
| 7 | it under the terms of the GNU General Public License as published by
|
|---|
| 8 | the Free Software Foundation; either version 2 of the License, or (at
|
|---|
| 9 | your option) any later version.
|
|---|
| 10 |
|
|---|
| 11 | GNU Wget is distributed in the hope that it will be useful, but
|
|---|
| 12 | WITHOUT ANY WARRANTY; without even the implied warranty of
|
|---|
| 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|---|
| 14 | General Public License for more details.
|
|---|
| 15 |
|
|---|
| 16 | You should have received a copy of the GNU General Public License
|
|---|
| 17 | along with Wget; if not, write to the Free Software
|
|---|
| 18 | Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|---|
| 19 |
|
|---|
| 20 | In addition, as a special exception, the Free Software Foundation
|
|---|
| 21 | gives permission to link the code of its release of Wget with the
|
|---|
| 22 | OpenSSL project's "OpenSSL" library (or with modified versions of it
|
|---|
| 23 | that use the same license as the "OpenSSL" library), and distribute
|
|---|
| 24 | the linked executables. You must obey the GNU General Public License
|
|---|
| 25 | in all respects for all of the code used other than "OpenSSL". If you
|
|---|
| 26 | modify this file, you may extend this exception to your version of the
|
|---|
| 27 | file, but you are not obligated to do so. If you do not wish to do
|
|---|
| 28 | so, delete this exception statement from your version. */
|
|---|
| 29 |
|
|---|
| 30 | /* Written by Hrvoje Niksic. Parts are loosely inspired by the
|
|---|
| 31 | cookie patch submitted by Tomasz Wegrzanowski.
|
|---|
| 32 |
|
|---|
| 33 | This implements the client-side cookie support, as specified
|
|---|
| 34 | (loosely) by Netscape's "preliminary specification", currently
|
|---|
| 35 | available at:
|
|---|
| 36 |
|
|---|
| 37 | http://wp.netscape.com/newsref/std/cookie_spec.html
|
|---|
| 38 |
|
|---|
| 39 | rfc2109 is not supported because of its incompatibilities with the
|
|---|
| 40 | above widely-used specification. rfc2965 is entirely ignored,
|
|---|
| 41 | since popular client software doesn't implement it, and even the
|
|---|
| 42 | sites that do send Set-Cookie2 also emit Set-Cookie for
|
|---|
| 43 | compatibility. */
|
|---|
| 44 |
|
|---|
| 45 | #include <config.h>
|
|---|
| 46 |
|
|---|
| 47 | #include <stdio.h>
|
|---|
| 48 | #ifdef HAVE_STRING_H
|
|---|
| 49 | # include <string.h>
|
|---|
| 50 | #else
|
|---|
| 51 | # include <strings.h>
|
|---|
| 52 | #endif
|
|---|
| 53 | #include <stdlib.h>
|
|---|
| 54 | #include <assert.h>
|
|---|
| 55 | #include <errno.h>
|
|---|
| 56 |
|
|---|
| 57 | #include "wget.h"
|
|---|
| 58 | #include "utils.h"
|
|---|
| 59 | #include "hash.h"
|
|---|
| 60 | #include "cookies.h"
|
|---|
| 61 |
|
|---|
| 62 | /* This should *really* be in a .h file! */
|
|---|
| 63 | time_t http_atotm PARAMS ((const char *));
|
|---|
| 64 | |
|---|
| 65 |
|
|---|
| 66 | /* Declarations of `struct cookie' and the most basic functions. */
|
|---|
| 67 |
|
|---|
/* A cookie jar is both the storage for cookies and the index used to
   retrieve them efficiently.  Cookies that share a domain are kept
   in a singly linked list called a "chain"; the chain for a domain
   is found by looking the domain up in the jar's `chains' table.

   For example, the cookies stored under google.com are reached with
   hash_table_get (jar->chains, "google.com").  When sending cookies
   to `www.google.com' one has to consult the chains of both
   `www.google.com' and `google.com', but the code never has to walk
   through *all* the cookies in the jar.  */

struct cookie_jar {
  struct hash_table *chains;	/* maps a domain name to the head of
				   that domain's cookie chain */

  int cookie_count;		/* number of cookies in the jar */
};
|
|---|
| 86 |
|
|---|
| 87 | /* Value set by entry point functions, so that the low-level
|
|---|
| 88 | routines don't need to call time() all the time. */
|
|---|
| 89 | static time_t cookies_now;
|
|---|
| 90 |
|
|---|
| 91 | struct cookie_jar *
|
|---|
| 92 | cookie_jar_new (void)
|
|---|
| 93 | {
|
|---|
| 94 | struct cookie_jar *jar = xnew (struct cookie_jar);
|
|---|
| 95 | jar->chains = make_nocase_string_hash_table (0);
|
|---|
| 96 | jar->cookie_count = 0;
|
|---|
| 97 | return jar;
|
|---|
| 98 | }
|
|---|
| 99 |
|
|---|
/* A single cookie, as stored in a jar's per-domain chain.  */

struct cookie {
  /* Where the cookie applies.  */
  char *domain;			/* domain of the cookie */
  int port;			/* port number; PORT_ANY when the
				   cookie is not tied to a port */
  char *path;			/* path prefix of the cookie */

  /* Non-zero once a `secure' attribute has been seen in the
     Set-Cookie header.  NOTE(review): presumably this restricts the
     cookie to https connections -- confirm where cookies are sent.  */
  int secure;

  int domain_exact;		/* whether DOMAIN must match as a
				   whole */

  /* Lifetime.  */
  int permanent;		/* whether the cookie should outlive
				   the session */
  time_t expiry_time;		/* time when the cookie expires; 0
				   means undetermined */

  int discard_requested;	/* whether the cookie was created only
				   to request discarding another
				   cookie */

  /* Payload.  */
  char *attr;			/* cookie attribute name */
  char *value;			/* cookie attribute value */

  struct cookie *next;		/* next cookie in the chain of the
				   same domain */
};
|
|---|
| 126 |
|
|---|
| 127 | #define PORT_ANY (-1)
|
|---|
| 128 |
|
|---|
| 129 | /* Allocate and return a new, empty cookie structure. */
|
|---|
| 130 |
|
|---|
| 131 | static struct cookie *
|
|---|
| 132 | cookie_new (void)
|
|---|
| 133 | {
|
|---|
| 134 | struct cookie *cookie = xnew0 (struct cookie);
|
|---|
| 135 |
|
|---|
| 136 | /* Both cookie->permanent and cookie->expiry_time are now 0. This
|
|---|
| 137 | means that the cookie doesn't expire, but is only valid for this
|
|---|
| 138 | session (i.e. not written out to disk). */
|
|---|
| 139 |
|
|---|
| 140 | cookie->port = PORT_ANY;
|
|---|
| 141 | return cookie;
|
|---|
| 142 | }
|
|---|
| 143 |
|
|---|
| 144 | /* Non-zero if the cookie has expired. Assumes cookies_now has been
|
|---|
| 145 | set by one of the entry point functions. */
|
|---|
| 146 |
|
|---|
| 147 | static int
|
|---|
| 148 | cookie_expired_p (const struct cookie *c)
|
|---|
| 149 | {
|
|---|
| 150 | return c->expiry_time != 0 && c->expiry_time < cookies_now;
|
|---|
| 151 | }
|
|---|
| 152 |
|
|---|
| 153 | /* Deallocate COOKIE and its components. */
|
|---|
| 154 |
|
|---|
| 155 | static void
|
|---|
| 156 | delete_cookie (struct cookie *cookie)
|
|---|
| 157 | {
|
|---|
| 158 | xfree_null (cookie->domain);
|
|---|
| 159 | xfree_null (cookie->path);
|
|---|
| 160 | xfree_null (cookie->attr);
|
|---|
| 161 | xfree_null (cookie->value);
|
|---|
| 162 | xfree (cookie);
|
|---|
| 163 | }
|
|---|
| 164 | |
|---|
| 165 |
|
|---|
| 166 | /* Functions for storing cookies.
|
|---|
| 167 |
|
|---|
| 168 | All cookies can be reached beginning with jar->chains. The key in
|
|---|
| 169 | that table is the domain name, and the value is a linked list of
|
|---|
| 170 | all cookies from that domain. Every new cookie is placed on the
|
|---|
| 171 | head of the list. */
|
|---|
| 172 |
|
|---|
| 173 | /* Find and return a cookie in JAR whose domain, path, and attribute
|
|---|
| 174 | name correspond to COOKIE. If found, PREVPTR will point to the
|
|---|
| 175 | location of the cookie previous in chain, or NULL if the found
|
|---|
| 176 | cookie is the head of a chain.
|
|---|
| 177 |
|
|---|
| 178 | If no matching cookie is found, return NULL. */
|
|---|
| 179 |
|
|---|
| 180 | static struct cookie *
|
|---|
| 181 | find_matching_cookie (struct cookie_jar *jar, struct cookie *cookie,
|
|---|
| 182 | struct cookie **prevptr)
|
|---|
| 183 | {
|
|---|
| 184 | struct cookie *chain, *prev;
|
|---|
| 185 |
|
|---|
| 186 | chain = hash_table_get (jar->chains, cookie->domain);
|
|---|
| 187 | if (!chain)
|
|---|
| 188 | goto nomatch;
|
|---|
| 189 |
|
|---|
| 190 | prev = NULL;
|
|---|
| 191 | for (; chain; prev = chain, chain = chain->next)
|
|---|
| 192 | if (0 == strcmp (cookie->path, chain->path)
|
|---|
| 193 | && 0 == strcmp (cookie->attr, chain->attr)
|
|---|
| 194 | && cookie->port == chain->port)
|
|---|
| 195 | {
|
|---|
| 196 | *prevptr = prev;
|
|---|
| 197 | return chain;
|
|---|
| 198 | }
|
|---|
| 199 |
|
|---|
| 200 | nomatch:
|
|---|
| 201 | *prevptr = NULL;
|
|---|
| 202 | return NULL;
|
|---|
| 203 | }
|
|---|
| 204 |
|
|---|
| 205 | /* Store COOKIE to the jar.
|
|---|
| 206 |
|
|---|
| 207 | This is done by placing COOKIE at the head of its chain. However,
|
|---|
| 208 | if COOKIE matches a cookie already in memory, as determined by
|
|---|
| 209 | find_matching_cookie, the old cookie is unlinked and destroyed.
|
|---|
| 210 |
|
|---|
| 211 | The key of each chain's hash table entry is allocated only the
|
|---|
| 212 | first time; next hash_table_put's reuse the same key. */
|
|---|
| 213 |
|
|---|
| 214 | static void
|
|---|
| 215 | store_cookie (struct cookie_jar *jar, struct cookie *cookie)
|
|---|
| 216 | {
|
|---|
| 217 | struct cookie *chain_head;
|
|---|
| 218 | char *chain_key;
|
|---|
| 219 |
|
|---|
| 220 | if (hash_table_get_pair (jar->chains, cookie->domain,
|
|---|
| 221 | &chain_key, &chain_head))
|
|---|
| 222 | {
|
|---|
| 223 | /* A chain of cookies in this domain already exists. Check for
|
|---|
| 224 | duplicates -- if an extant cookie exactly matches our domain,
|
|---|
| 225 | port, path, and name, replace it. */
|
|---|
| 226 | struct cookie *prev;
|
|---|
| 227 | struct cookie *victim = find_matching_cookie (jar, cookie, &prev);
|
|---|
| 228 |
|
|---|
| 229 | if (victim)
|
|---|
| 230 | {
|
|---|
| 231 | /* Remove VICTIM from the chain. COOKIE will be placed at
|
|---|
| 232 | the head. */
|
|---|
| 233 | if (prev)
|
|---|
| 234 | {
|
|---|
| 235 | prev->next = victim->next;
|
|---|
| 236 | cookie->next = chain_head;
|
|---|
| 237 | }
|
|---|
| 238 | else
|
|---|
| 239 | {
|
|---|
| 240 | /* prev is NULL; apparently VICTIM was at the head of
|
|---|
| 241 | the chain. This place will be taken by COOKIE, so
|
|---|
| 242 | all we need to do is: */
|
|---|
| 243 | cookie->next = victim->next;
|
|---|
| 244 | }
|
|---|
| 245 | delete_cookie (victim);
|
|---|
| 246 | --jar->cookie_count;
|
|---|
| 247 | DEBUGP (("Deleted old cookie (to be replaced.)\n"));
|
|---|
| 248 | }
|
|---|
| 249 | else
|
|---|
| 250 | cookie->next = chain_head;
|
|---|
| 251 | }
|
|---|
| 252 | else
|
|---|
| 253 | {
|
|---|
| 254 | /* We are now creating the chain. Use a copy of cookie->domain
|
|---|
| 255 | as the key for the life-time of the chain. Using
|
|---|
| 256 | cookie->domain would be unsafe because the life-time of the
|
|---|
| 257 | chain may exceed the life-time of the cookie. (Cookies may
|
|---|
| 258 | be deleted from the chain by this very function.) */
|
|---|
| 259 | cookie->next = NULL;
|
|---|
| 260 | chain_key = xstrdup (cookie->domain);
|
|---|
| 261 | }
|
|---|
| 262 |
|
|---|
| 263 | hash_table_put (jar->chains, chain_key, cookie);
|
|---|
| 264 | ++jar->cookie_count;
|
|---|
| 265 |
|
|---|
| 266 | #ifdef ENABLE_DEBUG
|
|---|
| 267 | if (opt.debug)
|
|---|
| 268 | {
|
|---|
| 269 | time_t exptime = cookie->expiry_time;
|
|---|
| 270 | DEBUGP (("\nStored cookie %s %d%s %s <%s> <%s> [expiry %s] %s %s\n",
|
|---|
| 271 | cookie->domain, cookie->port,
|
|---|
| 272 | cookie->port == PORT_ANY ? " (ANY)" : "",
|
|---|
| 273 | cookie->path,
|
|---|
| 274 | cookie->permanent ? "permanent" : "session",
|
|---|
| 275 | cookie->secure ? "secure" : "insecure",
|
|---|
| 276 | cookie->expiry_time ? datetime_str (&exptime) : "none",
|
|---|
| 277 | cookie->attr, cookie->value));
|
|---|
| 278 | }
|
|---|
| 279 | #endif
|
|---|
| 280 | }
|
|---|
| 281 |
|
|---|
| 282 | /* Discard a cookie matching COOKIE's domain, port, path, and
|
|---|
| 283 | attribute name. This gets called when we encounter a cookie whose
|
|---|
| 284 | expiry date is in the past, or whose max-age is set to 0. The
|
|---|
| 285 | former corresponds to netscape cookie spec, while the latter is
|
|---|
| 286 | specified by rfc2109. */
|
|---|
| 287 |
|
|---|
| 288 | static void
|
|---|
| 289 | discard_matching_cookie (struct cookie_jar *jar, struct cookie *cookie)
|
|---|
| 290 | {
|
|---|
| 291 | struct cookie *prev, *victim;
|
|---|
| 292 |
|
|---|
| 293 | if (!hash_table_count (jar->chains))
|
|---|
| 294 | /* No elements == nothing to discard. */
|
|---|
| 295 | return;
|
|---|
| 296 |
|
|---|
| 297 | victim = find_matching_cookie (jar, cookie, &prev);
|
|---|
| 298 | if (victim)
|
|---|
| 299 | {
|
|---|
| 300 | if (prev)
|
|---|
| 301 | /* Simply unchain the victim. */
|
|---|
| 302 | prev->next = victim->next;
|
|---|
| 303 | else
|
|---|
| 304 | {
|
|---|
| 305 | /* VICTIM was head of its chain. We need to place a new
|
|---|
| 306 | cookie at the head. */
|
|---|
| 307 | char *chain_key = NULL;
|
|---|
| 308 | int res;
|
|---|
| 309 |
|
|---|
| 310 | res = hash_table_get_pair (jar->chains, victim->domain,
|
|---|
| 311 | &chain_key, NULL);
|
|---|
| 312 | assert (res != 0);
|
|---|
| 313 | if (!victim->next)
|
|---|
| 314 | {
|
|---|
| 315 | /* VICTIM was the only cookie in the chain. Destroy the
|
|---|
| 316 | chain and deallocate the chain key. */
|
|---|
| 317 | hash_table_remove (jar->chains, victim->domain);
|
|---|
| 318 | xfree (chain_key);
|
|---|
| 319 | }
|
|---|
| 320 | else
|
|---|
| 321 | hash_table_put (jar->chains, chain_key, victim->next);
|
|---|
| 322 | }
|
|---|
| 323 | delete_cookie (victim);
|
|---|
| 324 | DEBUGP (("Discarded old cookie.\n"));
|
|---|
| 325 | }
|
|---|
| 326 | }
|
|---|
| 327 | |
|---|
| 328 |
|
|---|
| 329 | /* Functions for parsing the `Set-Cookie' header, and creating new
|
|---|
| 330 | cookies from the wire. */
|
|---|
| 331 |
|
|---|
| 332 | #define NAME_IS(string_literal) \
|
|---|
| 333 | BOUNDED_EQUAL_NO_CASE (name_b, name_e, string_literal)
|
|---|
| 334 |
|
|---|
| 335 | #define VALUE_EXISTS (value_b && value_e)
|
|---|
| 336 |
|
|---|
| 337 | #define VALUE_NON_EMPTY (VALUE_EXISTS && (value_b != value_e))
|
|---|
| 338 |
|
|---|
| 339 | /* Update the appropriate cookie field. [name_b, name_e) are expected
|
|---|
| 340 | to delimit the attribute name, while [value_b, value_e) (optional)
|
|---|
| 341 | should delimit the attribute value.
|
|---|
| 342 |
|
|---|
| 343 | When called the first time, it will set the cookie's attribute name
|
|---|
| 344 | and value. After that, it will check the attribute name for
|
|---|
| 345 | special fields such as `domain', `path', etc. Where appropriate,
|
|---|
| 346 | it will parse the values of the fields it recognizes and fill the
|
|---|
| 347 | corresponding fields in COOKIE.
|
|---|
| 348 |
|
|---|
| 349 | Returns 1 on success. Returns zero in case a syntax error is
|
|---|
| 350 | found; such a cookie should be discarded. */
|
|---|
| 351 |
|
|---|
| 352 | static int
|
|---|
| 353 | update_cookie_field (struct cookie *cookie,
|
|---|
| 354 | const char *name_b, const char *name_e,
|
|---|
| 355 | const char *value_b, const char *value_e)
|
|---|
| 356 | {
|
|---|
| 357 | assert (name_b != NULL && name_e != NULL);
|
|---|
| 358 |
|
|---|
| 359 | if (!cookie->attr)
|
|---|
| 360 | {
|
|---|
| 361 | if (!VALUE_EXISTS)
|
|---|
| 362 | return 0;
|
|---|
| 363 | cookie->attr = strdupdelim (name_b, name_e);
|
|---|
| 364 | cookie->value = strdupdelim (value_b, value_e);
|
|---|
| 365 | return 1;
|
|---|
| 366 | }
|
|---|
| 367 |
|
|---|
| 368 | if (NAME_IS ("domain"))
|
|---|
| 369 | {
|
|---|
| 370 | if (!VALUE_NON_EMPTY)
|
|---|
| 371 | return 0;
|
|---|
| 372 | xfree_null (cookie->domain);
|
|---|
| 373 | /* Strictly speaking, we should set cookie->domain_exact if the
|
|---|
| 374 | domain doesn't begin with a dot. But many sites set the
|
|---|
| 375 | domain to "foo.com" and expect "subhost.foo.com" to get the
|
|---|
| 376 | cookie, and it apparently works. */
|
|---|
| 377 | if (*value_b == '.')
|
|---|
| 378 | ++value_b;
|
|---|
| 379 | cookie->domain = strdupdelim (value_b, value_e);
|
|---|
| 380 | return 1;
|
|---|
| 381 | }
|
|---|
| 382 | else if (NAME_IS ("path"))
|
|---|
| 383 | {
|
|---|
| 384 | if (!VALUE_NON_EMPTY)
|
|---|
| 385 | return 0;
|
|---|
| 386 | xfree_null (cookie->path);
|
|---|
| 387 | cookie->path = strdupdelim (value_b, value_e);
|
|---|
| 388 | return 1;
|
|---|
| 389 | }
|
|---|
| 390 | else if (NAME_IS ("expires"))
|
|---|
| 391 | {
|
|---|
| 392 | char *value_copy;
|
|---|
| 393 | time_t expires;
|
|---|
| 394 |
|
|---|
| 395 | if (!VALUE_NON_EMPTY)
|
|---|
| 396 | return 0;
|
|---|
| 397 | BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
|
|---|
| 398 |
|
|---|
| 399 | expires = http_atotm (value_copy);
|
|---|
| 400 | if (expires != (time_t) -1)
|
|---|
| 401 | {
|
|---|
| 402 | cookie->permanent = 1;
|
|---|
| 403 | cookie->expiry_time = expires;
|
|---|
| 404 | }
|
|---|
| 405 | else
|
|---|
| 406 | /* Error in expiration spec. Assume default (cookie doesn't
|
|---|
| 407 | expire, but valid only for this session.) */
|
|---|
| 408 | ;
|
|---|
| 409 |
|
|---|
| 410 | /* According to netscape's specification, expiry time in the
|
|---|
| 411 | past means that discarding of a matching cookie is
|
|---|
| 412 | requested. */
|
|---|
| 413 | if (cookie->expiry_time < cookies_now)
|
|---|
| 414 | cookie->discard_requested = 1;
|
|---|
| 415 |
|
|---|
| 416 | return 1;
|
|---|
| 417 | }
|
|---|
| 418 | else if (NAME_IS ("max-age"))
|
|---|
| 419 | {
|
|---|
| 420 | double maxage = -1;
|
|---|
| 421 | char *value_copy;
|
|---|
| 422 |
|
|---|
| 423 | if (!VALUE_NON_EMPTY)
|
|---|
| 424 | return 0;
|
|---|
| 425 | BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
|
|---|
| 426 |
|
|---|
| 427 | sscanf (value_copy, "%lf", &maxage);
|
|---|
| 428 | if (maxage == -1)
|
|---|
| 429 | /* something went wrong. */
|
|---|
| 430 | return 0;
|
|---|
| 431 | cookie->permanent = 1;
|
|---|
| 432 | cookie->expiry_time = cookies_now + maxage;
|
|---|
| 433 |
|
|---|
| 434 | /* According to rfc2109, a cookie with max-age of 0 means that
|
|---|
| 435 | discarding of a matching cookie is requested. */
|
|---|
| 436 | if (maxage == 0)
|
|---|
| 437 | cookie->discard_requested = 1;
|
|---|
| 438 |
|
|---|
| 439 | return 1;
|
|---|
| 440 | }
|
|---|
| 441 | else if (NAME_IS ("secure"))
|
|---|
| 442 | {
|
|---|
| 443 | /* ignore value completely */
|
|---|
| 444 | cookie->secure = 1;
|
|---|
| 445 | return 1;
|
|---|
| 446 | }
|
|---|
| 447 | else
|
|---|
| 448 | /* Unrecognized attribute; ignore it. */
|
|---|
| 449 | return 1;
|
|---|
| 450 | }
|
|---|
| 451 |
|
|---|
| 452 | #undef NAME_IS
|
|---|
| 453 |
|
|---|
/* Evaluates to non-zero for characters that are legal in the name of
   an attribute.  This used to allow only alphanumerics, '-', and
   '_', but we need to be more lenient because a number of sites want
   to use weirder attribute names.  rfc2965 "informally specifies"
   attribute name (token) as "a sequence of non-special, non-white
   space characters".  So we allow every character in the range
   33..126 except the ones we know could harm us: the double quote,
   '=', ';' and ','.

   Note that the argument is evaluated more than once.  */

#define ATTR_NAME_CHAR(c) (!((c) <= 32 || (c) >= 127		\
			     || (c) == '"' || (c) == '='	\
			     || (c) == ';' || (c) == ','))
|
|---|
| 465 |
|
|---|
| 466 | /* Parse the contents of the `Set-Cookie' header. The header looks
|
|---|
| 467 | like this:
|
|---|
| 468 |
|
|---|
| 469 | name1=value1; name2=value2; ...
|
|---|
| 470 |
|
|---|
| 471 | Trailing semicolon is optional; spaces are allowed between all
|
|---|
| 472 | tokens. Additionally, values may be quoted.
|
|---|
| 473 |
|
|---|
| 474 | A new cookie is returned upon success, NULL otherwise. The
|
|---|
| 475 | specified CALLBACK function (normally `update_cookie_field' is used
|
|---|
| 476 | to update the fields of the newly created cookie structure. */
|
|---|
| 477 |
|
|---|
| 478 | static struct cookie *
|
|---|
| 479 | parse_set_cookies (const char *sc,
|
|---|
| 480 | int (*callback) (struct cookie *,
|
|---|
| 481 | const char *, const char *,
|
|---|
| 482 | const char *, const char *),
|
|---|
| 483 | int silent)
|
|---|
| 484 | {
|
|---|
| 485 | struct cookie *cookie = cookie_new ();
|
|---|
| 486 |
|
|---|
| 487 | /* #### Hand-written DFAs are no fun to debug. We'de be better off
|
|---|
| 488 | to rewrite this as an inline parser. */
|
|---|
| 489 |
|
|---|
| 490 | enum { S_START, S_NAME, S_NAME_POST,
|
|---|
| 491 | S_VALUE_PRE, S_VALUE, S_QUOTED_VALUE, S_VALUE_TRAILSPACE,
|
|---|
| 492 | S_ATTR_ACTION, S_DONE, S_ERROR
|
|---|
| 493 | } state = S_START;
|
|---|
| 494 |
|
|---|
| 495 | const char *p = sc;
|
|---|
| 496 | char c;
|
|---|
| 497 |
|
|---|
| 498 | const char *name_b = NULL, *name_e = NULL;
|
|---|
| 499 | const char *value_b = NULL, *value_e = NULL;
|
|---|
| 500 |
|
|---|
| 501 | c = *p;
|
|---|
| 502 |
|
|---|
| 503 | while (state != S_DONE && state != S_ERROR)
|
|---|
| 504 | {
|
|---|
| 505 | switch (state)
|
|---|
| 506 | {
|
|---|
| 507 | case S_START:
|
|---|
| 508 | if (!c)
|
|---|
| 509 | state = S_DONE;
|
|---|
| 510 | else if (ISSPACE (c))
|
|---|
| 511 | /* Strip all whitespace preceding the name. */
|
|---|
| 512 | c = *++p;
|
|---|
| 513 | else if (ATTR_NAME_CHAR (c))
|
|---|
| 514 | {
|
|---|
| 515 | name_b = p;
|
|---|
| 516 | state = S_NAME;
|
|---|
| 517 | }
|
|---|
| 518 | else
|
|---|
| 519 | /* empty attr name not allowed */
|
|---|
| 520 | state = S_ERROR;
|
|---|
| 521 | break;
|
|---|
| 522 | case S_NAME:
|
|---|
| 523 | if (!c || c == ';' || c == '=' || ISSPACE (c))
|
|---|
| 524 | {
|
|---|
| 525 | name_e = p;
|
|---|
| 526 | state = S_NAME_POST;
|
|---|
| 527 | }
|
|---|
| 528 | else if (ATTR_NAME_CHAR (c))
|
|---|
| 529 | c = *++p;
|
|---|
| 530 | else
|
|---|
| 531 | state = S_ERROR;
|
|---|
| 532 | break;
|
|---|
| 533 | case S_NAME_POST:
|
|---|
| 534 | if (!c || c == ';')
|
|---|
| 535 | {
|
|---|
| 536 | value_b = value_e = NULL;
|
|---|
| 537 | if (c == ';')
|
|---|
| 538 | c = *++p;
|
|---|
| 539 | state = S_ATTR_ACTION;
|
|---|
| 540 | }
|
|---|
| 541 | else if (c == '=')
|
|---|
| 542 | {
|
|---|
| 543 | c = *++p;
|
|---|
| 544 | state = S_VALUE_PRE;
|
|---|
| 545 | }
|
|---|
| 546 | else if (ISSPACE (c))
|
|---|
| 547 | /* Ignore space and keep the state. */
|
|---|
| 548 | c = *++p;
|
|---|
| 549 | else
|
|---|
| 550 | state = S_ERROR;
|
|---|
| 551 | break;
|
|---|
| 552 | case S_VALUE_PRE:
|
|---|
| 553 | if (!c || c == ';')
|
|---|
| 554 | {
|
|---|
| 555 | value_b = value_e = p;
|
|---|
| 556 | if (c == ';')
|
|---|
| 557 | c = *++p;
|
|---|
| 558 | state = S_ATTR_ACTION;
|
|---|
| 559 | }
|
|---|
| 560 | else if (c == '"')
|
|---|
| 561 | {
|
|---|
| 562 | c = *++p;
|
|---|
| 563 | value_b = p;
|
|---|
| 564 | state = S_QUOTED_VALUE;
|
|---|
| 565 | }
|
|---|
| 566 | else if (ISSPACE (c))
|
|---|
| 567 | c = *++p;
|
|---|
| 568 | else
|
|---|
| 569 | {
|
|---|
| 570 | value_b = p;
|
|---|
| 571 | value_e = NULL;
|
|---|
| 572 | state = S_VALUE;
|
|---|
| 573 | }
|
|---|
| 574 | break;
|
|---|
| 575 | case S_VALUE:
|
|---|
| 576 | if (!c || c == ';' || ISSPACE (c))
|
|---|
| 577 | {
|
|---|
| 578 | value_e = p;
|
|---|
| 579 | state = S_VALUE_TRAILSPACE;
|
|---|
| 580 | }
|
|---|
| 581 | else
|
|---|
| 582 | {
|
|---|
| 583 | value_e = NULL; /* no trailing space */
|
|---|
| 584 | c = *++p;
|
|---|
| 585 | }
|
|---|
| 586 | break;
|
|---|
| 587 | case S_QUOTED_VALUE:
|
|---|
| 588 | if (c == '"')
|
|---|
| 589 | {
|
|---|
| 590 | value_e = p;
|
|---|
| 591 | c = *++p;
|
|---|
| 592 | state = S_VALUE_TRAILSPACE;
|
|---|
| 593 | }
|
|---|
| 594 | else if (!c)
|
|---|
| 595 | state = S_ERROR;
|
|---|
| 596 | else
|
|---|
| 597 | c = *++p;
|
|---|
| 598 | break;
|
|---|
| 599 | case S_VALUE_TRAILSPACE:
|
|---|
| 600 | if (c == ';')
|
|---|
| 601 | {
|
|---|
| 602 | c = *++p;
|
|---|
| 603 | state = S_ATTR_ACTION;
|
|---|
| 604 | }
|
|---|
| 605 | else if (!c)
|
|---|
| 606 | state = S_ATTR_ACTION;
|
|---|
| 607 | else if (ISSPACE (c))
|
|---|
| 608 | c = *++p;
|
|---|
| 609 | else
|
|---|
| 610 | state = S_VALUE;
|
|---|
| 611 | break;
|
|---|
| 612 | case S_ATTR_ACTION:
|
|---|
| 613 | {
|
|---|
| 614 | int legal = callback (cookie, name_b, name_e, value_b, value_e);
|
|---|
| 615 | if (!legal)
|
|---|
| 616 | {
|
|---|
| 617 | if (!silent)
|
|---|
| 618 | {
|
|---|
| 619 | char *name;
|
|---|
| 620 | BOUNDED_TO_ALLOCA (name_b, name_e, name);
|
|---|
| 621 | logprintf (LOG_NOTQUIET,
|
|---|
| 622 | _("Error in Set-Cookie, field `%s'"),
|
|---|
| 623 | escnonprint (name));
|
|---|
| 624 | }
|
|---|
| 625 | state = S_ERROR;
|
|---|
| 626 | break;
|
|---|
| 627 | }
|
|---|
| 628 | state = S_START;
|
|---|
| 629 | }
|
|---|
| 630 | break;
|
|---|
| 631 | case S_DONE:
|
|---|
| 632 | case S_ERROR:
|
|---|
| 633 | /* handled by loop condition */
|
|---|
| 634 | break;
|
|---|
| 635 | }
|
|---|
| 636 | }
|
|---|
| 637 | if (state == S_DONE)
|
|---|
| 638 | return cookie;
|
|---|
| 639 |
|
|---|
| 640 | delete_cookie (cookie);
|
|---|
| 641 | if (state != S_ERROR)
|
|---|
| 642 | abort ();
|
|---|
| 643 |
|
|---|
| 644 | if (!silent)
|
|---|
| 645 | logprintf (LOG_NOTQUIET,
|
|---|
| 646 | _("Syntax error in Set-Cookie: %s at position %d.\n"),
|
|---|
| 647 | escnonprint (sc), (int) (p - sc));
|
|---|
| 648 | return NULL;
|
|---|
| 649 | }
|
|---|
| 650 | |
|---|
| 651 |
|
|---|
| 652 | /* Sanity checks. These are important, otherwise it is possible for
|
|---|
| 653 | malicious attackers to destroy important cookie information and/or
|
|---|
| 654 | violate your privacy. */
|
|---|
| 655 |
|
|---|
| 656 |
|
|---|
/* Helpers for numeric_address_p.  Both take a pointer variable P
   into a string, advance it past what they matched, and make the
   *enclosing function* return 0 when the expected input is missing.
   P is evaluated several times, so pass a plain variable.  */

/* Require one or more digits at P and skip over all of them.  */
#define REQUIRE_DIGITS(p) do {			\
  if (!ISDIGIT (*(p)))				\
    return 0;					\
  do						\
    ++(p);					\
  while (ISDIGIT (*(p)));			\
} while (0)

/* Require a '.' at P.  P is advanced even when the check fails, but
   the enclosing function returns right away in that case.  */
#define REQUIRE_DOT(p) do {			\
  if (*(p)++ != '.')				\
    return 0;					\
} while (0)
|
|---|
| 668 |
|
|---|
/* Return 1 if ADDR has the form <digits>.<digits>.<digits>.<digits>,
   with nothing before or after, and 0 otherwise.  Each group must
   contain at least one digit; neither the length nor the numeric
   range of a group is checked.

   We don't want to call network functions like inet_addr() because
   all we need is a check, preferably one that is small, fast, and
   well-defined.  */

static int
numeric_address_p (const char *addr)
{
  const char *cursor = addr;
  int group;

  for (group = 0; group < 4; group++)
    {
      /* Every group but the first is introduced by a dot.  */
      if (group > 0)
	{
	  if (*cursor != '.')
	    return 0;
	  ++cursor;
	}

      /* At least one digit, then as many more as follow.  */
      if (!(*cursor >= '0' && *cursor <= '9'))
	return 0;
      while (*cursor >= '0' && *cursor <= '9')
	++cursor;
    }

  /* Anything left over after the fourth group disqualifies ADDR.  */
  return *cursor == '\0' ? 1 : 0;
}
|
|---|
| 692 |
|
|---|
/* Check whether COOKIE_DOMAIN is an appropriate domain for HOST;
   return 1 if it is and 0 if it is not.

   Originally I tried to make the check compliant with rfc2109, but
   the sites deviated too often, so I had to fall back to "tail
   matching", as defined by the original Netscape's cookie spec.

   The numbered DEBUGP traces ("cdm: 1", " 2", ...) show how far the
   check got before it returned.  */

static int
check_domain_match (const char *cookie_domain, const char *host)
{
  static const char *known_toplevel_domains[] = {
    ".com", ".edu", ".net", ".org", ".gov", ".mil", ".int"
  };
  const char *scan;
  int components = 1;		/* number of domain components */
  int last_len = 0;		/* length of the last component */
  int prev_len = 0;		/* length of the next to last one */

  DEBUGP (("cdm: 1"));

  /* Numeric address requires exact match.  It also requires HOST to
     be an IP address.  */
  if (numeric_address_p (cookie_domain))
    return 0 == strcmp (cookie_domain, host);

  DEBUGP ((" 2"));

  /* For the sake of efficiency, check for exact match first. */
  if (0 == strcasecmp (cookie_domain, host))
    return 1;

  DEBUGP ((" 3"));

  /* The tail of HOST must match COOKIE_DOMAIN.  */
  if (!match_tail (host, cookie_domain, 1))
    return 0;

  /* We know that COOKIE_DOMAIN is a subset of HOST; however, we must
     make sure that somebody is not trying to set the cookie for a
     subdomain shared by many entities.  For example, "company.co.uk"
     must not be allowed to set a cookie for ".co.uk".  On the other
     hand, "sso.redhat.de" should be able to set a cookie for
     ".redhat.de".

     The only marginally sane way to handle this I can think of is to
     reject on the basis of the length of the second-level domain name
     (but only when the top-level domain is unknown), with the
     assumption that those of three or less characters could be
     reserved.  For example:

          .co.org -> works because the TLD is known
           .co.uk -> doesn't work because "co" is only two chars long
          .com.au -> doesn't work because "com" is only 3 chars long
          .cnn.uk -> doesn't work because "cnn" is also only 3 chars long (ugh)
          .cnn.de -> doesn't work for the same reason (ugh!!)
         .abcd.de -> works because "abcd" is 4 chars long
      .img.cnn.de -> works because it's not trying to set the 2nd level domain
       .cnn.co.uk -> works for the same reason

    That should prevent misuse, while allowing reasonable usage.  If
    someone knows of a better way to handle this, please let me
    know.  */

  scan = cookie_domain;
  if (*scan == '.')
    /* Ignore leading period in this calculation. */
    ++scan;

  DEBUGP ((" 4"));

  /* Count the components and remember the lengths of the last two.  */
  for (; *scan != '\0'; ++scan)
    {
      if (*scan != '.')
	{
	  ++last_len;
	  continue;
	}

      if (last_len == 0)
	/* Empty domain component found -- the domain is invalid. */
	return 0;

      if (scan[1] == '\0')
	/* Tolerate trailing '.' by not treating the domain as one
	   ending with an empty domain component.  */
	break;

      prev_len = last_len;
      last_len = 0;
      ++components;
    }

  DEBUGP ((" 5"));

  if (components < 2)
    return 0;

  DEBUGP ((" 6"));

  if (components == 2)
    {
      /* Exactly "<name>.<tld>": accept it outright for a well-known
	 TLD, otherwise only when <name> is longer than 3 chars.  */
      int i;
      int known_toplevel = 0;

      for (i = 0; !known_toplevel && i < countof (known_toplevel_domains); i++)
	if (match_tail (cookie_domain, known_toplevel_domains[i], 1))
	  known_toplevel = 1;

      if (!known_toplevel && prev_len <= 3)
	return 0;
    }

  DEBUGP ((" 7"));

  /* Don't allow the host "foobar.com" to set a cookie for domain
     "bar.com".  */
  if (*cookie_domain != '.')
    {
      int dlen = strlen (cookie_domain);
      int hlen = strlen (host);
      /* cookie host:    hostname.foobar.com */
      /* desired domain:             bar.com */
      /* '.' must be here in host-> ^        */
      if (hlen > dlen && host[hlen - dlen - 1] != '.')
	return 0;
    }

  DEBUGP ((" 8"));

  return 1;
}
|
|---|
| 824 |
|
|---|
| 825 | static int path_matches PARAMS ((const char *, const char *));
|
|---|
| 826 |
|
|---|
/* Verify that PATH starts with COOKIE_PATH, i.e. that the path a
   cookie claims for itself is a leading substring of the path of the
   URL the cookie arrived with.

   This is a thin wrapper that hands the arguments to path_matches in
   swapped order.  The return value is whatever path_matches yields
   for (PATH, COOKIE_PATH): zero when PATH does not begin with
   COOKIE_PATH, non-zero when it does.  */

static int
check_path_match (const char *cookie_path, const char *path)
{
  int match_quality = path_matches (path, cookie_path);
  return match_quality;
}
| 834 |
|
|---|
/* Replace S with a copy of itself that has a '/' in front.

   The copy lives in space obtained from alloca in the frame of the
   function that expands the macro, so it must not be freed.  S is
   reassigned to point at the copy, which means S has to be a
   modifiable lvalue; it is evaluated more than once.  */

#define PREPEND_SLASH(s) do {                                   \
  size_t PS_len = strlen (s);                                   \
  /* one byte for the slash, one for the terminating NUL */     \
  char *PS_newstr = (char *) alloca (PS_len + 2);               \
  PS_newstr[0] = '/';                                           \
  memcpy (PS_newstr + 1, s, PS_len + 1);                        \
  s = PS_newstr;                                                \
} while (0)
| 844 |
|
|---|
| 845 | |
|---|
| 846 |
|
|---|
| 847 | /* Process the HTTP `Set-Cookie' header. This results in storing the
|
|---|
| 848 | cookie or discarding a matching one, or ignoring it completely, all
|
|---|
| 849 | depending on the contents. */
|
|---|
| 850 |
|
|---|
| 851 | void
|
|---|
| 852 | cookie_handle_set_cookie (struct cookie_jar *jar,
|
|---|
| 853 | const char *host, int port,
|
|---|
| 854 | const char *path, const char *set_cookie)
|
|---|
| 855 | {
|
|---|
| 856 | struct cookie *cookie;
|
|---|
| 857 | cookies_now = time (NULL);
|
|---|
| 858 |
|
|---|
| 859 | /* Wget's paths don't begin with '/' (blame rfc1808), but cookie
|
|---|
| 860 | usage assumes /-prefixed paths. Until the rest of Wget is fixed,
|
|---|
| 861 | simply prepend slash to PATH. */
|
|---|
| 862 | PREPEND_SLASH (path);
|
|---|
| 863 |
|
|---|
| 864 | cookie = parse_set_cookies (set_cookie, update_cookie_field, 0);
|
|---|
| 865 | if (!cookie)
|
|---|
| 866 | goto out;
|
|---|
| 867 |
|
|---|
| 868 | /* Sanitize parts of cookie. */
|
|---|
| 869 |
|
|---|
| 870 | if (!cookie->domain)
|
|---|
| 871 | {
|
|---|
| 872 | copy_domain:
|
|---|
| 873 | /* If the domain was not provided, we use the one we're talking
|
|---|
| 874 | to, and set exact match. */
|
|---|
| 875 | cookie->domain = xstrdup (host);
|
|---|
| 876 | cookie->domain_exact = 1;
|
|---|
| 877 | /* Set the port, but only if it's non-default. */
|
|---|
| 878 | if (port != 80 && port != 443)
|
|---|
| 879 | cookie->port = port;
|
|---|
| 880 | }
|
|---|
| 881 | else
|
|---|
| 882 | {
|
|---|
| 883 | if (!check_domain_match (cookie->domain, host))
|
|---|
| 884 | {
|
|---|
| 885 | logprintf (LOG_NOTQUIET,
|
|---|
| 886 | _("Cookie coming from %s attempted to set domain to %s\n"),
|
|---|
| 887 | escnonprint (host), escnonprint (cookie->domain));
|
|---|
| 888 | xfree (cookie->domain);
|
|---|
| 889 | goto copy_domain;
|
|---|
| 890 | }
|
|---|
| 891 | }
|
|---|
| 892 |
|
|---|
| 893 | if (!cookie->path)
|
|---|
| 894 | {
|
|---|
| 895 | /* The cookie doesn't set path: set it to the URL path, sans the
|
|---|
| 896 | file part ("/dir/file" truncated to "/dir/"). */
|
|---|
| 897 | char *trailing_slash = strrchr (path, '/');
|
|---|
| 898 | if (trailing_slash)
|
|---|
| 899 | cookie->path = strdupdelim (path, trailing_slash + 1);
|
|---|
| 900 | else
|
|---|
| 901 | /* no slash in the string -- can this even happen? */
|
|---|
| 902 | cookie->path = xstrdup (path);
|
|---|
| 903 | }
|
|---|
| 904 | else
|
|---|
| 905 | {
|
|---|
| 906 | /* The cookie sets its own path; verify that it is legal. */
|
|---|
| 907 | if (!check_path_match (cookie->path, path))
|
|---|
| 908 | {
|
|---|
| 909 | DEBUGP (("Attempt to fake the path: %s, %s\n",
|
|---|
| 910 | cookie->path, path));
|
|---|
| 911 | goto out;
|
|---|
| 912 | }
|
|---|
| 913 | }
|
|---|
| 914 |
|
|---|
| 915 | /* Now store the cookie, or discard an existing cookie, if
|
|---|
| 916 | discarding was requested. */
|
|---|
| 917 |
|
|---|
| 918 | if (cookie->discard_requested)
|
|---|
| 919 | {
|
|---|
| 920 | discard_matching_cookie (jar, cookie);
|
|---|
| 921 | goto out;
|
|---|
| 922 | }
|
|---|
| 923 |
|
|---|
| 924 | store_cookie (jar, cookie);
|
|---|
| 925 | return;
|
|---|
| 926 |
|
|---|
| 927 | out:
|
|---|
| 928 | if (cookie)
|
|---|
| 929 | delete_cookie (cookie);
|
|---|
| 930 | }
|
|---|
| 931 | |
|---|
| 932 |
|
|---|
| 933 | /* Support for sending out cookies in HTTP requests, based on
|
|---|
| 934 | previously stored cookies. Entry point is
|
|---|
| 935 | `build_cookies_request'. */
|
|---|
| 936 |
|
|---|
/* Count the occurrences of CHR in the NUL-terminated STRING and
   return that number.  The terminating NUL itself is never counted.  */

static int
count_char (const char *string, char chr)
{
  int occurrences = 0;
  while (*string != '\0')
    {
      if (*string++ == chr)
        occurrences++;
    }
  return occurrences;
}
| 949 |
|
|---|
| 950 | /* Find the cookie chains whose domains match HOST and store them to
|
|---|
| 951 | DEST.
|
|---|
| 952 |
|
|---|
| 953 | A cookie chain is the head of a list of cookies that belong to a
|
|---|
| 954 | host/domain. Given HOST "img.search.xemacs.org", this function
|
|---|
| 955 | will return the chains for "img.search.xemacs.org",
|
|---|
| 956 | "search.xemacs.org", and "xemacs.org" -- those of them that exist
|
|---|
| 957 | (if any), that is.
|
|---|
| 958 |
|
|---|
| 959 | DEST should be large enough to accept (in the worst case) as many
|
|---|
| 960 | elements as there are domain components of HOST. */
|
|---|
| 961 |
|
|---|
| 962 | static int
|
|---|
| 963 | find_chains_of_host (struct cookie_jar *jar, const char *host,
|
|---|
| 964 | struct cookie *dest[])
|
|---|
| 965 | {
|
|---|
| 966 | int dest_count = 0;
|
|---|
| 967 | int passes, passcnt;
|
|---|
| 968 |
|
|---|
| 969 | /* Bail out quickly if there are no cookies in the jar. */
|
|---|
| 970 | if (!hash_table_count (jar->chains))
|
|---|
| 971 | return 0;
|
|---|
| 972 |
|
|---|
| 973 | if (numeric_address_p (host))
|
|---|
| 974 | /* If host is an IP address, only check for the exact match. */
|
|---|
| 975 | passes = 1;
|
|---|
| 976 | else
|
|---|
| 977 | /* Otherwise, check all the subdomains except the top-level (last)
|
|---|
| 978 | one. As a domain with N components has N-1 dots, the number of
|
|---|
| 979 | passes equals the number of dots. */
|
|---|
| 980 | passes = count_char (host, '.');
|
|---|
| 981 |
|
|---|
| 982 | passcnt = 0;
|
|---|
| 983 |
|
|---|
| 984 | /* Find chains that match HOST, starting with exact match and
|
|---|
| 985 | progressing to less specific domains. For instance, given HOST
|
|---|
| 986 | fly.srk.fer.hr, first look for fly.srk.fer.hr's chain, then
|
|---|
| 987 | srk.fer.hr's, then fer.hr's. */
|
|---|
| 988 | while (1)
|
|---|
| 989 | {
|
|---|
| 990 | struct cookie *chain = hash_table_get (jar->chains, host);
|
|---|
| 991 | if (chain)
|
|---|
| 992 | dest[dest_count++] = chain;
|
|---|
| 993 | if (++passcnt >= passes)
|
|---|
| 994 | break;
|
|---|
| 995 | host = strchr (host, '.') + 1;
|
|---|
| 996 | }
|
|---|
| 997 |
|
|---|
| 998 | return dest_count;
|
|---|
| 999 | }
|
|---|
| 1000 |
|
|---|
/* Test whether FULL_PATH begins with PREFIX.

   Return zero if it does not.  If it does, return the length of
   PREFIX plus one.  The extra one keeps the result non-zero even for
   an empty PREFIX, so callers can use the value both as a truth
   value and as a measure of match quality (longer prefix, higher
   value).  */

static int
path_matches (const char *full_path, const char *prefix)
{
  int prefix_len = strlen (prefix);
  int starts_with = (strncmp (full_path, prefix, prefix_len) == 0);

  return starts_with ? prefix_len + 1 : 0;
}
| 1016 |
|
|---|
| 1017 | /* Return non-zero iff COOKIE matches the provided parameters of the
|
|---|
| 1018 | URL being downloaded: HOST, PORT, PATH, and SECFLAG.
|
|---|
| 1019 |
|
|---|
| 1020 | If PATH_GOODNESS is non-NULL, store the "path goodness" value
|
|---|
| 1021 | there. That value is a measure of how closely COOKIE matches PATH,
|
|---|
| 1022 | used for ordering cookies. */
|
|---|
| 1023 |
|
|---|
| 1024 | static int
|
|---|
| 1025 | cookie_matches_url (const struct cookie *cookie,
|
|---|
| 1026 | const char *host, int port, const char *path,
|
|---|
| 1027 | int secflag, int *path_goodness)
|
|---|
| 1028 | {
|
|---|
| 1029 | int pg;
|
|---|
| 1030 |
|
|---|
| 1031 | if (cookie_expired_p (cookie))
|
|---|
| 1032 | /* Ignore stale cookies. Don't bother unchaining the cookie at
|
|---|
| 1033 | this point -- Wget is a relatively short-lived application, and
|
|---|
| 1034 | stale cookies will not be saved by `save_cookies'. On the
|
|---|
| 1035 | other hand, this function should be as efficient as
|
|---|
| 1036 | possible. */
|
|---|
| 1037 | return 0;
|
|---|
| 1038 |
|
|---|
| 1039 | if (cookie->secure && !secflag)
|
|---|
| 1040 | /* Don't transmit secure cookies over insecure connections. */
|
|---|
| 1041 | return 0;
|
|---|
| 1042 | if (cookie->port != PORT_ANY && cookie->port != port)
|
|---|
| 1043 | return 0;
|
|---|
| 1044 |
|
|---|
| 1045 | /* If exact domain match is required, verify that cookie's domain is
|
|---|
| 1046 | equal to HOST. If not, assume success on the grounds of the
|
|---|
| 1047 | cookie's chain having been found by find_chains_of_host. */
|
|---|
| 1048 | if (cookie->domain_exact
|
|---|
| 1049 | && 0 != strcasecmp (host, cookie->domain))
|
|---|
| 1050 | return 0;
|
|---|
| 1051 |
|
|---|
| 1052 | pg = path_matches (path, cookie->path);
|
|---|
| 1053 | if (!pg)
|
|---|
| 1054 | return 0;
|
|---|
| 1055 |
|
|---|
| 1056 | if (path_goodness)
|
|---|
| 1057 | /* If the caller requested path_goodness, we return it. This is
|
|---|
| 1058 | an optimization, so that the caller doesn't need to call
|
|---|
| 1059 | path_matches() again. */
|
|---|
| 1060 | *path_goodness = pg;
|
|---|
| 1061 | return 1;
|
|---|
| 1062 | }
|
|---|
| 1063 |
|
|---|
/* A cookie paired with the scores that say how well it fits the
   current request.  Arrays of these are sorted before the cookies
   are sent back to the server.  */

struct weighed_cookie {
  struct cookie *cookie;        /* the cookie being scored */
  int domain_goodness;          /* domain score; higher sorts first */
  int path_goodness;            /* path score; higher sorts first */
};
| 1074 |
|
|---|
| 1075 | /* Comparator used for uniquifying the list. */
|
|---|
| 1076 |
|
|---|
| 1077 | static int
|
|---|
| 1078 | equality_comparator (const void *p1, const void *p2)
|
|---|
| 1079 | {
|
|---|
| 1080 | struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
|
|---|
| 1081 | struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
|
|---|
| 1082 |
|
|---|
| 1083 | int namecmp = strcmp (wc1->cookie->attr, wc2->cookie->attr);
|
|---|
| 1084 | int valuecmp = strcmp (wc1->cookie->value, wc2->cookie->value);
|
|---|
| 1085 |
|
|---|
| 1086 | /* We only really care whether both name and value are equal. We
|
|---|
| 1087 | return them in this order only for consistency... */
|
|---|
| 1088 | return namecmp ? namecmp : valuecmp;
|
|---|
| 1089 | }
|
|---|
| 1090 |
|
|---|
| 1091 | /* Eliminate duplicate cookies. "Duplicate cookies" are any two
|
|---|
| 1092 | cookies with the same attr name and value. Whenever a duplicate
|
|---|
| 1093 | pair is found, one of the cookies is removed. */
|
|---|
| 1094 |
|
|---|
| 1095 | static int
|
|---|
| 1096 | eliminate_dups (struct weighed_cookie *outgoing, int count)
|
|---|
| 1097 | {
|
|---|
| 1098 | struct weighed_cookie *h; /* hare */
|
|---|
| 1099 | struct weighed_cookie *t; /* tortoise */
|
|---|
| 1100 | struct weighed_cookie *end = outgoing + count;
|
|---|
| 1101 |
|
|---|
| 1102 | /* We deploy a simple uniquify algorithm: first sort the array
|
|---|
| 1103 | according to our sort criteria, then copy it to itself, comparing
|
|---|
| 1104 | each cookie to its neighbor and ignoring the duplicates. */
|
|---|
| 1105 |
|
|---|
| 1106 | qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator);
|
|---|
| 1107 |
|
|---|
| 1108 | /* "Hare" runs through all the entries in the array, followed by
|
|---|
| 1109 | "tortoise". If a duplicate is found, the hare skips it.
|
|---|
| 1110 | Non-duplicate entries are copied to the tortoise ptr. */
|
|---|
| 1111 |
|
|---|
| 1112 | for (h = t = outgoing; h < end; h++)
|
|---|
| 1113 | {
|
|---|
| 1114 | if (h != end - 1)
|
|---|
| 1115 | {
|
|---|
| 1116 | struct cookie *c0 = h[0].cookie;
|
|---|
| 1117 | struct cookie *c1 = h[1].cookie;
|
|---|
| 1118 | if (!strcmp (c0->attr, c1->attr) && !strcmp (c0->value, c1->value))
|
|---|
| 1119 | continue; /* ignore the duplicate */
|
|---|
| 1120 | }
|
|---|
| 1121 |
|
|---|
| 1122 | /* If the hare has advanced past the tortoise (because of
|
|---|
| 1123 | previous dups), make sure the values get copied. Otherwise,
|
|---|
| 1124 | no copying is necessary. */
|
|---|
| 1125 | if (h != t)
|
|---|
| 1126 | *t++ = *h;
|
|---|
| 1127 | else
|
|---|
| 1128 | t++;
|
|---|
| 1129 | }
|
|---|
| 1130 | return t - outgoing;
|
|---|
| 1131 | }
|
|---|
| 1132 |
|
|---|
| 1133 | /* Comparator used for sorting by quality. */
|
|---|
| 1134 |
|
|---|
| 1135 | static int
|
|---|
| 1136 | goodness_comparator (const void *p1, const void *p2)
|
|---|
| 1137 | {
|
|---|
| 1138 | struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
|
|---|
| 1139 | struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
|
|---|
| 1140 |
|
|---|
| 1141 | /* Subtractions take `wc2' as the first argument becauase we want a
|
|---|
| 1142 | sort in *decreasing* order of goodness. */
|
|---|
| 1143 | int dgdiff = wc2->domain_goodness - wc1->domain_goodness;
|
|---|
| 1144 | int pgdiff = wc2->path_goodness - wc1->path_goodness;
|
|---|
| 1145 |
|
|---|
| 1146 | /* Sort by domain goodness; if these are the same, sort by path
|
|---|
| 1147 | goodness. (The sorting order isn't really specified; maybe it
|
|---|
| 1148 | should be the other way around.) */
|
|---|
| 1149 | return dgdiff ? dgdiff : pgdiff;
|
|---|
| 1150 | }
|
|---|
| 1151 |
|
|---|
| 1152 | /* Generate a `Cookie' header for a request that goes to HOST:PORT and
|
|---|
| 1153 | requests PATH from the server. The resulting string is allocated
|
|---|
| 1154 | with `malloc', and the caller is responsible for freeing it. If no
|
|---|
| 1155 | cookies pertain to this request, i.e. no cookie header should be
|
|---|
| 1156 | generated, NULL is returned. */
|
|---|
| 1157 |
|
|---|
| 1158 | char *
|
|---|
| 1159 | cookie_header (struct cookie_jar *jar, const char *host,
|
|---|
| 1160 | int port, const char *path, int secflag)
|
|---|
| 1161 | {
|
|---|
| 1162 | struct cookie **chains;
|
|---|
| 1163 | int chain_count;
|
|---|
| 1164 |
|
|---|
| 1165 | struct cookie *cookie;
|
|---|
| 1166 | struct weighed_cookie *outgoing;
|
|---|
| 1167 | int count, i, ocnt;
|
|---|
| 1168 | char *result;
|
|---|
| 1169 | int result_size, pos;
|
|---|
| 1170 | PREPEND_SLASH (path); /* see cookie_handle_set_cookie */
|
|---|
| 1171 |
|
|---|
| 1172 | /* First, find the cookie chains whose domains match HOST. */
|
|---|
| 1173 |
|
|---|
| 1174 | /* Allocate room for find_chains_of_host to write to. The number of
|
|---|
| 1175 | chains can at most equal the number of subdomains, hence
|
|---|
| 1176 | 1+<number of dots>. */
|
|---|
| 1177 | chains = alloca_array (struct cookie *, 1 + count_char (host, '.'));
|
|---|
| 1178 | chain_count = find_chains_of_host (jar, host, chains);
|
|---|
| 1179 |
|
|---|
| 1180 | /* No cookies for this host. */
|
|---|
| 1181 | if (!chain_count)
|
|---|
| 1182 | return NULL;
|
|---|
| 1183 |
|
|---|
| 1184 | cookies_now = time (NULL);
|
|---|
| 1185 |
|
|---|
| 1186 | /* Now extract from the chains those cookies that match our host
|
|---|
| 1187 | (for domain_exact cookies), port (for cookies with port other
|
|---|
| 1188 | than PORT_ANY), etc. See matching_cookie for details. */
|
|---|
| 1189 |
|
|---|
| 1190 | /* Count the number of matching cookies. */
|
|---|
| 1191 | count = 0;
|
|---|
| 1192 | for (i = 0; i < chain_count; i++)
|
|---|
| 1193 | for (cookie = chains[i]; cookie; cookie = cookie->next)
|
|---|
| 1194 | if (cookie_matches_url (cookie, host, port, path, secflag, NULL))
|
|---|
| 1195 | ++count;
|
|---|
| 1196 | if (!count)
|
|---|
| 1197 | return NULL; /* no cookies matched */
|
|---|
| 1198 |
|
|---|
| 1199 | /* Allocate the array. */
|
|---|
| 1200 | outgoing = alloca_array (struct weighed_cookie, count);
|
|---|
| 1201 |
|
|---|
| 1202 | /* Fill the array with all the matching cookies from the chains that
|
|---|
| 1203 | match HOST. */
|
|---|
| 1204 | ocnt = 0;
|
|---|
| 1205 | for (i = 0; i < chain_count; i++)
|
|---|
| 1206 | for (cookie = chains[i]; cookie; cookie = cookie->next)
|
|---|
| 1207 | {
|
|---|
| 1208 | int pg;
|
|---|
| 1209 | if (!cookie_matches_url (cookie, host, port, path, secflag, &pg))
|
|---|
| 1210 | continue;
|
|---|
| 1211 | outgoing[ocnt].cookie = cookie;
|
|---|
| 1212 | outgoing[ocnt].domain_goodness = strlen (cookie->domain);
|
|---|
| 1213 | outgoing[ocnt].path_goodness = pg;
|
|---|
| 1214 | ++ocnt;
|
|---|
| 1215 | }
|
|---|
| 1216 | assert (ocnt == count);
|
|---|
| 1217 |
|
|---|
| 1218 | /* Eliminate duplicate cookies; that is, those whose name and value
|
|---|
| 1219 | are the same. */
|
|---|
| 1220 | count = eliminate_dups (outgoing, count);
|
|---|
| 1221 |
|
|---|
| 1222 | /* Sort the array so that best-matching domains come first, and
|
|---|
| 1223 | that, within one domain, best-matching paths come first. */
|
|---|
| 1224 | qsort (outgoing, count, sizeof (struct weighed_cookie), goodness_comparator);
|
|---|
| 1225 |
|
|---|
| 1226 | /* Count the space the name=value pairs will take. */
|
|---|
| 1227 | result_size = 0;
|
|---|
| 1228 | for (i = 0; i < count; i++)
|
|---|
| 1229 | {
|
|---|
| 1230 | struct cookie *c = outgoing[i].cookie;
|
|---|
| 1231 | /* name=value */
|
|---|
| 1232 | result_size += strlen (c->attr) + 1 + strlen (c->value);
|
|---|
| 1233 | }
|
|---|
| 1234 |
|
|---|
| 1235 | /* Allocate output buffer:
|
|---|
| 1236 | name=value pairs -- result_size
|
|---|
| 1237 | "; " separators -- (count - 1) * 2
|
|---|
| 1238 | \0 terminator -- 1 */
|
|---|
| 1239 | result_size = result_size + (count - 1) * 2 + 1;
|
|---|
| 1240 | result = xmalloc (result_size);
|
|---|
| 1241 | pos = 0;
|
|---|
| 1242 | for (i = 0; i < count; i++)
|
|---|
| 1243 | {
|
|---|
| 1244 | struct cookie *c = outgoing[i].cookie;
|
|---|
| 1245 | int namlen = strlen (c->attr);
|
|---|
| 1246 | int vallen = strlen (c->value);
|
|---|
| 1247 |
|
|---|
| 1248 | memcpy (result + pos, c->attr, namlen);
|
|---|
| 1249 | pos += namlen;
|
|---|
| 1250 | result[pos++] = '=';
|
|---|
| 1251 | memcpy (result + pos, c->value, vallen);
|
|---|
| 1252 | pos += vallen;
|
|---|
| 1253 | if (i < count - 1)
|
|---|
| 1254 | {
|
|---|
| 1255 | result[pos++] = ';';
|
|---|
| 1256 | result[pos++] = ' ';
|
|---|
| 1257 | }
|
|---|
| 1258 | }
|
|---|
| 1259 | result[pos++] = '\0';
|
|---|
| 1260 | assert (pos == result_size);
|
|---|
| 1261 | return result;
|
|---|
| 1262 | }
|
|---|
| 1263 | |
|---|
| 1264 |
|
|---|
| 1265 | /* Support for loading and saving cookies. The format used for
|
|---|
| 1266 | loading and saving should be the format of the `cookies.txt' file
|
|---|
| 1267 | used by Netscape and Mozilla, at least the Unix versions.
|
|---|
| 1268 | (Apparently IE can export cookies in that format as well.) The
|
|---|
| 1269 | format goes like this:
|
|---|
| 1270 |
|
|---|
| 1271 | DOMAIN DOMAIN-FLAG PATH SECURE-FLAG TIMESTAMP ATTR-NAME ATTR-VALUE
|
|---|
| 1272 |
|
|---|
| 1273 | DOMAIN -- cookie domain, optionally followed by :PORT
|
|---|
| 1274 | DOMAIN-FLAG -- whether all hosts in the domain match
|
|---|
| 1275 | PATH -- cookie path
|
|---|
| 1276 | SECURE-FLAG -- whether cookie requires secure connection
|
|---|
| 1277 | TIMESTAMP -- expiry timestamp, number of seconds since epoch
|
|---|
| 1278 | ATTR-NAME -- name of the cookie attribute
|
|---|
| 1279 | ATTR-VALUE -- value of the cookie attribute (empty if absent)
|
|---|
| 1280 |
|
|---|
| 1281 | The fields are separated by TABs. All fields are mandatory, except
|
|---|
| 1282 | for ATTR-VALUE. The `-FLAG' fields are boolean, their legal values
|
|---|
| 1283 | being "TRUE" and "FALSE". Empty lines, lines consisting of
|
|---|
| 1284 | whitespace only, and comment lines (beginning with # optionally
|
|---|
| 1285 | preceded by whitespace) are ignored.
|
|---|
| 1286 |
|
|---|
| 1287 | Example line from cookies.txt (split in two lines for readability):
|
|---|
| 1288 |
|
|---|
| 1289 | .google.com TRUE / FALSE 2147368447 \
|
|---|
| 1290 | PREF ID=34bb47565bbcd47b:LD=en:NR=20:TM=985172580:LM=985739012
|
|---|
| 1291 |
|
|---|
| 1292 | */
|
|---|
| 1293 |
|
|---|
/* If the region [DOMAIN_B, DOMAIN_E) contains a `:' followed only by
   decimal digits up to DOMAIN_E, parse the digits as a port number,
   return it, and store the new end of the domain (the location of
   the `:') to *DOMAIN_E_PTR.

   Return 0 and leave *DOMAIN_E_PTR untouched when
     - there is no `:' in the region,
     - something other than digits follows the `:', or
     - the number is larger than 65535.  Such a value cannot be a
       port; rejecting it as soon as it is seen also keeps the
       accumulator from overflowing on a long run of digits in a
       damaged or hostile cookies file.

   A `:' with nothing after it also yields 0, but in that case
   *DOMAIN_E_PTR is still moved to the `:' so that the stray colon
   does not end up in the domain.  */

static int
domain_port (const char *domain_b, const char *domain_e,
             const char **domain_e_ptr)
{
  enum { MAX_PORT = 65535 };
  int port = 0;
  const char *p;
  const char *colon = memchr (domain_b, ':', domain_e - domain_b);
  if (!colon)
    return 0;
  /* Only the ASCII digits '0'..'9' are accepted.  */
  for (p = colon + 1; p < domain_e && *p >= '0' && *p <= '9'; p++)
    {
      port = 10 * port + (*p - '0');
      if (port > MAX_PORT)
        /* Out of range; stop before PORT can overflow.  */
        return 0;
    }
  if (p < domain_e)
    /* Garbage following port number. */
    return 0;
  *domain_e_ptr = colon;
  return port;
}
| 1315 |
|
|---|
/* Extract the next TAB-terminated field from a cookies.txt line.

   On entry P points to the start of the field.  On success B and E
   delimit the field as [B, E) and P is advanced past the TAB that
   ends it.  If the field is empty, or if the line ends before a TAB
   is found, control jumps to the label `next', which the expanding
   function must provide; B and E are still assigned in that case.
   All three arguments must be modifiable lvalues.  */

#define GET_WORD(p, b, e) do {                                  \
  for (b = p; *p != '\0' && *p != '\t'; ++p)                    \
    ;                                                           \
  e = p;                                                        \
  if (e == b || *p == '\0')                                     \
    goto next;                                                  \
  ++p;                                                          \
} while (0)
| 1325 |
|
|---|
| 1326 | /* Load cookies from FILE. */
|
|---|
| 1327 |
|
|---|
| 1328 | void
|
|---|
| 1329 | cookie_jar_load (struct cookie_jar *jar, const char *file)
|
|---|
| 1330 | {
|
|---|
| 1331 | char *line;
|
|---|
| 1332 | FILE *fp = fopen (file, "r");
|
|---|
| 1333 | if (!fp)
|
|---|
| 1334 | {
|
|---|
| 1335 | logprintf (LOG_NOTQUIET, _("Cannot open cookies file `%s': %s\n"),
|
|---|
| 1336 | file, strerror (errno));
|
|---|
| 1337 | return;
|
|---|
| 1338 | }
|
|---|
| 1339 | cookies_now = time (NULL);
|
|---|
| 1340 |
|
|---|
| 1341 | for (; ((line = read_whole_line (fp)) != NULL); xfree (line))
|
|---|
| 1342 | {
|
|---|
| 1343 | struct cookie *cookie;
|
|---|
| 1344 | char *p = line;
|
|---|
| 1345 |
|
|---|
| 1346 | double expiry;
|
|---|
| 1347 | int port;
|
|---|
| 1348 |
|
|---|
| 1349 | char *domain_b = NULL, *domain_e = NULL;
|
|---|
| 1350 | char *domflag_b = NULL, *domflag_e = NULL;
|
|---|
| 1351 | char *path_b = NULL, *path_e = NULL;
|
|---|
| 1352 | char *secure_b = NULL, *secure_e = NULL;
|
|---|
| 1353 | char *expires_b = NULL, *expires_e = NULL;
|
|---|
| 1354 | char *name_b = NULL, *name_e = NULL;
|
|---|
| 1355 | char *value_b = NULL, *value_e = NULL;
|
|---|
| 1356 |
|
|---|
| 1357 | /* Skip leading white-space. */
|
|---|
| 1358 | while (*p && ISSPACE (*p))
|
|---|
| 1359 | ++p;
|
|---|
| 1360 | /* Ignore empty lines. */
|
|---|
| 1361 | if (!*p || *p == '#')
|
|---|
| 1362 | continue;
|
|---|
| 1363 |
|
|---|
| 1364 | GET_WORD (p, domain_b, domain_e);
|
|---|
| 1365 | GET_WORD (p, domflag_b, domflag_e);
|
|---|
| 1366 | GET_WORD (p, path_b, path_e);
|
|---|
| 1367 | GET_WORD (p, secure_b, secure_e);
|
|---|
| 1368 | GET_WORD (p, expires_b, expires_e);
|
|---|
| 1369 | GET_WORD (p, name_b, name_e);
|
|---|
| 1370 |
|
|---|
| 1371 | /* Don't use GET_WORD for value because it ends with newline,
|
|---|
| 1372 | not TAB. */
|
|---|
| 1373 | value_b = p;
|
|---|
| 1374 | value_e = p + strlen (p);
|
|---|
| 1375 | if (value_e > value_b && value_e[-1] == '\n')
|
|---|
| 1376 | --value_e;
|
|---|
| 1377 | if (value_e > value_b && value_e[-1] == '\r')
|
|---|
| 1378 | --value_e;
|
|---|
| 1379 | /* Empty values are legal (I think), so don't bother checking. */
|
|---|
| 1380 |
|
|---|
| 1381 | cookie = cookie_new ();
|
|---|
| 1382 |
|
|---|
| 1383 | cookie->attr = strdupdelim (name_b, name_e);
|
|---|
| 1384 | cookie->value = strdupdelim (value_b, value_e);
|
|---|
| 1385 | cookie->path = strdupdelim (path_b, path_e);
|
|---|
| 1386 | cookie->secure = BOUNDED_EQUAL (secure_b, secure_e, "TRUE");
|
|---|
| 1387 |
|
|---|
| 1388 | /* Curl source says, quoting Andre Garcia: "flag: A TRUE/FALSE
|
|---|
| 1389 | value indicating if all machines within a given domain can
|
|---|
| 1390 | access the variable. This value is set automatically by the
|
|---|
| 1391 | browser, depending on the value set for the domain." */
|
|---|
| 1392 | cookie->domain_exact = !BOUNDED_EQUAL (domflag_b, domflag_e, "TRUE");
|
|---|
| 1393 |
|
|---|
| 1394 | /* DOMAIN needs special treatment because we might need to
|
|---|
| 1395 | extract the port. */
|
|---|
| 1396 | port = domain_port (domain_b, domain_e, (const char **)&domain_e);
|
|---|
| 1397 | if (port)
|
|---|
| 1398 | cookie->port = port;
|
|---|
| 1399 |
|
|---|
| 1400 | if (*domain_b == '.')
|
|---|
| 1401 | ++domain_b; /* remove leading dot internally */
|
|---|
| 1402 | cookie->domain = strdupdelim (domain_b, domain_e);
|
|---|
| 1403 |
|
|---|
| 1404 | /* safe default in case EXPIRES field is garbled. */
|
|---|
| 1405 | expiry = (double)cookies_now - 1;
|
|---|
| 1406 |
|
|---|
| 1407 | /* I don't like changing the line, but it's safe here. (line is
|
|---|
| 1408 | malloced.) */
|
|---|
| 1409 | *expires_e = '\0';
|
|---|
| 1410 | sscanf (expires_b, "%lf", &expiry);
|
|---|
| 1411 |
|
|---|
| 1412 | if (expiry == 0)
|
|---|
| 1413 | {
|
|---|
| 1414 | /* EXPIRY can be 0 for session cookies saved because the
|
|---|
| 1415 | user specified `--keep-session-cookies' in the past.
|
|---|
| 1416 | They remain session cookies, and will be saved only if
|
|---|
| 1417 | the user has specified `keep-session-cookies' again. */
|
|---|
| 1418 | }
|
|---|
| 1419 | else
|
|---|
| 1420 | {
|
|---|
| 1421 | if (expiry < cookies_now)
|
|---|
| 1422 | goto abort_cookie; /* ignore stale cookie. */
|
|---|
| 1423 | cookie->expiry_time = expiry;
|
|---|
| 1424 | cookie->permanent = 1;
|
|---|
| 1425 | }
|
|---|
| 1426 |
|
|---|
| 1427 | store_cookie (jar, cookie);
|
|---|
| 1428 |
|
|---|
| 1429 | next:
|
|---|
| 1430 | continue;
|
|---|
| 1431 |
|
|---|
| 1432 | abort_cookie:
|
|---|
| 1433 | delete_cookie (cookie);
|
|---|
| 1434 | }
|
|---|
| 1435 | fclose (fp);
|
|---|
| 1436 | }
|
|---|
| 1437 |
|
|---|
| 1438 | /* Mapper for save_cookies callable by hash_table_map. VALUE points
|
|---|
| 1439 | to the head in a chain of cookies. The function prints the entire
|
|---|
| 1440 | chain. */
|
|---|
| 1441 |
|
|---|
| 1442 | static int
|
|---|
| 1443 | save_cookies_mapper (void *key, void *value, void *arg)
|
|---|
| 1444 | {
|
|---|
| 1445 | FILE *fp = (FILE *)arg;
|
|---|
| 1446 | char *domain = (char *)key;
|
|---|
| 1447 | struct cookie *cookie = (struct cookie *)value;
|
|---|
| 1448 | for (; cookie; cookie = cookie->next)
|
|---|
| 1449 | {
|
|---|
| 1450 | if (!cookie->permanent && !opt.keep_session_cookies)
|
|---|
| 1451 | continue;
|
|---|
| 1452 | if (cookie_expired_p (cookie))
|
|---|
| 1453 | continue;
|
|---|
| 1454 | if (!cookie->domain_exact)
|
|---|
| 1455 | fputc ('.', fp);
|
|---|
| 1456 | fputs (domain, fp);
|
|---|
| 1457 | if (cookie->port != PORT_ANY)
|
|---|
| 1458 | fprintf (fp, ":%d", cookie->port);
|
|---|
| 1459 | fprintf (fp, "\t%s\t%s\t%s\t%.0f\t%s\t%s\n",
|
|---|
| 1460 | cookie->domain_exact ? "FALSE" : "TRUE",
|
|---|
| 1461 | cookie->path, cookie->secure ? "TRUE" : "FALSE",
|
|---|
| 1462 | (double)cookie->expiry_time,
|
|---|
| 1463 | cookie->attr, cookie->value);
|
|---|
| 1464 | if (ferror (fp))
|
|---|
| 1465 | return 1; /* stop mapping */
|
|---|
| 1466 | }
|
|---|
| 1467 | return 0;
|
|---|
| 1468 | }
|
|---|
| 1469 |
|
|---|
| 1470 | /* Save cookies, in format described above, to FILE. */
|
|---|
| 1471 |
|
|---|
| 1472 | void
|
|---|
| 1473 | cookie_jar_save (struct cookie_jar *jar, const char *file)
|
|---|
| 1474 | {
|
|---|
| 1475 | FILE *fp;
|
|---|
| 1476 |
|
|---|
| 1477 | DEBUGP (("Saving cookies to %s.\n", file));
|
|---|
| 1478 |
|
|---|
| 1479 | cookies_now = time (NULL);
|
|---|
| 1480 |
|
|---|
| 1481 | fp = fopen (file, "w");
|
|---|
| 1482 | if (!fp)
|
|---|
| 1483 | {
|
|---|
| 1484 | logprintf (LOG_NOTQUIET, _("Cannot open cookies file `%s': %s\n"),
|
|---|
| 1485 | file, strerror (errno));
|
|---|
| 1486 | return;
|
|---|
| 1487 | }
|
|---|
| 1488 |
|
|---|
| 1489 | fputs ("# HTTP cookie file.\n", fp);
|
|---|
| 1490 | fprintf (fp, "# Generated by Wget on %s.\n", datetime_str (&cookies_now));
|
|---|
| 1491 | fputs ("# Edit at your own risk.\n\n", fp);
|
|---|
| 1492 |
|
|---|
| 1493 | hash_table_map (jar->chains, save_cookies_mapper, fp);
|
|---|
| 1494 |
|
|---|
| 1495 | if (ferror (fp))
|
|---|
| 1496 | logprintf (LOG_NOTQUIET, _("Error writing to `%s': %s\n"),
|
|---|
| 1497 | file, strerror (errno));
|
|---|
| 1498 | if (fclose (fp) < 0)
|
|---|
| 1499 | logprintf (LOG_NOTQUIET, _("Error closing `%s': %s\n"),
|
|---|
| 1500 | file, strerror (errno));
|
|---|
| 1501 |
|
|---|
| 1502 | DEBUGP (("Done saving cookies.\n"));
|
|---|
| 1503 | }
|
|---|
| 1504 | |
|---|
| 1505 |
|
|---|
| 1506 | /* Destroy all the elements in the chain and unhook it from the cookie
|
|---|
| 1507 | jar. This is written in the form of a callback to hash_table_map
|
|---|
| 1508 | and used by cookie_jar_delete to delete all the cookies in a
|
|---|
| 1509 | jar. */
|
|---|
| 1510 |
|
|---|
| 1511 | static int
|
|---|
| 1512 | nuke_cookie_chain (void *value, void *key, void *arg)
|
|---|
| 1513 | {
|
|---|
| 1514 | char *chain_key = (char *)value;
|
|---|
| 1515 | struct cookie *chain = (struct cookie *)key;
|
|---|
| 1516 | struct cookie_jar *jar = (struct cookie_jar *)arg;
|
|---|
| 1517 |
|
|---|
| 1518 | /* Remove the chain from the table and free the key. */
|
|---|
| 1519 | hash_table_remove (jar->chains, chain_key);
|
|---|
| 1520 | xfree (chain_key);
|
|---|
| 1521 |
|
|---|
| 1522 | /* Then delete all the cookies in the chain. */
|
|---|
| 1523 | while (chain)
|
|---|
| 1524 | {
|
|---|
| 1525 | struct cookie *next = chain->next;
|
|---|
| 1526 | delete_cookie (chain);
|
|---|
| 1527 | chain = next;
|
|---|
| 1528 | }
|
|---|
| 1529 |
|
|---|
| 1530 | /* Keep mapping. */
|
|---|
| 1531 | return 0;
|
|---|
| 1532 | }
|
|---|
| 1533 |
|
|---|
| 1534 | /* Clean up cookie-related data. */
|
|---|
| 1535 |
|
|---|
| 1536 | void
|
|---|
| 1537 | cookie_jar_delete (struct cookie_jar *jar)
|
|---|
| 1538 | {
|
|---|
| 1539 | hash_table_map (jar->chains, nuke_cookie_chain, jar);
|
|---|
| 1540 | hash_table_destroy (jar->chains);
|
|---|
| 1541 | xfree (jar);
|
|---|
| 1542 | }
|
|---|
| 1543 | |
|---|
| 1544 |
|
|---|
| 1545 | /* Test cases. Currently this only tests parse_set_cookies. To
|
|---|
| 1546 | use, recompile Wget with -DTEST_COOKIES and call test_cookies()
|
|---|
| 1547 | from main. */
|
|---|
| 1548 |
|
|---|
| 1549 | #ifdef TEST_COOKIES
|
|---|
/* Number of entries of test_results filled in so far.  */
int test_count;

/* Strings collected by test_parse_cookies_callback, stored as
   alternating name, value entries.  */
char *test_results[10];

/* Callback passed to parse_set_cookies by the test driver.  Records
   one name/value pair: the name delimited by NB and NE and the value
   delimited by VB and VE are each duplicated with strdupdelim and
   appended to test_results, and test_count grows by two.  The cookie
   argument is not used.

   No bounds check is made; test_results has room for five pairs.

   Always returns 1.  */

static int
test_parse_cookies_callback (struct cookie *ignored,
                             const char *nb, const char *ne,
                             const char *vb, const char *ve)
{
  int base = test_count;

  test_results[base] = strdupdelim (nb, ne);
  test_results[base + 1] = strdupdelim (vb, ve);
  test_count = base + 2;

  return 1;
}
|
|---|
| 1561 |
|
|---|
| 1562 | void
|
|---|
| 1563 | test_cookies (void)
|
|---|
| 1564 | {
|
|---|
| 1565 | /* Tests expected to succeed: */
|
|---|
| 1566 | static struct {
|
|---|
| 1567 | char *data;
|
|---|
| 1568 | char *results[10];
|
|---|
| 1569 | } tests_succ[] = {
|
|---|
| 1570 | { "", {NULL} },
|
|---|
| 1571 | { "arg=value", {"arg", "value", NULL} },
|
|---|
| 1572 | { "arg1=value1;arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
|
|---|
| 1573 | { "arg1=value1; arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
|
|---|
| 1574 | { "arg1=value1; arg2=value2;", {"arg1", "value1", "arg2", "value2", NULL} },
|
|---|
| 1575 | { "arg1=value1; arg2=value2; ", {"arg1", "value1", "arg2", "value2", NULL} },
|
|---|
| 1576 | { "arg1=\"value1\"; arg2=\"\"", {"arg1", "value1", "arg2", "", NULL} },
|
|---|
| 1577 | { "arg=", {"arg", "", NULL} },
|
|---|
| 1578 | { "arg1=; arg2=", {"arg1", "", "arg2", "", NULL} },
|
|---|
| 1579 | { "arg1 = ; arg2= ", {"arg1", "", "arg2", "", NULL} },
|
|---|
| 1580 | };
|
|---|
| 1581 |
|
|---|
| 1582 | /* Tests expected to fail: */
|
|---|
| 1583 | static char *tests_fail[] = {
|
|---|
| 1584 | ";",
|
|---|
| 1585 | "arg=\"unterminated",
|
|---|
| 1586 | "=empty-name",
|
|---|
| 1587 | "arg1=;=another-empty-name",
|
|---|
| 1588 | };
|
|---|
| 1589 | int i;
|
|---|
| 1590 |
|
|---|
| 1591 | for (i = 0; i < countof (tests_succ); i++)
|
|---|
| 1592 | {
|
|---|
| 1593 | int ind;
|
|---|
| 1594 | char *data = tests_succ[i].data;
|
|---|
| 1595 | char **expected = tests_succ[i].results;
|
|---|
| 1596 | struct cookie *c;
|
|---|
| 1597 |
|
|---|
| 1598 | test_count = 0;
|
|---|
| 1599 | c = parse_set_cookies (data, test_parse_cookies_callback, 1);
|
|---|
| 1600 | if (!c)
|
|---|
| 1601 | {
|
|---|
| 1602 | printf ("NULL cookie returned for valid data: %s\n", data);
|
|---|
| 1603 | continue;
|
|---|
| 1604 | }
|
|---|
| 1605 |
|
|---|
| 1606 | for (ind = 0; ind < test_count; ind += 2)
|
|---|
| 1607 | {
|
|---|
| 1608 | if (!expected[ind])
|
|---|
| 1609 | break;
|
|---|
| 1610 | if (0 != strcmp (expected[ind], test_results[ind]))
|
|---|
| 1611 | printf ("Invalid name %d for '%s' (expected '%s', got '%s')\n",
|
|---|
| 1612 | ind / 2 + 1, data, expected[ind], test_results[ind]);
|
|---|
| 1613 | if (0 != strcmp (expected[ind + 1], test_results[ind + 1]))
|
|---|
| 1614 | printf ("Invalid value %d for '%s' (expected '%s', got '%s')\n",
|
|---|
| 1615 | ind / 2 + 1, data, expected[ind + 1], test_results[ind + 1]);
|
|---|
| 1616 | }
|
|---|
| 1617 | if (ind < test_count || expected[ind])
|
|---|
| 1618 | printf ("Unmatched number of results: %s\n", data);
|
|---|
| 1619 | }
|
|---|
| 1620 |
|
|---|
| 1621 | for (i = 0; i < countof (tests_fail); i++)
|
|---|
| 1622 | {
|
|---|
| 1623 | struct cookie *c;
|
|---|
| 1624 | char *data = tests_fail[i];
|
|---|
| 1625 | test_count = 0;
|
|---|
| 1626 | c = parse_set_cookies (data, test_parse_cookies_callback, 1);
|
|---|
| 1627 | if (c)
|
|---|
| 1628 | printf ("Failed to report error on invalid data: %s\n", data);
|
|---|
| 1629 | }
|
|---|
| 1630 | }
|
|---|
| 1631 | #endif /* TEST_COOKIES */
|
|---|