source: trunk/essentials/net-misc/wget/src/cookies.c

Last change on this file was 3440, checked in by bird, 18 years ago

wget 1.10.2

File size: 45.4 KB
Line 
1/* Support for cookies.
2 Copyright (C) 2001, 2002 Free Software Foundation, Inc.
3
4This file is part of GNU Wget.
5
6GNU Wget is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
8the Free Software Foundation; either version 2 of the License, or (at
9your option) any later version.
10
11GNU Wget is distributed in the hope that it will be useful, but
12WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with Wget; if not, write to the Free Software
18Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19
20In addition, as a special exception, the Free Software Foundation
21gives permission to link the code of its release of Wget with the
22OpenSSL project's "OpenSSL" library (or with modified versions of it
23that use the same license as the "OpenSSL" library), and distribute
24the linked executables. You must obey the GNU General Public License
25in all respects for all of the code used other than "OpenSSL". If you
26modify this file, you may extend this exception to your version of the
27file, but you are not obligated to do so. If you do not wish to do
28so, delete this exception statement from your version. */
29
30/* Written by Hrvoje Niksic. Parts are loosely inspired by the
31 cookie patch submitted by Tomasz Wegrzanowski.
32
33 This implements the client-side cookie support, as specified
34 (loosely) by Netscape's "preliminary specification", currently
35 available at:
36
37 http://wp.netscape.com/newsref/std/cookie_spec.html
38
39 rfc2109 is not supported because of its incompatibilities with the
40 above widely-used specification. rfc2965 is entirely ignored,
41 since popular client software doesn't implement it, and even the
42 sites that do send Set-Cookie2 also emit Set-Cookie for
43 compatibility. */
44
45#include <config.h>
46
47#include <stdio.h>
48#ifdef HAVE_STRING_H
49# include <string.h>
50#else
51# include <strings.h>
52#endif
53#include <stdlib.h>
54#include <assert.h>
55#include <errno.h>
56
57#include "wget.h"
58#include "utils.h"
59#include "hash.h"
60#include "cookies.h"
61
62/* This should *really* be in a .h file! */
63time_t http_atotm PARAMS ((const char *));
64
65
66/* Declarations of `struct cookie' and the most basic functions. */
67
/* A cookie jar is the storage for cookies and the means of looking
   them up efficiently.  Cookies that share a domain are linked into
   a list called a "chain", and the head of each chain is stored in
   the jar's `chains' hash table under the domain name.

   For example, the cookies for google.com are reached with
   hash_table_get (jar->chains, "google.com").  Sending cookies to
   `www.google.com' means consulting the chains of both
   `www.google.com' and `google.com' -- but never walking through
   *all* the cookies in the jar.  */

struct cookie_jar {
  /* Domain name -> head of that domain's cookie chain.  */
  struct hash_table *chains;

  /* How many cookies the jar currently holds.  */
  int cookie_count;
};
86
/* The current time as sampled by the entry point functions.  The
   low-level routines read this instead of calling time() again and
   again.  */
static time_t cookies_now;
90
91struct cookie_jar *
92cookie_jar_new (void)
93{
94 struct cookie_jar *jar = xnew (struct cookie_jar);
95 jar->chains = make_nocase_string_hash_table (0);
96 jar->cookie_count = 0;
97 return jar;
98}
99
struct cookie {
  char *domain;                 /* domain the cookie belongs to */
  int port;                     /* port number, or PORT_ANY */
  char *path;                   /* path prefix the cookie applies to */

  int secure;                   /* non-zero if the cookie must not
                                   be transmitted over non-https
                                   connections. */
  int domain_exact;             /* non-zero if DOMAIN must match the
                                   host as a whole. */

  int permanent;                /* non-zero if the cookie should
                                   outlive the session. */
  time_t expiry_time;           /* time when the cookie expires; 0
                                   means undetermined. */

  int discard_requested;        /* non-zero if the cookie was created
                                   only to request discarding of
                                   another cookie. */

  char *attr;                   /* cookie attribute name */
  char *value;                  /* cookie attribute value */

  struct cookie *next;          /* next cookie in the chain of the
                                   same domain. */
};
126
/* Value of a cookie's `port' field when the cookie is not tied to a
   particular port.  */
#define PORT_ANY (-1)
128
129/* Allocate and return a new, empty cookie structure. */
130
131static struct cookie *
132cookie_new (void)
133{
134 struct cookie *cookie = xnew0 (struct cookie);
135
136 /* Both cookie->permanent and cookie->expiry_time are now 0. This
137 means that the cookie doesn't expire, but is only valid for this
138 session (i.e. not written out to disk). */
139
140 cookie->port = PORT_ANY;
141 return cookie;
142}
143
144/* Non-zero if the cookie has expired. Assumes cookies_now has been
145 set by one of the entry point functions. */
146
147static int
148cookie_expired_p (const struct cookie *c)
149{
150 return c->expiry_time != 0 && c->expiry_time < cookies_now;
151}
152
153/* Deallocate COOKIE and its components. */
154
155static void
156delete_cookie (struct cookie *cookie)
157{
158 xfree_null (cookie->domain);
159 xfree_null (cookie->path);
160 xfree_null (cookie->attr);
161 xfree_null (cookie->value);
162 xfree (cookie);
163}
164
165
166/* Functions for storing cookies.
167
168 All cookies can be reached beginning with jar->chains. The key in
169 that table is the domain name, and the value is a linked list of
170 all cookies from that domain. Every new cookie is placed on the
171 head of the list. */
172
173/* Find and return a cookie in JAR whose domain, path, and attribute
174 name correspond to COOKIE. If found, PREVPTR will point to the
175 location of the cookie previous in chain, or NULL if the found
176 cookie is the head of a chain.
177
178 If no matching cookie is found, return NULL. */
179
180static struct cookie *
181find_matching_cookie (struct cookie_jar *jar, struct cookie *cookie,
182 struct cookie **prevptr)
183{
184 struct cookie *chain, *prev;
185
186 chain = hash_table_get (jar->chains, cookie->domain);
187 if (!chain)
188 goto nomatch;
189
190 prev = NULL;
191 for (; chain; prev = chain, chain = chain->next)
192 if (0 == strcmp (cookie->path, chain->path)
193 && 0 == strcmp (cookie->attr, chain->attr)
194 && cookie->port == chain->port)
195 {
196 *prevptr = prev;
197 return chain;
198 }
199
200 nomatch:
201 *prevptr = NULL;
202 return NULL;
203}
204
205/* Store COOKIE to the jar.
206
207 This is done by placing COOKIE at the head of its chain. However,
208 if COOKIE matches a cookie already in memory, as determined by
209 find_matching_cookie, the old cookie is unlinked and destroyed.
210
211 The key of each chain's hash table entry is allocated only the
212 first time; next hash_table_put's reuse the same key. */
213
214static void
215store_cookie (struct cookie_jar *jar, struct cookie *cookie)
216{
217 struct cookie *chain_head;
218 char *chain_key;
219
220 if (hash_table_get_pair (jar->chains, cookie->domain,
221 &chain_key, &chain_head))
222 {
223 /* A chain of cookies in this domain already exists. Check for
224 duplicates -- if an extant cookie exactly matches our domain,
225 port, path, and name, replace it. */
226 struct cookie *prev;
227 struct cookie *victim = find_matching_cookie (jar, cookie, &prev);
228
229 if (victim)
230 {
231 /* Remove VICTIM from the chain. COOKIE will be placed at
232 the head. */
233 if (prev)
234 {
235 prev->next = victim->next;
236 cookie->next = chain_head;
237 }
238 else
239 {
240 /* prev is NULL; apparently VICTIM was at the head of
241 the chain. This place will be taken by COOKIE, so
242 all we need to do is: */
243 cookie->next = victim->next;
244 }
245 delete_cookie (victim);
246 --jar->cookie_count;
247 DEBUGP (("Deleted old cookie (to be replaced.)\n"));
248 }
249 else
250 cookie->next = chain_head;
251 }
252 else
253 {
254 /* We are now creating the chain. Use a copy of cookie->domain
255 as the key for the life-time of the chain. Using
256 cookie->domain would be unsafe because the life-time of the
257 chain may exceed the life-time of the cookie. (Cookies may
258 be deleted from the chain by this very function.) */
259 cookie->next = NULL;
260 chain_key = xstrdup (cookie->domain);
261 }
262
263 hash_table_put (jar->chains, chain_key, cookie);
264 ++jar->cookie_count;
265
266#ifdef ENABLE_DEBUG
267 if (opt.debug)
268 {
269 time_t exptime = cookie->expiry_time;
270 DEBUGP (("\nStored cookie %s %d%s %s <%s> <%s> [expiry %s] %s %s\n",
271 cookie->domain, cookie->port,
272 cookie->port == PORT_ANY ? " (ANY)" : "",
273 cookie->path,
274 cookie->permanent ? "permanent" : "session",
275 cookie->secure ? "secure" : "insecure",
276 cookie->expiry_time ? datetime_str (&exptime) : "none",
277 cookie->attr, cookie->value));
278 }
279#endif
280}
281
282/* Discard a cookie matching COOKIE's domain, port, path, and
283 attribute name. This gets called when we encounter a cookie whose
284 expiry date is in the past, or whose max-age is set to 0. The
285 former corresponds to netscape cookie spec, while the latter is
286 specified by rfc2109. */
287
288static void
289discard_matching_cookie (struct cookie_jar *jar, struct cookie *cookie)
290{
291 struct cookie *prev, *victim;
292
293 if (!hash_table_count (jar->chains))
294 /* No elements == nothing to discard. */
295 return;
296
297 victim = find_matching_cookie (jar, cookie, &prev);
298 if (victim)
299 {
300 if (prev)
301 /* Simply unchain the victim. */
302 prev->next = victim->next;
303 else
304 {
305 /* VICTIM was head of its chain. We need to place a new
306 cookie at the head. */
307 char *chain_key = NULL;
308 int res;
309
310 res = hash_table_get_pair (jar->chains, victim->domain,
311 &chain_key, NULL);
312 assert (res != 0);
313 if (!victim->next)
314 {
315 /* VICTIM was the only cookie in the chain. Destroy the
316 chain and deallocate the chain key. */
317 hash_table_remove (jar->chains, victim->domain);
318 xfree (chain_key);
319 }
320 else
321 hash_table_put (jar->chains, chain_key, victim->next);
322 }
323 delete_cookie (victim);
324 DEBUGP (("Discarded old cookie.\n"));
325 }
326}
327
328
329/* Functions for parsing the `Set-Cookie' header, and creating new
330 cookies from the wire. */
331
332#define NAME_IS(string_literal) \
333 BOUNDED_EQUAL_NO_CASE (name_b, name_e, string_literal)
334
335#define VALUE_EXISTS (value_b && value_e)
336
337#define VALUE_NON_EMPTY (VALUE_EXISTS && (value_b != value_e))
338
339/* Update the appropriate cookie field. [name_b, name_e) are expected
340 to delimit the attribute name, while [value_b, value_e) (optional)
341 should delimit the attribute value.
342
343 When called the first time, it will set the cookie's attribute name
344 and value. After that, it will check the attribute name for
345 special fields such as `domain', `path', etc. Where appropriate,
346 it will parse the values of the fields it recognizes and fill the
347 corresponding fields in COOKIE.
348
349 Returns 1 on success. Returns zero in case a syntax error is
350 found; such a cookie should be discarded. */
351
352static int
353update_cookie_field (struct cookie *cookie,
354 const char *name_b, const char *name_e,
355 const char *value_b, const char *value_e)
356{
357 assert (name_b != NULL && name_e != NULL);
358
359 if (!cookie->attr)
360 {
361 if (!VALUE_EXISTS)
362 return 0;
363 cookie->attr = strdupdelim (name_b, name_e);
364 cookie->value = strdupdelim (value_b, value_e);
365 return 1;
366 }
367
368 if (NAME_IS ("domain"))
369 {
370 if (!VALUE_NON_EMPTY)
371 return 0;
372 xfree_null (cookie->domain);
373 /* Strictly speaking, we should set cookie->domain_exact if the
374 domain doesn't begin with a dot. But many sites set the
375 domain to "foo.com" and expect "subhost.foo.com" to get the
376 cookie, and it apparently works. */
377 if (*value_b == '.')
378 ++value_b;
379 cookie->domain = strdupdelim (value_b, value_e);
380 return 1;
381 }
382 else if (NAME_IS ("path"))
383 {
384 if (!VALUE_NON_EMPTY)
385 return 0;
386 xfree_null (cookie->path);
387 cookie->path = strdupdelim (value_b, value_e);
388 return 1;
389 }
390 else if (NAME_IS ("expires"))
391 {
392 char *value_copy;
393 time_t expires;
394
395 if (!VALUE_NON_EMPTY)
396 return 0;
397 BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
398
399 expires = http_atotm (value_copy);
400 if (expires != (time_t) -1)
401 {
402 cookie->permanent = 1;
403 cookie->expiry_time = expires;
404 }
405 else
406 /* Error in expiration spec. Assume default (cookie doesn't
407 expire, but valid only for this session.) */
408 ;
409
410 /* According to netscape's specification, expiry time in the
411 past means that discarding of a matching cookie is
412 requested. */
413 if (cookie->expiry_time < cookies_now)
414 cookie->discard_requested = 1;
415
416 return 1;
417 }
418 else if (NAME_IS ("max-age"))
419 {
420 double maxage = -1;
421 char *value_copy;
422
423 if (!VALUE_NON_EMPTY)
424 return 0;
425 BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
426
427 sscanf (value_copy, "%lf", &maxage);
428 if (maxage == -1)
429 /* something went wrong. */
430 return 0;
431 cookie->permanent = 1;
432 cookie->expiry_time = cookies_now + maxage;
433
434 /* According to rfc2109, a cookie with max-age of 0 means that
435 discarding of a matching cookie is requested. */
436 if (maxage == 0)
437 cookie->discard_requested = 1;
438
439 return 1;
440 }
441 else if (NAME_IS ("secure"))
442 {
443 /* ignore value completely */
444 cookie->secure = 1;
445 return 1;
446 }
447 else
448 /* Unrecognized attribute; ignore it. */
449 return 1;
450}
451
452#undef NAME_IS
453
/* Evaluates to non-zero for characters that may appear in the name
   of an attribute.  Only alphanumerics, '-', and '_' used to be
   allowed, but a number of sites want to use weirder attribute
   names, so the check had to become more lenient.  rfc2965
   "informally specifies" an attribute name (token) as "a sequence of
   non-special, non-white space characters".  Hence everything in the
   printable ASCII range is accepted, except for the characters we
   know could harm us: '"', '=', ';', and ','.  */

#define ATTR_NAME_CHAR(c) (!((c) <= 32 || (c) >= 127            \
                             || (c) == '"' || (c) == '='        \
                             || (c) == ';' || (c) == ','))
465
466/* Parse the contents of the `Set-Cookie' header. The header looks
467 like this:
468
469 name1=value1; name2=value2; ...
470
471 Trailing semicolon is optional; spaces are allowed between all
472 tokens. Additionally, values may be quoted.
473
474 A new cookie is returned upon success, NULL otherwise. The
475 specified CALLBACK function (normally `update_cookie_field' is used
476 to update the fields of the newly created cookie structure. */
477
478static struct cookie *
479parse_set_cookies (const char *sc,
480 int (*callback) (struct cookie *,
481 const char *, const char *,
482 const char *, const char *),
483 int silent)
484{
485 struct cookie *cookie = cookie_new ();
486
487 /* #### Hand-written DFAs are no fun to debug. We'de be better off
488 to rewrite this as an inline parser. */
489
490 enum { S_START, S_NAME, S_NAME_POST,
491 S_VALUE_PRE, S_VALUE, S_QUOTED_VALUE, S_VALUE_TRAILSPACE,
492 S_ATTR_ACTION, S_DONE, S_ERROR
493 } state = S_START;
494
495 const char *p = sc;
496 char c;
497
498 const char *name_b = NULL, *name_e = NULL;
499 const char *value_b = NULL, *value_e = NULL;
500
501 c = *p;
502
503 while (state != S_DONE && state != S_ERROR)
504 {
505 switch (state)
506 {
507 case S_START:
508 if (!c)
509 state = S_DONE;
510 else if (ISSPACE (c))
511 /* Strip all whitespace preceding the name. */
512 c = *++p;
513 else if (ATTR_NAME_CHAR (c))
514 {
515 name_b = p;
516 state = S_NAME;
517 }
518 else
519 /* empty attr name not allowed */
520 state = S_ERROR;
521 break;
522 case S_NAME:
523 if (!c || c == ';' || c == '=' || ISSPACE (c))
524 {
525 name_e = p;
526 state = S_NAME_POST;
527 }
528 else if (ATTR_NAME_CHAR (c))
529 c = *++p;
530 else
531 state = S_ERROR;
532 break;
533 case S_NAME_POST:
534 if (!c || c == ';')
535 {
536 value_b = value_e = NULL;
537 if (c == ';')
538 c = *++p;
539 state = S_ATTR_ACTION;
540 }
541 else if (c == '=')
542 {
543 c = *++p;
544 state = S_VALUE_PRE;
545 }
546 else if (ISSPACE (c))
547 /* Ignore space and keep the state. */
548 c = *++p;
549 else
550 state = S_ERROR;
551 break;
552 case S_VALUE_PRE:
553 if (!c || c == ';')
554 {
555 value_b = value_e = p;
556 if (c == ';')
557 c = *++p;
558 state = S_ATTR_ACTION;
559 }
560 else if (c == '"')
561 {
562 c = *++p;
563 value_b = p;
564 state = S_QUOTED_VALUE;
565 }
566 else if (ISSPACE (c))
567 c = *++p;
568 else
569 {
570 value_b = p;
571 value_e = NULL;
572 state = S_VALUE;
573 }
574 break;
575 case S_VALUE:
576 if (!c || c == ';' || ISSPACE (c))
577 {
578 value_e = p;
579 state = S_VALUE_TRAILSPACE;
580 }
581 else
582 {
583 value_e = NULL; /* no trailing space */
584 c = *++p;
585 }
586 break;
587 case S_QUOTED_VALUE:
588 if (c == '"')
589 {
590 value_e = p;
591 c = *++p;
592 state = S_VALUE_TRAILSPACE;
593 }
594 else if (!c)
595 state = S_ERROR;
596 else
597 c = *++p;
598 break;
599 case S_VALUE_TRAILSPACE:
600 if (c == ';')
601 {
602 c = *++p;
603 state = S_ATTR_ACTION;
604 }
605 else if (!c)
606 state = S_ATTR_ACTION;
607 else if (ISSPACE (c))
608 c = *++p;
609 else
610 state = S_VALUE;
611 break;
612 case S_ATTR_ACTION:
613 {
614 int legal = callback (cookie, name_b, name_e, value_b, value_e);
615 if (!legal)
616 {
617 if (!silent)
618 {
619 char *name;
620 BOUNDED_TO_ALLOCA (name_b, name_e, name);
621 logprintf (LOG_NOTQUIET,
622 _("Error in Set-Cookie, field `%s'"),
623 escnonprint (name));
624 }
625 state = S_ERROR;
626 break;
627 }
628 state = S_START;
629 }
630 break;
631 case S_DONE:
632 case S_ERROR:
633 /* handled by loop condition */
634 break;
635 }
636 }
637 if (state == S_DONE)
638 return cookie;
639
640 delete_cookie (cookie);
641 if (state != S_ERROR)
642 abort ();
643
644 if (!silent)
645 logprintf (LOG_NOTQUIET,
646 _("Syntax error in Set-Cookie: %s at position %d.\n"),
647 escnonprint (sc), (int) (p - sc));
648 return NULL;
649}
650
651
/* Sanity checks.  These are important, otherwise it is possible for
   malicious attackers to destroy important cookie information and/or
   violate your privacy.  */
655
656
/* Helpers for numeric_address_p.  Both advance P and make the
   *enclosing function* return 0 when the expected input is
   missing.  */

/* Require one or more digits at P and skip past them.  */
#define REQUIRE_DIGITS(p) do {                  \
  if (!ISDIGIT (*p))                            \
    return 0;                                   \
  do                                            \
    ++p;                                        \
  while (ISDIGIT (*p));                         \
} while (0)

/* Require a '.' at P and skip past it.  */
#define REQUIRE_DOT(p) do {                     \
  if (*p++ != '.')                              \
    return 0;                                   \
} while (0)

/* Return 1 if ADDR has the form <digits>.<digits>.<digits>.<digits>
   with nothing following, 0 otherwise.  The values of the four
   numbers are not examined.

   We don't want to call network functions like inet_addr() because
   all we need is a check, preferably one that is small, fast, and
   well-defined.  */

static int
numeric_address_p (const char *addr)
{
  const char *cursor = addr;

  REQUIRE_DIGITS (cursor);      /* A */
  REQUIRE_DOT (cursor);         /* . */
  REQUIRE_DIGITS (cursor);      /* B */
  REQUIRE_DOT (cursor);         /* . */
  REQUIRE_DIGITS (cursor);      /* C */
  REQUIRE_DOT (cursor);         /* . */
  REQUIRE_DIGITS (cursor);      /* D */

  /* Anything left over disqualifies the address.  */
  return *cursor == '\0';
}
692
/* Return non-zero if COOKIE_DOMAIN is an appropriate domain for a
   cookie coming from HOST.  Originally I tried to make the check
   compliant with rfc2109, but the sites deviated too often, so I had
   to fall back to "tail matching", as defined by the original
   Netscape's cookie spec.  */

static int
check_domain_match (const char *cookie_domain, const char *host)
{
  DEBUGP (("cdm: 1"));

  /* Numeric address requires exact match.  It also requires HOST to
     be an IP address.  */
  if (numeric_address_p (cookie_domain))
    return 0 == strcmp (cookie_domain, host);

  DEBUGP ((" 2"));

  /* For the sake of efficiency, check for exact match first. */
  if (0 == strcasecmp (cookie_domain, host))
    return 1;

  DEBUGP ((" 3"));

  /* COOKIE_DOMAIN must match the tail of HOST. */
  if (!match_tail (host, cookie_domain, 1))
    return 0;

  /* We know that COOKIE_DOMAIN is a tail of HOST; however, we must
     make sure that somebody is not trying to set the cookie for a
     subdomain shared by many entities.  For example, "company.co.uk"
     must not be allowed to set a cookie for ".co.uk".  On the other
     hand, "sso.redhat.de" should be able to set a cookie for
     ".redhat.de".

     The only marginally sane way to handle this I can think of is to
     reject on the basis of the length of the second-level domain name
     (but only when the top-level domain is unknown), with the
     assumption that those of three or less characters could be
     reserved.  For example:

          .co.org -> works because the TLD is known
           .co.uk -> doesn't work because "co" is only two chars long
          .com.au -> doesn't work because "com" is only 3 chars long
          .cnn.uk -> doesn't work because "cnn" is also only 3 chars long (ugh)
          .cnn.de -> doesn't work for the same reason (ugh!!)
         .abcd.de -> works because "abcd" is 4 chars long
      .img.cnn.de -> works because it's not trying to set the 2nd level domain
       .cnn.co.uk -> works for the same reason

    That should prevent misuse, while allowing reasonable usage.  If
    someone knows of a better way to handle this, please let me
    know.  */
  {
    const char *p = cookie_domain;
    int dccount = 1;            /* number of domain components */
    int ldcl = 0;               /* last domain component length */
    int nldcl = 0;              /* next to last domain component length */

    if (*p == '.')
      /* Ignore leading period in this calculation. */
      ++p;
    DEBUGP ((" 4"));

    for (; *p != '\0'; p++)
      {
        if (*p != '.')
          {
            ++ldcl;
            continue;
          }

        if (ldcl == 0)
          /* Empty domain component found -- the domain is invalid. */
          return 0;
        if (p[1] == '\0')
          /* Tolerate trailing '.' by not treating the domain as
             one ending with an empty domain component.  */
          break;

        /* A new component begins after this dot.  */
        nldcl = ldcl;
        ldcl = 0;
        ++dccount;
      }

    DEBUGP ((" 5"));

    if (dccount < 2)
      return 0;

    DEBUGP ((" 6"));

    if (dccount == 2)
      {
        static const char *known_toplevel_domains[] = {
          ".com", ".edu", ".net", ".org", ".gov", ".mil", ".int"
        };
        int known_toplevel = 0;
        int i;

        for (i = 0; i < countof (known_toplevel_domains) && !known_toplevel;
             i++)
          if (match_tail (cookie_domain, known_toplevel_domains[i], 1))
            known_toplevel = 1;

        if (!known_toplevel && nldcl <= 3)
          return 0;
      }
  }

  DEBUGP ((" 7"));

  /* Don't allow the host "foobar.com" to set a cookie for domain
     "bar.com".  */
  if (*cookie_domain != '.')
    {
      int dlen = strlen (cookie_domain);
      int hlen = strlen (host);
      /* cookie host:    hostname.foobar.com */
      /* desired domain:             bar.com */
      /* '.' must be here in host-> ^        */
      if (hlen > dlen && host[hlen - dlen - 1] != '.')
        return 0;
    }

  DEBUGP ((" 8"));

  return 1;
}
824
825static int path_matches PARAMS ((const char *, const char *));
826
827/* Check whether PATH begins with COOKIE_PATH. */
828
829static int
830check_path_match (const char *cookie_path, const char *path)
831{
832 return path_matches (path, cookie_path);
833}
834
/* Prepend '/' to string S.  The result is built in fresh
   stack-allocated (alloca) space, and S is changed to point to that
   new location.  S is evaluated more than once, so it must be a
   plain assignable variable.  */

#define PREPEND_SLASH(s) do {                                   \
  char *PS_newstr = (char *) alloca (strlen (s) + 2);           \
  PS_newstr[0] = '/';                                           \
  strcpy (&PS_newstr[1], s);                                    \
  s = PS_newstr;                                                \
} while (0)
844
845
846
847/* Process the HTTP `Set-Cookie' header. This results in storing the
848 cookie or discarding a matching one, or ignoring it completely, all
849 depending on the contents. */
850
851void
852cookie_handle_set_cookie (struct cookie_jar *jar,
853 const char *host, int port,
854 const char *path, const char *set_cookie)
855{
856 struct cookie *cookie;
857 cookies_now = time (NULL);
858
859 /* Wget's paths don't begin with '/' (blame rfc1808), but cookie
860 usage assumes /-prefixed paths. Until the rest of Wget is fixed,
861 simply prepend slash to PATH. */
862 PREPEND_SLASH (path);
863
864 cookie = parse_set_cookies (set_cookie, update_cookie_field, 0);
865 if (!cookie)
866 goto out;
867
868 /* Sanitize parts of cookie. */
869
870 if (!cookie->domain)
871 {
872 copy_domain:
873 /* If the domain was not provided, we use the one we're talking
874 to, and set exact match. */
875 cookie->domain = xstrdup (host);
876 cookie->domain_exact = 1;
877 /* Set the port, but only if it's non-default. */
878 if (port != 80 && port != 443)
879 cookie->port = port;
880 }
881 else
882 {
883 if (!check_domain_match (cookie->domain, host))
884 {
885 logprintf (LOG_NOTQUIET,
886 _("Cookie coming from %s attempted to set domain to %s\n"),
887 escnonprint (host), escnonprint (cookie->domain));
888 xfree (cookie->domain);
889 goto copy_domain;
890 }
891 }
892
893 if (!cookie->path)
894 {
895 /* The cookie doesn't set path: set it to the URL path, sans the
896 file part ("/dir/file" truncated to "/dir/"). */
897 char *trailing_slash = strrchr (path, '/');
898 if (trailing_slash)
899 cookie->path = strdupdelim (path, trailing_slash + 1);
900 else
901 /* no slash in the string -- can this even happen? */
902 cookie->path = xstrdup (path);
903 }
904 else
905 {
906 /* The cookie sets its own path; verify that it is legal. */
907 if (!check_path_match (cookie->path, path))
908 {
909 DEBUGP (("Attempt to fake the path: %s, %s\n",
910 cookie->path, path));
911 goto out;
912 }
913 }
914
915 /* Now store the cookie, or discard an existing cookie, if
916 discarding was requested. */
917
918 if (cookie->discard_requested)
919 {
920 discard_matching_cookie (jar, cookie);
921 goto out;
922 }
923
924 store_cookie (jar, cookie);
925 return;
926
927 out:
928 if (cookie)
929 delete_cookie (cookie);
930}
931
932
933/* Support for sending out cookies in HTTP requests, based on
934 previously stored cookies. Entry point is
935 `build_cookies_request'. */
936
/* Count the occurrences of CHR in STRING and return that number.
   The terminating null character is never counted.  */

static int
count_char (const char *string, char chr)
{
  int hits = 0;

  while (*string != '\0')
    {
      if (*string == chr)
        hits++;
      string++;
    }
  return hits;
}
949
950/* Find the cookie chains whose domains match HOST and store them to
951 DEST.
952
953 A cookie chain is the head of a list of cookies that belong to a
954 host/domain. Given HOST "img.search.xemacs.org", this function
955 will return the chains for "img.search.xemacs.org",
956 "search.xemacs.org", and "xemacs.org" -- those of them that exist
957 (if any), that is.
958
959 DEST should be large enough to accept (in the worst case) as many
960 elements as there are domain components of HOST. */
961
962static int
963find_chains_of_host (struct cookie_jar *jar, const char *host,
964 struct cookie *dest[])
965{
966 int dest_count = 0;
967 int passes, passcnt;
968
969 /* Bail out quickly if there are no cookies in the jar. */
970 if (!hash_table_count (jar->chains))
971 return 0;
972
973 if (numeric_address_p (host))
974 /* If host is an IP address, only check for the exact match. */
975 passes = 1;
976 else
977 /* Otherwise, check all the subdomains except the top-level (last)
978 one. As a domain with N components has N-1 dots, the number of
979 passes equals the number of dots. */
980 passes = count_char (host, '.');
981
982 passcnt = 0;
983
984 /* Find chains that match HOST, starting with exact match and
985 progressing to less specific domains. For instance, given HOST
986 fly.srk.fer.hr, first look for fly.srk.fer.hr's chain, then
987 srk.fer.hr's, then fer.hr's. */
988 while (1)
989 {
990 struct cookie *chain = hash_table_get (jar->chains, host);
991 if (chain)
992 dest[dest_count++] = chain;
993 if (++passcnt >= passes)
994 break;
995 host = strchr (host, '.') + 1;
996 }
997
998 return dest_count;
999}
1000
/* If FULL_PATH begins with PREFIX, return the length of PREFIX plus
   one, zero otherwise.  The "plus one" keeps the result non-zero
   even for an empty PREFIX, so that any match tests as true.  */

static int
path_matches (const char *full_path, const char *prefix)
{
  int len = strlen (prefix);
  int is_prefix = (strncmp (full_path, prefix, len) == 0);

  /* The length of PREFIX determines the quality of the match. */
  return is_prefix ? len + 1 : 0;
}
1016
1017/* Return non-zero iff COOKIE matches the provided parameters of the
1018 URL being downloaded: HOST, PORT, PATH, and SECFLAG.
1019
1020 If PATH_GOODNESS is non-NULL, store the "path goodness" value
1021 there. That value is a measure of how closely COOKIE matches PATH,
1022 used for ordering cookies. */
1023
1024static int
1025cookie_matches_url (const struct cookie *cookie,
1026 const char *host, int port, const char *path,
1027 int secflag, int *path_goodness)
1028{
1029 int pg;
1030
1031 if (cookie_expired_p (cookie))
1032 /* Ignore stale cookies. Don't bother unchaining the cookie at
1033 this point -- Wget is a relatively short-lived application, and
1034 stale cookies will not be saved by `save_cookies'. On the
1035 other hand, this function should be as efficient as
1036 possible. */
1037 return 0;
1038
1039 if (cookie->secure && !secflag)
1040 /* Don't transmit secure cookies over insecure connections. */
1041 return 0;
1042 if (cookie->port != PORT_ANY && cookie->port != port)
1043 return 0;
1044
1045 /* If exact domain match is required, verify that cookie's domain is
1046 equal to HOST. If not, assume success on the grounds of the
1047 cookie's chain having been found by find_chains_of_host. */
1048 if (cookie->domain_exact
1049 && 0 != strcasecmp (host, cookie->domain))
1050 return 0;
1051
1052 pg = path_matches (path, cookie->path);
1053 if (!pg)
1054 return 0;
1055
1056 if (path_goodness)
1057 /* If the caller requested path_goodness, we return it. This is
1058 an optimization, so that the caller doesn't need to call
1059 path_matches() again. */
1060 *path_goodness = pg;
1061 return 1;
1062}
1063
/* A pointer to a cookie, bundled with the additional information
   about the cookie's "goodness".  This is what allows us to sort the
   cookies when returning them to the server, as required by the
   spec.  */

struct weighed_cookie {
  struct cookie *cookie;        /* the cookie being weighed */
  int domain_goodness;          /* higher sorts earlier */
  int path_goodness;            /* tie-breaker; higher sorts earlier */
};
1074
1075/* Comparator used for uniquifying the list. */
1076
1077static int
1078equality_comparator (const void *p1, const void *p2)
1079{
1080 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
1081 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
1082
1083 int namecmp = strcmp (wc1->cookie->attr, wc2->cookie->attr);
1084 int valuecmp = strcmp (wc1->cookie->value, wc2->cookie->value);
1085
1086 /* We only really care whether both name and value are equal. We
1087 return them in this order only for consistency... */
1088 return namecmp ? namecmp : valuecmp;
1089}
1090
1091/* Eliminate duplicate cookies. "Duplicate cookies" are any two
1092 cookies with the same attr name and value. Whenever a duplicate
1093 pair is found, one of the cookies is removed. */
1094
1095static int
1096eliminate_dups (struct weighed_cookie *outgoing, int count)
1097{
1098 struct weighed_cookie *h; /* hare */
1099 struct weighed_cookie *t; /* tortoise */
1100 struct weighed_cookie *end = outgoing + count;
1101
1102 /* We deploy a simple uniquify algorithm: first sort the array
1103 according to our sort criteria, then copy it to itself, comparing
1104 each cookie to its neighbor and ignoring the duplicates. */
1105
1106 qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator);
1107
1108 /* "Hare" runs through all the entries in the array, followed by
1109 "tortoise". If a duplicate is found, the hare skips it.
1110 Non-duplicate entries are copied to the tortoise ptr. */
1111
1112 for (h = t = outgoing; h < end; h++)
1113 {
1114 if (h != end - 1)
1115 {
1116 struct cookie *c0 = h[0].cookie;
1117 struct cookie *c1 = h[1].cookie;
1118 if (!strcmp (c0->attr, c1->attr) && !strcmp (c0->value, c1->value))
1119 continue; /* ignore the duplicate */
1120 }
1121
1122 /* If the hare has advanced past the tortoise (because of
1123 previous dups), make sure the values get copied. Otherwise,
1124 no copying is necessary. */
1125 if (h != t)
1126 *t++ = *h;
1127 else
1128 t++;
1129 }
1130 return t - outgoing;
1131}
1132
1133/* Comparator used for sorting by quality. */
1134
1135static int
1136goodness_comparator (const void *p1, const void *p2)
1137{
1138 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
1139 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
1140
1141 /* Subtractions take `wc2' as the first argument becauase we want a
1142 sort in *decreasing* order of goodness. */
1143 int dgdiff = wc2->domain_goodness - wc1->domain_goodness;
1144 int pgdiff = wc2->path_goodness - wc1->path_goodness;
1145
1146 /* Sort by domain goodness; if these are the same, sort by path
1147 goodness. (The sorting order isn't really specified; maybe it
1148 should be the other way around.) */
1149 return dgdiff ? dgdiff : pgdiff;
1150}
1151
1152/* Generate a `Cookie' header for a request that goes to HOST:PORT and
1153 requests PATH from the server. The resulting string is allocated
1154 with `malloc', and the caller is responsible for freeing it. If no
1155 cookies pertain to this request, i.e. no cookie header should be
1156 generated, NULL is returned. */
1157
1158char *
1159cookie_header (struct cookie_jar *jar, const char *host,
1160 int port, const char *path, int secflag)
1161{
1162 struct cookie **chains;
1163 int chain_count;
1164
1165 struct cookie *cookie;
1166 struct weighed_cookie *outgoing;
1167 int count, i, ocnt;
1168 char *result;
1169 int result_size, pos;
1170 PREPEND_SLASH (path); /* see cookie_handle_set_cookie */
1171
1172 /* First, find the cookie chains whose domains match HOST. */
1173
1174 /* Allocate room for find_chains_of_host to write to. The number of
1175 chains can at most equal the number of subdomains, hence
1176 1+<number of dots>. */
1177 chains = alloca_array (struct cookie *, 1 + count_char (host, '.'));
1178 chain_count = find_chains_of_host (jar, host, chains);
1179
1180 /* No cookies for this host. */
1181 if (!chain_count)
1182 return NULL;
1183
1184 cookies_now = time (NULL);
1185
1186 /* Now extract from the chains those cookies that match our host
1187 (for domain_exact cookies), port (for cookies with port other
1188 than PORT_ANY), etc. See matching_cookie for details. */
1189
1190 /* Count the number of matching cookies. */
1191 count = 0;
1192 for (i = 0; i < chain_count; i++)
1193 for (cookie = chains[i]; cookie; cookie = cookie->next)
1194 if (cookie_matches_url (cookie, host, port, path, secflag, NULL))
1195 ++count;
1196 if (!count)
1197 return NULL; /* no cookies matched */
1198
1199 /* Allocate the array. */
1200 outgoing = alloca_array (struct weighed_cookie, count);
1201
1202 /* Fill the array with all the matching cookies from the chains that
1203 match HOST. */
1204 ocnt = 0;
1205 for (i = 0; i < chain_count; i++)
1206 for (cookie = chains[i]; cookie; cookie = cookie->next)
1207 {
1208 int pg;
1209 if (!cookie_matches_url (cookie, host, port, path, secflag, &pg))
1210 continue;
1211 outgoing[ocnt].cookie = cookie;
1212 outgoing[ocnt].domain_goodness = strlen (cookie->domain);
1213 outgoing[ocnt].path_goodness = pg;
1214 ++ocnt;
1215 }
1216 assert (ocnt == count);
1217
1218 /* Eliminate duplicate cookies; that is, those whose name and value
1219 are the same. */
1220 count = eliminate_dups (outgoing, count);
1221
1222 /* Sort the array so that best-matching domains come first, and
1223 that, within one domain, best-matching paths come first. */
1224 qsort (outgoing, count, sizeof (struct weighed_cookie), goodness_comparator);
1225
1226 /* Count the space the name=value pairs will take. */
1227 result_size = 0;
1228 for (i = 0; i < count; i++)
1229 {
1230 struct cookie *c = outgoing[i].cookie;
1231 /* name=value */
1232 result_size += strlen (c->attr) + 1 + strlen (c->value);
1233 }
1234
1235 /* Allocate output buffer:
1236 name=value pairs -- result_size
1237 "; " separators -- (count - 1) * 2
1238 \0 terminator -- 1 */
1239 result_size = result_size + (count - 1) * 2 + 1;
1240 result = xmalloc (result_size);
1241 pos = 0;
1242 for (i = 0; i < count; i++)
1243 {
1244 struct cookie *c = outgoing[i].cookie;
1245 int namlen = strlen (c->attr);
1246 int vallen = strlen (c->value);
1247
1248 memcpy (result + pos, c->attr, namlen);
1249 pos += namlen;
1250 result[pos++] = '=';
1251 memcpy (result + pos, c->value, vallen);
1252 pos += vallen;
1253 if (i < count - 1)
1254 {
1255 result[pos++] = ';';
1256 result[pos++] = ' ';
1257 }
1258 }
1259 result[pos++] = '\0';
1260 assert (pos == result_size);
1261 return result;
1262}
1263
1264
/* Support for loading and saving cookies.  The format used for
   loading and saving should be the format of the `cookies.txt' file
   used by Netscape and Mozilla, at least the Unix versions.
   (Apparently IE can export cookies in that format as well.)  The
   format goes like this:

       DOMAIN DOMAIN-FLAG PATH SECURE-FLAG TIMESTAMP ATTR-NAME ATTR-VALUE

     DOMAIN      -- cookie domain, optionally followed by :PORT
     DOMAIN-FLAG -- whether all hosts in the domain match
     PATH        -- cookie path
     SECURE-FLAG -- whether cookie requires secure connection
     TIMESTAMP   -- expiry timestamp, number of seconds since epoch
     ATTR-NAME   -- name of the cookie attribute
     ATTR-VALUE  -- value of the cookie attribute (empty if absent)

   The fields are separated by TABs.  All fields are mandatory, except
   for ATTR-VALUE.  The `-FLAG' fields are boolean, their legal values
   being "TRUE" and "FALSE".  Empty lines, lines consisting of
   whitespace only, and comment lines (beginning with # optionally
   preceded by whitespace) are ignored.

   Example line from cookies.txt (split in two lines for readability):

       .google.com TRUE / FALSE 2147368447 \
       PREF ID=34bb47565bbcd47b:LD=en:NR=20:TM=985172580:LM=985739012

*/
1293
/* If the region [DOMAIN_B, DOMAIN_E) contains a `:' that is followed
   by nothing but decimal digits up to DOMAIN_E, parse those digits as
   a port number, store the location of the `:' in *DOMAIN_E_PTR (the
   new end of the domain proper) and return the port.

   Return 0, leaving *DOMAIN_E_PTR untouched, when there is no `:',
   when anything other than a digit follows it, or when the number
   exceeds 65535, the largest valid port.  A `:' with no digits after
   it also yields 0, but in that case *DOMAIN_E_PTR is still moved to
   the `:'.  */

static int
domain_port (const char *domain_b, const char *domain_e,
             const char **domain_e_ptr)
{
  int port = 0;
  const char *p;
  const char *colon = memchr (domain_b, ':', domain_e - domain_b);
  if (!colon)
    return 0;

  /* Only the ASCII digits 0-9 are accepted, independent of locale.  */
  for (p = colon + 1; p < domain_e && *p >= '0' && *p <= '9'; p++)
    {
      /* PORT is at most 65535 here, so this cannot overflow an int.  */
      port = 10 * port + (*p - '0');
      if (port > 65535)
        /* Not a valid port number; treat it like any other garbage
           instead of letting the accumulator overflow on an overlong
           digit string.  */
        return 0;
    }
  if (p < domain_e)
    /* Garbage following port number. */
    return 0;

  *domain_e_ptr = colon;
  return port;
}
1315
/* Extract the next TAB-terminated word starting at P: B is set to its
   first character and E to the TAB that ends it, after which P is
   moved just past that TAB.  If the word is empty, or the string ends
   before a TAB is seen, control jumps to the label `next', which must
   exist in the function using the macro; B and E have been assigned by
   then and P points at the character that stopped the scan.  */

#define GET_WORD(p, b, e) do {                  \
  for (b = p; *p != '\0' && *p != '\t'; ++p)    \
    ;                                           \
  e = p;                                        \
  if (e == b || *p == '\0')                     \
    goto next;                                  \
  ++p;                                          \
} while (0)
1325
1326/* Load cookies from FILE. */
1327
1328void
1329cookie_jar_load (struct cookie_jar *jar, const char *file)
1330{
1331 char *line;
1332 FILE *fp = fopen (file, "r");
1333 if (!fp)
1334 {
1335 logprintf (LOG_NOTQUIET, _("Cannot open cookies file `%s': %s\n"),
1336 file, strerror (errno));
1337 return;
1338 }
1339 cookies_now = time (NULL);
1340
1341 for (; ((line = read_whole_line (fp)) != NULL); xfree (line))
1342 {
1343 struct cookie *cookie;
1344 char *p = line;
1345
1346 double expiry;
1347 int port;
1348
1349 char *domain_b = NULL, *domain_e = NULL;
1350 char *domflag_b = NULL, *domflag_e = NULL;
1351 char *path_b = NULL, *path_e = NULL;
1352 char *secure_b = NULL, *secure_e = NULL;
1353 char *expires_b = NULL, *expires_e = NULL;
1354 char *name_b = NULL, *name_e = NULL;
1355 char *value_b = NULL, *value_e = NULL;
1356
1357 /* Skip leading white-space. */
1358 while (*p && ISSPACE (*p))
1359 ++p;
1360 /* Ignore empty lines. */
1361 if (!*p || *p == '#')
1362 continue;
1363
1364 GET_WORD (p, domain_b, domain_e);
1365 GET_WORD (p, domflag_b, domflag_e);
1366 GET_WORD (p, path_b, path_e);
1367 GET_WORD (p, secure_b, secure_e);
1368 GET_WORD (p, expires_b, expires_e);
1369 GET_WORD (p, name_b, name_e);
1370
1371 /* Don't use GET_WORD for value because it ends with newline,
1372 not TAB. */
1373 value_b = p;
1374 value_e = p + strlen (p);
1375 if (value_e > value_b && value_e[-1] == '\n')
1376 --value_e;
1377 if (value_e > value_b && value_e[-1] == '\r')
1378 --value_e;
1379 /* Empty values are legal (I think), so don't bother checking. */
1380
1381 cookie = cookie_new ();
1382
1383 cookie->attr = strdupdelim (name_b, name_e);
1384 cookie->value = strdupdelim (value_b, value_e);
1385 cookie->path = strdupdelim (path_b, path_e);
1386 cookie->secure = BOUNDED_EQUAL (secure_b, secure_e, "TRUE");
1387
1388 /* Curl source says, quoting Andre Garcia: "flag: A TRUE/FALSE
1389 value indicating if all machines within a given domain can
1390 access the variable. This value is set automatically by the
1391 browser, depending on the value set for the domain." */
1392 cookie->domain_exact = !BOUNDED_EQUAL (domflag_b, domflag_e, "TRUE");
1393
1394 /* DOMAIN needs special treatment because we might need to
1395 extract the port. */
1396 port = domain_port (domain_b, domain_e, (const char **)&domain_e);
1397 if (port)
1398 cookie->port = port;
1399
1400 if (*domain_b == '.')
1401 ++domain_b; /* remove leading dot internally */
1402 cookie->domain = strdupdelim (domain_b, domain_e);
1403
1404 /* safe default in case EXPIRES field is garbled. */
1405 expiry = (double)cookies_now - 1;
1406
1407 /* I don't like changing the line, but it's safe here. (line is
1408 malloced.) */
1409 *expires_e = '\0';
1410 sscanf (expires_b, "%lf", &expiry);
1411
1412 if (expiry == 0)
1413 {
1414 /* EXPIRY can be 0 for session cookies saved because the
1415 user specified `--keep-session-cookies' in the past.
1416 They remain session cookies, and will be saved only if
1417 the user has specified `keep-session-cookies' again. */
1418 }
1419 else
1420 {
1421 if (expiry < cookies_now)
1422 goto abort_cookie; /* ignore stale cookie. */
1423 cookie->expiry_time = expiry;
1424 cookie->permanent = 1;
1425 }
1426
1427 store_cookie (jar, cookie);
1428
1429 next:
1430 continue;
1431
1432 abort_cookie:
1433 delete_cookie (cookie);
1434 }
1435 fclose (fp);
1436}
1437
1438/* Mapper for save_cookies callable by hash_table_map. VALUE points
1439 to the head in a chain of cookies. The function prints the entire
1440 chain. */
1441
1442static int
1443save_cookies_mapper (void *key, void *value, void *arg)
1444{
1445 FILE *fp = (FILE *)arg;
1446 char *domain = (char *)key;
1447 struct cookie *cookie = (struct cookie *)value;
1448 for (; cookie; cookie = cookie->next)
1449 {
1450 if (!cookie->permanent && !opt.keep_session_cookies)
1451 continue;
1452 if (cookie_expired_p (cookie))
1453 continue;
1454 if (!cookie->domain_exact)
1455 fputc ('.', fp);
1456 fputs (domain, fp);
1457 if (cookie->port != PORT_ANY)
1458 fprintf (fp, ":%d", cookie->port);
1459 fprintf (fp, "\t%s\t%s\t%s\t%.0f\t%s\t%s\n",
1460 cookie->domain_exact ? "FALSE" : "TRUE",
1461 cookie->path, cookie->secure ? "TRUE" : "FALSE",
1462 (double)cookie->expiry_time,
1463 cookie->attr, cookie->value);
1464 if (ferror (fp))
1465 return 1; /* stop mapping */
1466 }
1467 return 0;
1468}
1469
1470/* Save cookies, in format described above, to FILE. */
1471
1472void
1473cookie_jar_save (struct cookie_jar *jar, const char *file)
1474{
1475 FILE *fp;
1476
1477 DEBUGP (("Saving cookies to %s.\n", file));
1478
1479 cookies_now = time (NULL);
1480
1481 fp = fopen (file, "w");
1482 if (!fp)
1483 {
1484 logprintf (LOG_NOTQUIET, _("Cannot open cookies file `%s': %s\n"),
1485 file, strerror (errno));
1486 return;
1487 }
1488
1489 fputs ("# HTTP cookie file.\n", fp);
1490 fprintf (fp, "# Generated by Wget on %s.\n", datetime_str (&cookies_now));
1491 fputs ("# Edit at your own risk.\n\n", fp);
1492
1493 hash_table_map (jar->chains, save_cookies_mapper, fp);
1494
1495 if (ferror (fp))
1496 logprintf (LOG_NOTQUIET, _("Error writing to `%s': %s\n"),
1497 file, strerror (errno));
1498 if (fclose (fp) < 0)
1499 logprintf (LOG_NOTQUIET, _("Error closing `%s': %s\n"),
1500 file, strerror (errno));
1501
1502 DEBUGP (("Done saving cookies.\n"));
1503}
1504
1505
1506/* Destroy all the elements in the chain and unhook it from the cookie
1507 jar. This is written in the form of a callback to hash_table_map
1508 and used by cookie_jar_delete to delete all the cookies in a
1509 jar. */
1510
1511static int
1512nuke_cookie_chain (void *value, void *key, void *arg)
1513{
1514 char *chain_key = (char *)value;
1515 struct cookie *chain = (struct cookie *)key;
1516 struct cookie_jar *jar = (struct cookie_jar *)arg;
1517
1518 /* Remove the chain from the table and free the key. */
1519 hash_table_remove (jar->chains, chain_key);
1520 xfree (chain_key);
1521
1522 /* Then delete all the cookies in the chain. */
1523 while (chain)
1524 {
1525 struct cookie *next = chain->next;
1526 delete_cookie (chain);
1527 chain = next;
1528 }
1529
1530 /* Keep mapping. */
1531 return 0;
1532}
1533
1534/* Clean up cookie-related data. */
1535
1536void
1537cookie_jar_delete (struct cookie_jar *jar)
1538{
1539 hash_table_map (jar->chains, nuke_cookie_chain, jar);
1540 hash_table_destroy (jar->chains);
1541 xfree (jar);
1542}
1543
1544
/* Test cases.  Currently this only tests parse_set_cookies.  To
   use, recompile Wget with -DTEST_COOKIES and call test_cookies()
   from main.  */
1548
1549#ifdef TEST_COOKIES
/* Results recorded by test_parse_cookies_callback for the string
   being parsed: names at even indices, the corresponding values at
   odd indices.  TEST_COUNT is the number of slots in use; the test
   driver resets it before each parse.  */
int test_count;
char *test_results[10];

/* Callback handed to parse_set_cookies by the tests.  Stores copies
   of the name [NB, NE) and the value [VB, VE) in the next two slots
   of test_results and returns 1.  The cookie argument is unused.  */

static int
test_parse_cookies_callback (struct cookie *ignored,
                             const char *nb, const char *ne,
                             const char *vb, const char *ve)
{
  /* Two slots are consumed per call; never write past the end of the
     fixed-size results array.  */
  assert (test_count >= 0
          && test_count + 2 <= (int) (sizeof test_results
                                      / sizeof test_results[0]));

  test_results[test_count++] = strdupdelim (nb, ne);
  test_results[test_count++] = strdupdelim (vb, ve);
  return 1;
}
1561
1562void
1563test_cookies (void)
1564{
1565 /* Tests expected to succeed: */
1566 static struct {
1567 char *data;
1568 char *results[10];
1569 } tests_succ[] = {
1570 { "", {NULL} },
1571 { "arg=value", {"arg", "value", NULL} },
1572 { "arg1=value1;arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
1573 { "arg1=value1; arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
1574 { "arg1=value1; arg2=value2;", {"arg1", "value1", "arg2", "value2", NULL} },
1575 { "arg1=value1; arg2=value2; ", {"arg1", "value1", "arg2", "value2", NULL} },
1576 { "arg1=\"value1\"; arg2=\"\"", {"arg1", "value1", "arg2", "", NULL} },
1577 { "arg=", {"arg", "", NULL} },
1578 { "arg1=; arg2=", {"arg1", "", "arg2", "", NULL} },
1579 { "arg1 = ; arg2= ", {"arg1", "", "arg2", "", NULL} },
1580 };
1581
1582 /* Tests expected to fail: */
1583 static char *tests_fail[] = {
1584 ";",
1585 "arg=\"unterminated",
1586 "=empty-name",
1587 "arg1=;=another-empty-name",
1588 };
1589 int i;
1590
1591 for (i = 0; i < countof (tests_succ); i++)
1592 {
1593 int ind;
1594 char *data = tests_succ[i].data;
1595 char **expected = tests_succ[i].results;
1596 struct cookie *c;
1597
1598 test_count = 0;
1599 c = parse_set_cookies (data, test_parse_cookies_callback, 1);
1600 if (!c)
1601 {
1602 printf ("NULL cookie returned for valid data: %s\n", data);
1603 continue;
1604 }
1605
1606 for (ind = 0; ind < test_count; ind += 2)
1607 {
1608 if (!expected[ind])
1609 break;
1610 if (0 != strcmp (expected[ind], test_results[ind]))
1611 printf ("Invalid name %d for '%s' (expected '%s', got '%s')\n",
1612 ind / 2 + 1, data, expected[ind], test_results[ind]);
1613 if (0 != strcmp (expected[ind + 1], test_results[ind + 1]))
1614 printf ("Invalid value %d for '%s' (expected '%s', got '%s')\n",
1615 ind / 2 + 1, data, expected[ind + 1], test_results[ind + 1]);
1616 }
1617 if (ind < test_count || expected[ind])
1618 printf ("Unmatched number of results: %s\n", data);
1619 }
1620
1621 for (i = 0; i < countof (tests_fail); i++)
1622 {
1623 struct cookie *c;
1624 char *data = tests_fail[i];
1625 test_count = 0;
1626 c = parse_set_cookies (data, test_parse_cookies_callback, 1);
1627 if (c)
1628 printf ("Failed to report error on invalid data: %s\n", data);
1629 }
1630}
1631#endif /* TEST_COOKIES */
Note: See TracBrowser for help on using the repository browser.