1 | /* Host name resolution and matching.
|
---|
2 | Copyright (C) 1995, 1996, 1997, 2000, 2001 Free Software Foundation, Inc.
|
---|
3 |
|
---|
4 | This file is part of GNU Wget.
|
---|
5 |
|
---|
6 | GNU Wget is free software; you can redistribute it and/or modify
|
---|
7 | it under the terms of the GNU General Public License as published by
|
---|
8 | the Free Software Foundation; either version 2 of the License, or
|
---|
9 | (at your option) any later version.
|
---|
10 |
|
---|
11 | GNU Wget is distributed in the hope that it will be useful,
|
---|
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
14 | GNU General Public License for more details.
|
---|
15 |
|
---|
16 | You should have received a copy of the GNU General Public License
|
---|
17 | along with Wget; if not, write to the Free Software
|
---|
18 | Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
19 |
|
---|
20 | In addition, as a special exception, the Free Software Foundation
|
---|
21 | gives permission to link the code of its release of Wget with the
|
---|
22 | OpenSSL project's "OpenSSL" library (or with modified versions of it
|
---|
23 | that use the same license as the "OpenSSL" library), and distribute
|
---|
24 | the linked executables. You must obey the GNU General Public License
|
---|
25 | in all respects for all of the code used other than "OpenSSL". If you
|
---|
26 | modify this file, you may extend this exception to your version of the
|
---|
27 | file, but you are not obligated to do so. If you do not wish to do
|
---|
28 | so, delete this exception statement from your version. */
|
---|
29 |
|
---|
30 | #include <config.h>
|
---|
31 |
|
---|
32 | #include <stdio.h>
|
---|
33 | #include <stdlib.h>
|
---|
34 | #ifdef HAVE_STRING_H
|
---|
35 | # include <string.h>
|
---|
36 | #else
|
---|
37 | # include <strings.h>
|
---|
38 | #endif
|
---|
39 | #include <assert.h>
|
---|
40 | #include <sys/types.h>
|
---|
41 |
|
---|
42 | #ifndef WINDOWS
|
---|
43 | # include <sys/socket.h>
|
---|
44 | # include <netinet/in.h>
|
---|
45 | # ifndef __BEOS__
|
---|
46 | # include <arpa/inet.h>
|
---|
47 | # endif
|
---|
48 | # include <netdb.h>
|
---|
49 | # define SET_H_ERRNO(err) ((void)(h_errno = (err)))
|
---|
50 | #else /* WINDOWS */
|
---|
51 | # define SET_H_ERRNO(err) WSASetLastError (err)
|
---|
52 | #endif /* WINDOWS */
|
---|
53 |
|
---|
54 | #include <errno.h>
|
---|
55 |
|
---|
56 | #include "wget.h"
|
---|
57 | #include "utils.h"
|
---|
58 | #include "host.h"
|
---|
59 | #include "url.h"
|
---|
60 | #include "hash.h"
|
---|
61 |
|
---|
62 | #ifndef errno
|
---|
63 | extern int errno;
|
---|
64 | #endif
|
---|
65 |
|
---|
66 | #ifndef h_errno
|
---|
67 | # ifndef __CYGWIN__
|
---|
68 | extern int h_errno;
|
---|
69 | # endif
|
---|
70 | #endif
|
---|
71 |
|
---|
72 | #ifndef NO_ADDRESS
|
---|
73 | # define NO_ADDRESS NO_DATA
|
---|
74 | #endif
|
---|
75 |
|
---|
76 | /* Lists of IP addresses that result from running DNS queries. See
|
---|
77 | lookup_host for details. */
|
---|
78 |
|
---|
79 | struct address_list {
|
---|
80 | int count; /* number of adrresses */
|
---|
81 | ip_address *addresses; /* pointer to the string of addresses */
|
---|
82 |
|
---|
83 | int faulty; /* number of addresses known not to work. */
|
---|
84 | int connected; /* whether we were able to connect to
|
---|
85 | one of the addresses in the list,
|
---|
86 | at least once. */
|
---|
87 |
|
---|
88 | int refcount; /* reference count; when it drops to
|
---|
89 | 0, the entry is freed. */
|
---|
90 | };
|
---|
91 |
|
---|
92 | /* Get the bounds of the address list. */
|
---|
93 |
|
---|
94 | void
|
---|
95 | address_list_get_bounds (const struct address_list *al, int *start, int *end)
|
---|
96 | {
|
---|
97 | *start = al->faulty;
|
---|
98 | *end = al->count;
|
---|
99 | }
|
---|
100 |
|
---|
101 | /* Return a pointer to the address at position POS. */
|
---|
102 |
|
---|
103 | const ip_address *
|
---|
104 | address_list_address_at (const struct address_list *al, int pos)
|
---|
105 | {
|
---|
106 | assert (pos >= al->faulty && pos < al->count);
|
---|
107 | return al->addresses + pos;
|
---|
108 | }
|
---|
109 |
|
---|
110 | /* Return non-zero if AL contains IP, zero otherwise. */
|
---|
111 |
|
---|
112 | int
|
---|
113 | address_list_contains (const struct address_list *al, const ip_address *ip)
|
---|
114 | {
|
---|
115 | int i;
|
---|
116 | switch (ip->type)
|
---|
117 | {
|
---|
118 | case IPV4_ADDRESS:
|
---|
119 | for (i = 0; i < al->count; i++)
|
---|
120 | {
|
---|
121 | ip_address *cur = al->addresses + i;
|
---|
122 | if (cur->type == IPV4_ADDRESS
|
---|
123 | && (ADDRESS_IPV4_IN_ADDR (cur).s_addr
|
---|
124 | ==
|
---|
125 | ADDRESS_IPV4_IN_ADDR (ip).s_addr))
|
---|
126 | return 1;
|
---|
127 | }
|
---|
128 | return 0;
|
---|
129 | #ifdef ENABLE_IPV6
|
---|
130 | case IPV6_ADDRESS:
|
---|
131 | for (i = 0; i < al->count; i++)
|
---|
132 | {
|
---|
133 | ip_address *cur = al->addresses + i;
|
---|
134 | if (cur->type == IPV6_ADDRESS
|
---|
135 | #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
|
---|
136 | && ADDRESS_IPV6_SCOPE (cur) == ADDRESS_IPV6_SCOPE (ip)
|
---|
137 | #endif
|
---|
138 | && IN6_ARE_ADDR_EQUAL (&ADDRESS_IPV6_IN6_ADDR (cur),
|
---|
139 | &ADDRESS_IPV6_IN6_ADDR (ip)))
|
---|
140 | return 1;
|
---|
141 | }
|
---|
142 | return 0;
|
---|
143 | #endif /* ENABLE_IPV6 */
|
---|
144 | default:
|
---|
145 | abort ();
|
---|
146 | }
|
---|
147 | }
|
---|
148 |
|
---|
149 | /* Mark the INDEXth element of AL as faulty, so that the next time
|
---|
150 | this address list is used, the faulty element will be skipped. */
|
---|
151 |
|
---|
152 | void
|
---|
153 | address_list_set_faulty (struct address_list *al, int index)
|
---|
154 | {
|
---|
155 | /* We assume that the address list is traversed in order, so that a
|
---|
156 | "faulty" attempt is always preceded with all-faulty addresses,
|
---|
157 | and this is how Wget uses it. */
|
---|
158 | assert (index == al->faulty);
|
---|
159 |
|
---|
160 | ++al->faulty;
|
---|
161 | if (al->faulty >= al->count)
|
---|
162 | /* All addresses have been proven faulty. Since there's not much
|
---|
163 | sense in returning the user an empty address list the next
|
---|
164 | time, we'll rather make them all clean, so that they can be
|
---|
165 | retried anew. */
|
---|
166 | al->faulty = 0;
|
---|
167 | }
|
---|
168 |
|
---|
169 | /* Set the "connected" flag to true. This flag used by connect.c to
|
---|
170 | see if the host perhaps needs to be resolved again. */
|
---|
171 |
|
---|
172 | void
|
---|
173 | address_list_set_connected (struct address_list *al)
|
---|
174 | {
|
---|
175 | al->connected = 1;
|
---|
176 | }
|
---|
177 |
|
---|
178 | /* Return the value of the "connected" flag. */
|
---|
179 |
|
---|
180 | int
|
---|
181 | address_list_connected_p (const struct address_list *al)
|
---|
182 | {
|
---|
183 | return al->connected;
|
---|
184 | }
|
---|
185 |
|
---|
186 | #ifdef ENABLE_IPV6
|
---|
187 |
|
---|
188 | /* Create an address_list from the addresses in the given struct
|
---|
189 | addrinfo. */
|
---|
190 |
|
---|
191 | static struct address_list *
|
---|
192 | address_list_from_addrinfo (const struct addrinfo *ai)
|
---|
193 | {
|
---|
194 | struct address_list *al;
|
---|
195 | const struct addrinfo *ptr;
|
---|
196 | int cnt;
|
---|
197 | ip_address *ip;
|
---|
198 |
|
---|
199 | cnt = 0;
|
---|
200 | for (ptr = ai; ptr != NULL ; ptr = ptr->ai_next)
|
---|
201 | if (ptr->ai_family == AF_INET || ptr->ai_family == AF_INET6)
|
---|
202 | ++cnt;
|
---|
203 | if (cnt == 0)
|
---|
204 | return NULL;
|
---|
205 |
|
---|
206 | al = xnew0 (struct address_list);
|
---|
207 | al->addresses = xnew_array (ip_address, cnt);
|
---|
208 | al->count = cnt;
|
---|
209 | al->refcount = 1;
|
---|
210 |
|
---|
211 | ip = al->addresses;
|
---|
212 | for (ptr = ai; ptr != NULL; ptr = ptr->ai_next)
|
---|
213 | if (ptr->ai_family == AF_INET6)
|
---|
214 | {
|
---|
215 | const struct sockaddr_in6 *sin6 =
|
---|
216 | (const struct sockaddr_in6 *)ptr->ai_addr;
|
---|
217 | ip->type = IPV6_ADDRESS;
|
---|
218 | ADDRESS_IPV6_IN6_ADDR (ip) = sin6->sin6_addr;
|
---|
219 | #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
|
---|
220 | ADDRESS_IPV6_SCOPE (ip) = sin6->sin6_scope_id;
|
---|
221 | #endif
|
---|
222 | ++ip;
|
---|
223 | }
|
---|
224 | else if (ptr->ai_family == AF_INET)
|
---|
225 | {
|
---|
226 | const struct sockaddr_in *sin =
|
---|
227 | (const struct sockaddr_in *)ptr->ai_addr;
|
---|
228 | ip->type = IPV4_ADDRESS;
|
---|
229 | ADDRESS_IPV4_IN_ADDR (ip) = sin->sin_addr;
|
---|
230 | ++ip;
|
---|
231 | }
|
---|
232 | assert (ip - al->addresses == cnt);
|
---|
233 | return al;
|
---|
234 | }
|
---|
235 |
|
---|
236 | #define IS_IPV4(addr) (((const ip_address *) addr)->type == IPV4_ADDRESS)
|
---|
237 |
|
---|
238 | /* Compare two IP addresses by type, giving preference to the IPv4
|
---|
239 | address (sorting it first). In other words, return -1 if ADDR1 is
|
---|
240 | IPv4 and ADDR2 is IPv6, +1 if ADDR1 is IPv6 and ADDR2 is IPv4, and
|
---|
241 | 0 otherwise.
|
---|
242 |
|
---|
243 | This is intended to be used as the comparator arg to a qsort-like
|
---|
244 | sorting function, which is why it accepts generic pointers. */
|
---|
245 |
|
---|
246 | static int
|
---|
247 | cmp_prefer_ipv4 (const void *addr1, const void *addr2)
|
---|
248 | {
|
---|
249 | return !IS_IPV4 (addr1) - !IS_IPV4 (addr2);
|
---|
250 | }
|
---|
251 |
|
---|
252 | #define IS_IPV6(addr) (((const ip_address *) addr)->type == IPV6_ADDRESS)
|
---|
253 |
|
---|
254 | /* Like the above, but give preference to the IPv6 address. */
|
---|
255 |
|
---|
256 | static int
|
---|
257 | cmp_prefer_ipv6 (const void *addr1, const void *addr2)
|
---|
258 | {
|
---|
259 | return !IS_IPV6 (addr1) - !IS_IPV6 (addr2);
|
---|
260 | }
|
---|
261 |
|
---|
262 | #else /* not ENABLE_IPV6 */
|
---|
263 |
|
---|
264 | /* Create an address_list from a NULL-terminated vector of IPv4
|
---|
265 | addresses. This kind of vector is returned by gethostbyname. */
|
---|
266 |
|
---|
267 | static struct address_list *
|
---|
268 | address_list_from_ipv4_addresses (char **vec)
|
---|
269 | {
|
---|
270 | int count, i;
|
---|
271 | struct address_list *al = xnew0 (struct address_list);
|
---|
272 |
|
---|
273 | count = 0;
|
---|
274 | while (vec[count])
|
---|
275 | ++count;
|
---|
276 | assert (count > 0);
|
---|
277 |
|
---|
278 | al->addresses = xnew_array (ip_address, count);
|
---|
279 | al->count = count;
|
---|
280 | al->refcount = 1;
|
---|
281 |
|
---|
282 | for (i = 0; i < count; i++)
|
---|
283 | {
|
---|
284 | ip_address *ip = &al->addresses[i];
|
---|
285 | ip->type = IPV4_ADDRESS;
|
---|
286 | memcpy (ADDRESS_IPV4_DATA (ip), vec[i], 4);
|
---|
287 | }
|
---|
288 |
|
---|
289 | return al;
|
---|
290 | }
|
---|
291 |
|
---|
292 | #endif /* not ENABLE_IPV6 */
|
---|
293 |
|
---|
294 | static void
|
---|
295 | address_list_delete (struct address_list *al)
|
---|
296 | {
|
---|
297 | xfree (al->addresses);
|
---|
298 | xfree (al);
|
---|
299 | }
|
---|
300 |
|
---|
301 | /* Mark the address list as being no longer in use. This will reduce
|
---|
302 | its reference count which will cause the list to be freed when the
|
---|
303 | count reaches 0. */
|
---|
304 |
|
---|
305 | void
|
---|
306 | address_list_release (struct address_list *al)
|
---|
307 | {
|
---|
308 | --al->refcount;
|
---|
309 | DEBUGP (("Releasing 0x%0*lx (new refcount %d).\n", PTR_FORMAT (al),
|
---|
310 | al->refcount));
|
---|
311 | if (al->refcount <= 0)
|
---|
312 | {
|
---|
313 | DEBUGP (("Deleting unused 0x%0*lx.\n", PTR_FORMAT (al)));
|
---|
314 | address_list_delete (al);
|
---|
315 | }
|
---|
316 | }
|
---|
317 | |
---|
318 |
|
---|
319 | /* Versions of gethostbyname and getaddrinfo that support timeout. */
|
---|
320 |
|
---|
321 | #ifndef ENABLE_IPV6
|
---|
322 |
|
---|
/* Argument block shared between gethostbyname_with_timeout and its
   callback.  */
struct ghbnwt_context {
  const char *host_name;	/* in: name to resolve */
  struct hostent *hptr;		/* out: result of gethostbyname */
};
|
---|
327 |
|
---|
328 | static void
|
---|
329 | gethostbyname_with_timeout_callback (void *arg)
|
---|
330 | {
|
---|
331 | struct ghbnwt_context *ctx = (struct ghbnwt_context *)arg;
|
---|
332 | ctx->hptr = gethostbyname (ctx->host_name);
|
---|
333 | }
|
---|
334 |
|
---|
335 | /* Just like gethostbyname, except it times out after TIMEOUT seconds.
|
---|
336 | In case of timeout, NULL is returned and errno is set to ETIMEDOUT.
|
---|
337 | The function makes sure that when NULL is returned for reasons
|
---|
338 | other than timeout, errno is reset. */
|
---|
339 |
|
---|
340 | static struct hostent *
|
---|
341 | gethostbyname_with_timeout (const char *host_name, double timeout)
|
---|
342 | {
|
---|
343 | struct ghbnwt_context ctx;
|
---|
344 | ctx.host_name = host_name;
|
---|
345 | if (run_with_timeout (timeout, gethostbyname_with_timeout_callback, &ctx))
|
---|
346 | {
|
---|
347 | SET_H_ERRNO (HOST_NOT_FOUND);
|
---|
348 | errno = ETIMEDOUT;
|
---|
349 | return NULL;
|
---|
350 | }
|
---|
351 | if (!ctx.hptr)
|
---|
352 | errno = 0;
|
---|
353 | return ctx.hptr;
|
---|
354 | }
|
---|
355 |
|
---|
356 | /* Print error messages for host errors. */
|
---|
357 | static char *
|
---|
358 | host_errstr (int error)
|
---|
359 | {
|
---|
360 | /* Can't use switch since some of these constants can be equal,
|
---|
361 | which makes the compiler complain about duplicate case
|
---|
362 | values. */
|
---|
363 | if (error == HOST_NOT_FOUND
|
---|
364 | || error == NO_RECOVERY
|
---|
365 | || error == NO_DATA
|
---|
366 | || error == NO_ADDRESS)
|
---|
367 | return _("Unknown host");
|
---|
368 | else if (error == TRY_AGAIN)
|
---|
369 | /* Message modeled after what gai_strerror returns in similar
|
---|
370 | circumstances. */
|
---|
371 | return _("Temporary failure in name resolution");
|
---|
372 | else
|
---|
373 | return _("Unknown error");
|
---|
374 | }
|
---|
375 |
|
---|
376 | #else /* ENABLE_IPV6 */
|
---|
377 |
|
---|
/* Argument block shared between getaddrinfo_with_timeout and its
   callback: the four getaddrinfo arguments plus its return value.  */
struct gaiwt_context {
  const char *node;
  const char *service;
  const struct addrinfo *hints;
  struct addrinfo **res;
  int exit_code;		/* out: value returned by getaddrinfo */
};
|
---|
385 |
|
---|
/* Callback handed to run_with_timeout: ARG points to a struct
   gaiwt_context; call getaddrinfo with the stored arguments and save
   its return value in the exit_code member.  */

static void
getaddrinfo_with_timeout_callback (void *arg)
{
  struct gaiwt_context *context = (struct gaiwt_context *)arg;
  int status = getaddrinfo (context->node, context->service,
                            context->hints, context->res);
  context->exit_code = status;
}
|
---|
392 |
|
---|
/* Call getaddrinfo with NODE, SERVICE, HINTS and RES, but give up
   after TIMEOUT seconds.  Returns whatever getaddrinfo returned, or,
   on timeout, EAI_SYSTEM with errno set to ETIMEDOUT.  */

static int
getaddrinfo_with_timeout (const char *node, const char *service,
                          const struct addrinfo *hints, struct addrinfo **res,
                          double timeout)
{
  struct gaiwt_context ctx;

  ctx.res = res;
  ctx.hints = hints;
  ctx.service = service;
  ctx.node = node;

  if (!run_with_timeout (timeout, getaddrinfo_with_timeout_callback, &ctx))
    return ctx.exit_code;

  errno = ETIMEDOUT;
  return EAI_SYSTEM;
}
|
---|
415 |
|
---|
416 | #endif /* ENABLE_IPV6 */
|
---|
417 | |
---|
418 |
|
---|
419 | /* Pretty-print ADDR. When compiled without IPv6, this is the same as
|
---|
420 | inet_ntoa. With IPv6, it either prints an IPv6 address or an IPv4
|
---|
421 | address. */
|
---|
422 |
|
---|
423 | const char *
|
---|
424 | pretty_print_address (const ip_address *addr)
|
---|
425 | {
|
---|
426 | switch (addr->type)
|
---|
427 | {
|
---|
428 | case IPV4_ADDRESS:
|
---|
429 | return inet_ntoa (ADDRESS_IPV4_IN_ADDR (addr));
|
---|
430 | #ifdef ENABLE_IPV6
|
---|
431 | case IPV6_ADDRESS:
|
---|
432 | {
|
---|
433 | static char buf[128];
|
---|
434 | inet_ntop (AF_INET6, &ADDRESS_IPV6_IN6_ADDR (addr), buf, sizeof (buf));
|
---|
435 | #if 0
|
---|
436 | #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
|
---|
437 | {
|
---|
438 | /* append "%SCOPE_ID" for all ?non-global? addresses */
|
---|
439 | char *p = buf + strlen (buf);
|
---|
440 | *p++ = '%';
|
---|
441 | number_to_string (p, ADDRESS_IPV6_SCOPE (addr));
|
---|
442 | }
|
---|
443 | #endif
|
---|
444 | #endif
|
---|
445 | buf[sizeof (buf) - 1] = '\0';
|
---|
446 | return buf;
|
---|
447 | }
|
---|
448 | #endif
|
---|
449 | }
|
---|
450 | abort ();
|
---|
451 | }
|
---|
452 |
|
---|
453 | /* The following two functions were adapted from glibc. */
|
---|
454 |
|
---|
/* Return 1 if the characters in the range [STR, END) spell a
   dotted-decimal IPv4 address -- exactly four groups of decimal
   digits, each with a value of at most 255, separated by single
   dots -- and 0 otherwise.  The range need not be NUL-terminated.  */

static int
is_valid_ipv4_address (const char *str, const char *end)
{
  int in_octet = 0;		/* non-zero while inside a run of digits */
  int octet_count = 0;		/* number of octets started so far */
  int octet_value = 0;		/* value of the octet being read */
  const char *p;

  for (p = str; p < end; p++)
    {
      int c = *p;

      if (c == '.')
        {
          /* A dot must directly follow a digit and may not follow
             the fourth octet.  */
          if (!in_octet || octet_count == 4)
            return 0;
          octet_value = 0;
          in_octet = 0;
          continue;
        }

      if (c < '0' || c > '9')
        return 0;

      octet_value = octet_value * 10 + (c - '0');
      if (octet_value > 255)
        return 0;

      if (!in_octet)
        {
          /* First digit of a new octet.  */
          if (++octet_count > 4)
            return 0;
          in_octet = 1;
        }
    }

  return octet_count >= 4;
}
|
---|
494 |
|
---|
/* Return 1 if the characters in the range [STR, END) spell a valid
   textual IPv6 address, 0 otherwise.  The range need not be
   NUL-terminated.

   Accepted are groups of up to four hexadecimal digits (each worth
   16 bits) separated by colons, at most one "::" standing for a run
   of zero groups, and an optional trailing dotted-decimal IPv4
   address that accounts for the last 32 bits.  The groups must add
   up to exactly 128 bits, unless "::" is present to fill the gap.  */

int
is_valid_ipv6_address (const char *str, const char *end)
{
  /* Use lower-case for these to avoid clash with system headers.  */
  enum {
    ns_inaddrsz  = 4,
    ns_in6addrsz = 16,
    ns_int16sz   = 2
  };

  const char *curtok;		/* start of the token being read */
  int tp;			/* address bytes accounted for so far */
  int saw_double_colon;		/* non-zero once "::" has been seen */
  int saw_xdigit;		/* non-zero while inside a hex group */
  unsigned int val;		/* value of the current hex group */

  tp = 0;
  saw_double_colon = 0;

  if (str == end)
    return 0;

  /* Leading :: requires some special handling: a single leading
     colon is not allowed.  */
  if (*str == ':')
    {
      ++str;
      if (str == end || *str != ':')
        return 0;
    }

  curtok = str;
  saw_xdigit = 0;
  val = 0;

  while (str < end)
    {
      int ch = *str++;

      /* A hex digit extends the current group.  */
      if (ISXDIGIT (ch))
        {
          val <<= 4;
          val |= XDIGIT_TO_NUM (ch);
          if (val > 0xffff)
            return 0;
          saw_xdigit = 1;
          continue;
        }

      /* A colon either closes a group or is the second half of "::".  */
      if (ch == ':')
        {
          curtok = str;
          if (saw_xdigit == 0)
            {
              /* No digits since the previous colon: this is "::",
                 which may occur only once.  A plain flag records it;
                 no pointer is needed.  */
              if (saw_double_colon)
                return 0;
              saw_double_colon = 1;
              continue;
            }
          else if (str == end)
            /* The address may not end with a single colon.  */
            return 0;
          if (tp > ns_in6addrsz - ns_int16sz)
            return 0;
          tp += ns_int16sz;
          saw_xdigit = 0;
          val = 0;
          continue;
        }

      /* A dot means the rest of the input, starting at the current
         token, must be an embedded IPv4 address.  */
      if (ch == '.' && (tp <= ns_in6addrsz - ns_inaddrsz)
          && is_valid_ipv4_address (curtok, end) == 1)
        {
          tp += ns_inaddrsz;
          saw_xdigit = 0;
          break;
        }

      return 0;
    }

  /* Account for a final hex group not followed by a colon.  */
  if (saw_xdigit == 1)
    {
      if (tp > ns_in6addrsz - ns_int16sz)
        return 0;
      tp += ns_int16sz;
    }

  /* "::" must stand for at least one group; it then fills the
     address up to its full size.  */
  if (saw_double_colon)
    {
      if (tp == ns_in6addrsz)
        return 0;
      tp = ns_in6addrsz;
    }

  if (tp != ns_in6addrsz)
    return 0;

  return 1;
}
|
---|
596 | |
---|
597 |
|
---|
598 | /* Simple host cache, used by lookup_host to speed up resolving. The
|
---|
599 | cache doesn't handle TTL because Wget is a fairly short-lived
|
---|
600 | application. Refreshing is attempted when connect fails, though --
|
---|
601 | see connect_to_host. */
|
---|
602 |
|
---|
/* Maps known host names to the lists of their addresses.  */
static struct hash_table *host_name_addresses_map;
|
---|
605 |
|
---|
606 |
|
---|
607 | /* Return the host's resolved addresses from the cache, if
|
---|
608 | available. */
|
---|
609 |
|
---|
610 | static struct address_list *
|
---|
611 | cache_query (const char *host)
|
---|
612 | {
|
---|
613 | struct address_list *al;
|
---|
614 | if (!host_name_addresses_map)
|
---|
615 | return NULL;
|
---|
616 | al = hash_table_get (host_name_addresses_map, host);
|
---|
617 | if (al)
|
---|
618 | {
|
---|
619 | DEBUGP (("Found %s in host_name_addresses_map (%p)\n", host, al));
|
---|
620 | ++al->refcount;
|
---|
621 | return al;
|
---|
622 | }
|
---|
623 | return NULL;
|
---|
624 | }
|
---|
625 |
|
---|
626 | /* Cache the DNS lookup of HOST. Subsequent invocations of
|
---|
627 | lookup_host will return the cached value. */
|
---|
628 |
|
---|
629 | static void
|
---|
630 | cache_store (const char *host, struct address_list *al)
|
---|
631 | {
|
---|
632 | if (!host_name_addresses_map)
|
---|
633 | host_name_addresses_map = make_nocase_string_hash_table (0);
|
---|
634 |
|
---|
635 | ++al->refcount;
|
---|
636 | hash_table_put (host_name_addresses_map, xstrdup_lower (host), al);
|
---|
637 |
|
---|
638 | #ifdef ENABLE_DEBUG
|
---|
639 | if (opt.debug)
|
---|
640 | {
|
---|
641 | int i;
|
---|
642 | debug_logprintf ("Caching %s =>", host);
|
---|
643 | for (i = 0; i < al->count; i++)
|
---|
644 | debug_logprintf (" %s", pretty_print_address (al->addresses + i));
|
---|
645 | debug_logprintf ("\n");
|
---|
646 | }
|
---|
647 | #endif
|
---|
648 | }
|
---|
649 |
|
---|
650 | /* Remove HOST from the DNS cache. Does nothing is HOST is not in
|
---|
651 | the cache. */
|
---|
652 |
|
---|
653 | static void
|
---|
654 | cache_remove (const char *host)
|
---|
655 | {
|
---|
656 | struct address_list *al;
|
---|
657 | if (!host_name_addresses_map)
|
---|
658 | return;
|
---|
659 | al = hash_table_get (host_name_addresses_map, host);
|
---|
660 | if (al)
|
---|
661 | {
|
---|
662 | address_list_release (al);
|
---|
663 | hash_table_remove (host_name_addresses_map, host);
|
---|
664 | }
|
---|
665 | }
|
---|
666 | |
---|
667 |
|
---|
668 | /* Look up HOST in DNS and return a list of IP addresses.
|
---|
669 |
|
---|
670 | This function caches its result so that, if the same host is passed
|
---|
671 | the second time, the addresses are returned without DNS lookup.
|
---|
672 | (Use LH_REFRESH to force lookup, or set opt.dns_cache to 0 to
|
---|
673 | globally disable caching.)
|
---|
674 |
|
---|
675 | The order of the returned addresses is affected by the setting of
|
---|
676 | opt.prefer_family: if it is set to prefer_ipv4, IPv4 addresses are
|
---|
677 | placed at the beginning; if it is prefer_ipv6, IPv6 ones are placed
|
---|
678 | at the beginning; otherwise, the order is left intact. The
|
---|
679 | relative order of addresses with the same family is left
|
---|
680 | undisturbed in either case.
|
---|
681 |
|
---|
682 | FLAGS can be a combination of:
|
---|
683 | LH_SILENT - don't print the "resolving ... done" messages.
|
---|
684 | LH_BIND - resolve addresses for use with bind, which under
|
---|
685 | IPv6 means to use AI_PASSIVE flag to getaddrinfo.
|
---|
686 | Passive lookups are not cached under IPv6.
|
---|
687 | LH_REFRESH - if HOST is cached, remove the entry from the cache
|
---|
688 | and resolve it anew. */
|
---|
689 |
|
---|
690 | struct address_list *
|
---|
691 | lookup_host (const char *host, int flags)
|
---|
692 | {
|
---|
693 | struct address_list *al;
|
---|
694 | int silent = flags & LH_SILENT;
|
---|
695 | int use_cache;
|
---|
696 | int numeric_address = 0;
|
---|
697 | double timeout = opt.dns_timeout;
|
---|
698 |
|
---|
699 | #ifndef ENABLE_IPV6
|
---|
700 | /* If we're not using getaddrinfo, first check if HOST specifies a
|
---|
701 | numeric IPv4 address. Some implementations of gethostbyname
|
---|
702 | (e.g. the Ultrix one and possibly Winsock) don't accept
|
---|
703 | dotted-decimal IPv4 addresses. */
|
---|
704 | {
|
---|
705 | uint32_t addr_ipv4 = (uint32_t)inet_addr (host);
|
---|
706 | if (addr_ipv4 != (uint32_t) -1)
|
---|
707 | {
|
---|
708 | /* No need to cache host->addr relation, just return the
|
---|
709 | address. */
|
---|
710 | char *vec[2];
|
---|
711 | vec[0] = (char *)&addr_ipv4;
|
---|
712 | vec[1] = NULL;
|
---|
713 | return address_list_from_ipv4_addresses (vec);
|
---|
714 | }
|
---|
715 | }
|
---|
716 | #else /* ENABLE_IPV6 */
|
---|
717 | /* If we're using getaddrinfo, at least check whether the address is
|
---|
718 | already numeric, in which case there is no need to print the
|
---|
719 | "Resolving..." output. (This comes at no additional cost since
|
---|
720 | the is_valid_ipv*_address are already required for
|
---|
721 | url_parse.) */
|
---|
722 | {
|
---|
723 | const char *end = host + strlen (host);
|
---|
724 | if (is_valid_ipv4_address (host, end) || is_valid_ipv6_address (host, end))
|
---|
725 | numeric_address = 1;
|
---|
726 | }
|
---|
727 | #endif
|
---|
728 |
|
---|
729 | /* Cache is normally on, but can be turned off with --no-dns-cache.
|
---|
730 | Don't cache passive lookups under IPv6. */
|
---|
731 | use_cache = opt.dns_cache;
|
---|
732 | #ifdef ENABLE_IPV6
|
---|
733 | if ((flags & LH_BIND) || numeric_address)
|
---|
734 | use_cache = 0;
|
---|
735 | #endif
|
---|
736 |
|
---|
737 | /* Try to find the host in the cache so we don't need to talk to the
|
---|
738 | resolver. If LH_REFRESH is requested, remove HOST from the cache
|
---|
739 | instead. */
|
---|
740 | if (use_cache)
|
---|
741 | {
|
---|
742 | if (!(flags & LH_REFRESH))
|
---|
743 | {
|
---|
744 | al = cache_query (host);
|
---|
745 | if (al)
|
---|
746 | return al;
|
---|
747 | }
|
---|
748 | else
|
---|
749 | cache_remove (host);
|
---|
750 | }
|
---|
751 |
|
---|
752 | /* No luck with the cache; resolve HOST. */
|
---|
753 |
|
---|
754 | if (!silent && !numeric_address)
|
---|
755 | logprintf (LOG_VERBOSE, _("Resolving %s... "), escnonprint (host));
|
---|
756 |
|
---|
757 | #ifdef ENABLE_IPV6
|
---|
758 | {
|
---|
759 | int err;
|
---|
760 | struct addrinfo hints, *res;
|
---|
761 |
|
---|
762 | xzero (hints);
|
---|
763 | hints.ai_socktype = SOCK_STREAM;
|
---|
764 | if (opt.ipv4_only)
|
---|
765 | hints.ai_family = AF_INET;
|
---|
766 | else if (opt.ipv6_only)
|
---|
767 | hints.ai_family = AF_INET6;
|
---|
768 | else
|
---|
769 | /* We tried using AI_ADDRCONFIG, but removed it because: it
|
---|
770 | misinterprets IPv6 loopbacks, it is broken on AIX 5.1, and
|
---|
771 | it's unneeded since we sort the addresses anyway. */
|
---|
772 | hints.ai_family = AF_UNSPEC;
|
---|
773 |
|
---|
774 | if (flags & LH_BIND)
|
---|
775 | hints.ai_flags |= AI_PASSIVE;
|
---|
776 |
|
---|
777 | #ifdef AI_NUMERICHOST
|
---|
778 | if (numeric_address)
|
---|
779 | {
|
---|
780 | /* Where available, the AI_NUMERICHOST hint can prevent costly
|
---|
781 | access to DNS servers. */
|
---|
782 | hints.ai_flags |= AI_NUMERICHOST;
|
---|
783 | timeout = 0; /* no timeout needed when "resolving"
|
---|
784 | numeric hosts -- avoid setting up
|
---|
785 | signal handlers and such. */
|
---|
786 | }
|
---|
787 | #endif
|
---|
788 |
|
---|
789 | err = getaddrinfo_with_timeout (host, NULL, &hints, &res, timeout);
|
---|
790 | if (err != 0 || res == NULL)
|
---|
791 | {
|
---|
792 | if (!silent)
|
---|
793 | logprintf (LOG_VERBOSE, _("failed: %s.\n"),
|
---|
794 | err != EAI_SYSTEM ? gai_strerror (err) : strerror (errno));
|
---|
795 | return NULL;
|
---|
796 | }
|
---|
797 | al = address_list_from_addrinfo (res);
|
---|
798 | freeaddrinfo (res);
|
---|
799 | if (!al)
|
---|
800 | {
|
---|
801 | logprintf (LOG_VERBOSE,
|
---|
802 | _("failed: No IPv4/IPv6 addresses for host.\n"));
|
---|
803 | return NULL;
|
---|
804 | }
|
---|
805 |
|
---|
806 | /* Reorder addresses so that IPv4 ones (or IPv6 ones, as per
|
---|
807 | --prefer-family) come first. Sorting is stable so the order of
|
---|
808 | the addresses with the same family is undisturbed. */
|
---|
809 | if (al->count > 1 && opt.prefer_family != prefer_none)
|
---|
810 | stable_sort (al->addresses, al->count, sizeof (ip_address),
|
---|
811 | opt.prefer_family == prefer_ipv4
|
---|
812 | ? cmp_prefer_ipv4 : cmp_prefer_ipv6);
|
---|
813 | }
|
---|
814 | #else /* not ENABLE_IPV6 */
|
---|
815 | {
|
---|
816 | struct hostent *hptr = gethostbyname_with_timeout (host, timeout);
|
---|
817 | if (!hptr)
|
---|
818 | {
|
---|
819 | if (!silent)
|
---|
820 | {
|
---|
821 | if (errno != ETIMEDOUT)
|
---|
822 | logprintf (LOG_VERBOSE, _("failed: %s.\n"),
|
---|
823 | host_errstr (h_errno));
|
---|
824 | else
|
---|
825 | logputs (LOG_VERBOSE, _("failed: timed out.\n"));
|
---|
826 | }
|
---|
827 | return NULL;
|
---|
828 | }
|
---|
829 | /* Do older systems have h_addr_list? */
|
---|
830 | al = address_list_from_ipv4_addresses (hptr->h_addr_list);
|
---|
831 | }
|
---|
832 | #endif /* not ENABLE_IPV6 */
|
---|
833 |
|
---|
834 | /* Print the addresses determined by DNS lookup, but no more than
|
---|
835 | three. */
|
---|
836 | if (!silent && !numeric_address)
|
---|
837 | {
|
---|
838 | int i;
|
---|
839 | int printmax = al->count <= 3 ? al->count : 3;
|
---|
840 | for (i = 0; i < printmax; i++)
|
---|
841 | {
|
---|
842 | logprintf (LOG_VERBOSE, "%s",
|
---|
843 | pretty_print_address (al->addresses + i));
|
---|
844 | if (i < printmax - 1)
|
---|
845 | logputs (LOG_VERBOSE, ", ");
|
---|
846 | }
|
---|
847 | if (printmax != al->count)
|
---|
848 | logputs (LOG_VERBOSE, ", ...");
|
---|
849 | logputs (LOG_VERBOSE, "\n");
|
---|
850 | }
|
---|
851 |
|
---|
852 | /* Cache the lookup information. */
|
---|
853 | if (use_cache)
|
---|
854 | cache_store (host, al);
|
---|
855 |
|
---|
856 | return al;
|
---|
857 | }
|
---|
858 | |
---|
859 |
|
---|
860 | /* Determine whether a URL is acceptable to be followed, according to
|
---|
861 | a list of domains to accept. */
|
---|
862 | int
|
---|
863 | accept_domain (struct url *u)
|
---|
864 | {
|
---|
865 | assert (u->host != NULL);
|
---|
866 | if (opt.domains)
|
---|
867 | {
|
---|
868 | if (!sufmatch ((const char **)opt.domains, u->host))
|
---|
869 | return 0;
|
---|
870 | }
|
---|
871 | if (opt.exclude_domains)
|
---|
872 | {
|
---|
873 | if (sufmatch ((const char **)opt.exclude_domains, u->host))
|
---|
874 | return 0;
|
---|
875 | }
|
---|
876 | return 1;
|
---|
877 | }
|
---|
878 |
|
---|
/* Check whether the host name WHAT falls under one of the domains in
   LIST, a NULL-terminated vector of domain patterns.  Comparison is
   case-insensitive and proceeds backwards from the end of both
   strings.

   A pattern matches when it is a suffix of WHAT that begins at a
   domain-label boundary:
     - the pattern equals WHAT ("example.com" vs. "example.com"), or
     - the character of WHAT preceding the suffix is a dot
       ("example.com" vs. "www.example.com"), or
     - the pattern itself begins with a dot (".example.com" vs.
       "www.example.com").
   A bare suffix match in the middle of a label does not count, so
   "example.com" does not match "badexample.com".  Empty patterns are
   ignored rather than matching every host.

   Returns 1 if an element of LIST matched, 0 otherwise.  */
int
sufmatch (const char **list, const char *what)
{
  int i, j, k, lw;

  lw = strlen (what);
  for (i = 0; list[i]; i++)
    {
      /* An empty pattern is a suffix of everything; skip it.  */
      if (list[i][0] == '\0')
        continue;

      /* Walk both strings backwards, starting at the terminating
         NULs, until they differ or one of them is exhausted.  */
      for (j = strlen (list[i]), k = lw; j >= 0 && k >= 0; j--, k--)
        if (TOLOWER (list[i][j]) != TOLOWER (what[k]))
          break;

      /* j == -1 means the whole pattern was consumed, i.e. it is a
         suffix of WHAT.  Accept it only at a label boundary:
         k == -1 is an exact match; otherwise what[k] is the character
         just before the matched suffix.  */
      if (j == -1 && (k == -1 || what[k] == '.' || list[i][0] == '.'))
        return 1;
    }
  return 0;
}
|
---|
901 |
|
---|
902 | static int
|
---|
903 | host_cleanup_mapper (void *key, void *value, void *arg_ignored)
|
---|
904 | {
|
---|
905 | struct address_list *al;
|
---|
906 |
|
---|
907 | xfree (key); /* host */
|
---|
908 |
|
---|
909 | al = (struct address_list *)value;
|
---|
910 | assert (al->refcount == 1);
|
---|
911 | address_list_delete (al);
|
---|
912 |
|
---|
913 | return 0;
|
---|
914 | }
|
---|
915 |
|
---|
916 | void
|
---|
917 | host_cleanup (void)
|
---|
918 | {
|
---|
919 | if (host_name_addresses_map)
|
---|
920 | {
|
---|
921 | hash_table_map (host_name_addresses_map, host_cleanup_mapper, NULL);
|
---|
922 | hash_table_destroy (host_name_addresses_map);
|
---|
923 | host_name_addresses_map = NULL;
|
---|
924 | }
|
---|
925 | }
|
---|