| 1 | /* Host name resolution and matching.
|
|---|
| 2 | Copyright (C) 1995, 1996, 1997, 2000, 2001 Free Software Foundation, Inc.
|
|---|
| 3 |
|
|---|
| 4 | This file is part of GNU Wget.
|
|---|
| 5 |
|
|---|
| 6 | GNU Wget is free software; you can redistribute it and/or modify
|
|---|
| 7 | it under the terms of the GNU General Public License as published by
|
|---|
| 8 | the Free Software Foundation; either version 2 of the License, or
|
|---|
| 9 | (at your option) any later version.
|
|---|
| 10 |
|
|---|
| 11 | GNU Wget is distributed in the hope that it will be useful,
|
|---|
| 12 | but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|---|
| 13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|---|
| 14 | GNU General Public License for more details.
|
|---|
| 15 |
|
|---|
| 16 | You should have received a copy of the GNU General Public License
|
|---|
| 17 | along with Wget; if not, write to the Free Software
|
|---|
| 18 | Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|---|
| 19 |
|
|---|
| 20 | In addition, as a special exception, the Free Software Foundation
|
|---|
| 21 | gives permission to link the code of its release of Wget with the
|
|---|
| 22 | OpenSSL project's "OpenSSL" library (or with modified versions of it
|
|---|
| 23 | that use the same license as the "OpenSSL" library), and distribute
|
|---|
| 24 | the linked executables. You must obey the GNU General Public License
|
|---|
| 25 | in all respects for all of the code used other than "OpenSSL". If you
|
|---|
| 26 | modify this file, you may extend this exception to your version of the
|
|---|
| 27 | file, but you are not obligated to do so. If you do not wish to do
|
|---|
| 28 | so, delete this exception statement from your version. */
|
|---|
| 29 |
|
|---|
| 30 | #include <config.h>
|
|---|
| 31 |
|
|---|
| 32 | #include <stdio.h>
|
|---|
| 33 | #include <stdlib.h>
|
|---|
| 34 | #ifdef HAVE_STRING_H
|
|---|
| 35 | # include <string.h>
|
|---|
| 36 | #else
|
|---|
| 37 | # include <strings.h>
|
|---|
| 38 | #endif
|
|---|
| 39 | #include <assert.h>
|
|---|
| 40 | #include <sys/types.h>
|
|---|
| 41 |
|
|---|
| 42 | #ifndef WINDOWS
|
|---|
| 43 | # include <sys/socket.h>
|
|---|
| 44 | # include <netinet/in.h>
|
|---|
| 45 | # ifndef __BEOS__
|
|---|
| 46 | # include <arpa/inet.h>
|
|---|
| 47 | # endif
|
|---|
| 48 | # include <netdb.h>
|
|---|
| 49 | # define SET_H_ERRNO(err) ((void)(h_errno = (err)))
|
|---|
| 50 | #else /* WINDOWS */
|
|---|
| 51 | # define SET_H_ERRNO(err) WSASetLastError (err)
|
|---|
| 52 | #endif /* WINDOWS */
|
|---|
| 53 |
|
|---|
| 54 | #include <errno.h>
|
|---|
| 55 |
|
|---|
| 56 | #include "wget.h"
|
|---|
| 57 | #include "utils.h"
|
|---|
| 58 | #include "host.h"
|
|---|
| 59 | #include "url.h"
|
|---|
| 60 | #include "hash.h"
|
|---|
| 61 |
|
|---|
| 62 | #ifndef errno
|
|---|
| 63 | extern int errno;
|
|---|
| 64 | #endif
|
|---|
| 65 |
|
|---|
| 66 | #ifndef h_errno
|
|---|
| 67 | # ifndef __CYGWIN__
|
|---|
| 68 | extern int h_errno;
|
|---|
| 69 | # endif
|
|---|
| 70 | #endif
|
|---|
| 71 |
|
|---|
| 72 | #ifndef NO_ADDRESS
|
|---|
| 73 | # define NO_ADDRESS NO_DATA
|
|---|
| 74 | #endif
|
|---|
| 75 |
|
|---|
| 76 | /* Lists of IP addresses that result from running DNS queries. See
|
|---|
| 77 | lookup_host for details. */
|
|---|
| 78 |
|
|---|
| 79 | struct address_list {
|
|---|
| 80 | int count; /* number of adrresses */
|
|---|
| 81 | ip_address *addresses; /* pointer to the string of addresses */
|
|---|
| 82 |
|
|---|
| 83 | int faulty; /* number of addresses known not to work. */
|
|---|
| 84 | int connected; /* whether we were able to connect to
|
|---|
| 85 | one of the addresses in the list,
|
|---|
| 86 | at least once. */
|
|---|
| 87 |
|
|---|
| 88 | int refcount; /* reference count; when it drops to
|
|---|
| 89 | 0, the entry is freed. */
|
|---|
| 90 | };
|
|---|
| 91 |
|
|---|
| 92 | /* Get the bounds of the address list. */
|
|---|
| 93 |
|
|---|
| 94 | void
|
|---|
| 95 | address_list_get_bounds (const struct address_list *al, int *start, int *end)
|
|---|
| 96 | {
|
|---|
| 97 | *start = al->faulty;
|
|---|
| 98 | *end = al->count;
|
|---|
| 99 | }
|
|---|
| 100 |
|
|---|
| 101 | /* Return a pointer to the address at position POS. */
|
|---|
| 102 |
|
|---|
| 103 | const ip_address *
|
|---|
| 104 | address_list_address_at (const struct address_list *al, int pos)
|
|---|
| 105 | {
|
|---|
| 106 | assert (pos >= al->faulty && pos < al->count);
|
|---|
| 107 | return al->addresses + pos;
|
|---|
| 108 | }
|
|---|
| 109 |
|
|---|
| 110 | /* Return non-zero if AL contains IP, zero otherwise. */
|
|---|
| 111 |
|
|---|
| 112 | int
|
|---|
| 113 | address_list_contains (const struct address_list *al, const ip_address *ip)
|
|---|
| 114 | {
|
|---|
| 115 | int i;
|
|---|
| 116 | switch (ip->type)
|
|---|
| 117 | {
|
|---|
| 118 | case IPV4_ADDRESS:
|
|---|
| 119 | for (i = 0; i < al->count; i++)
|
|---|
| 120 | {
|
|---|
| 121 | ip_address *cur = al->addresses + i;
|
|---|
| 122 | if (cur->type == IPV4_ADDRESS
|
|---|
| 123 | && (ADDRESS_IPV4_IN_ADDR (cur).s_addr
|
|---|
| 124 | ==
|
|---|
| 125 | ADDRESS_IPV4_IN_ADDR (ip).s_addr))
|
|---|
| 126 | return 1;
|
|---|
| 127 | }
|
|---|
| 128 | return 0;
|
|---|
| 129 | #ifdef ENABLE_IPV6
|
|---|
| 130 | case IPV6_ADDRESS:
|
|---|
| 131 | for (i = 0; i < al->count; i++)
|
|---|
| 132 | {
|
|---|
| 133 | ip_address *cur = al->addresses + i;
|
|---|
| 134 | if (cur->type == IPV6_ADDRESS
|
|---|
| 135 | #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
|
|---|
| 136 | && ADDRESS_IPV6_SCOPE (cur) == ADDRESS_IPV6_SCOPE (ip)
|
|---|
| 137 | #endif
|
|---|
| 138 | && IN6_ARE_ADDR_EQUAL (&ADDRESS_IPV6_IN6_ADDR (cur),
|
|---|
| 139 | &ADDRESS_IPV6_IN6_ADDR (ip)))
|
|---|
| 140 | return 1;
|
|---|
| 141 | }
|
|---|
| 142 | return 0;
|
|---|
| 143 | #endif /* ENABLE_IPV6 */
|
|---|
| 144 | default:
|
|---|
| 145 | abort ();
|
|---|
| 146 | }
|
|---|
| 147 | }
|
|---|
| 148 |
|
|---|
| 149 | /* Mark the INDEXth element of AL as faulty, so that the next time
|
|---|
| 150 | this address list is used, the faulty element will be skipped. */
|
|---|
| 151 |
|
|---|
| 152 | void
|
|---|
| 153 | address_list_set_faulty (struct address_list *al, int index)
|
|---|
| 154 | {
|
|---|
| 155 | /* We assume that the address list is traversed in order, so that a
|
|---|
| 156 | "faulty" attempt is always preceded with all-faulty addresses,
|
|---|
| 157 | and this is how Wget uses it. */
|
|---|
| 158 | assert (index == al->faulty);
|
|---|
| 159 |
|
|---|
| 160 | ++al->faulty;
|
|---|
| 161 | if (al->faulty >= al->count)
|
|---|
| 162 | /* All addresses have been proven faulty. Since there's not much
|
|---|
| 163 | sense in returning the user an empty address list the next
|
|---|
| 164 | time, we'll rather make them all clean, so that they can be
|
|---|
| 165 | retried anew. */
|
|---|
| 166 | al->faulty = 0;
|
|---|
| 167 | }
|
|---|
| 168 |
|
|---|
| 169 | /* Set the "connected" flag to true. This flag used by connect.c to
|
|---|
| 170 | see if the host perhaps needs to be resolved again. */
|
|---|
| 171 |
|
|---|
| 172 | void
|
|---|
| 173 | address_list_set_connected (struct address_list *al)
|
|---|
| 174 | {
|
|---|
| 175 | al->connected = 1;
|
|---|
| 176 | }
|
|---|
| 177 |
|
|---|
| 178 | /* Return the value of the "connected" flag. */
|
|---|
| 179 |
|
|---|
| 180 | int
|
|---|
| 181 | address_list_connected_p (const struct address_list *al)
|
|---|
| 182 | {
|
|---|
| 183 | return al->connected;
|
|---|
| 184 | }
|
|---|
| 185 |
|
|---|
| 186 | #ifdef ENABLE_IPV6
|
|---|
| 187 |
|
|---|
| 188 | /* Create an address_list from the addresses in the given struct
|
|---|
| 189 | addrinfo. */
|
|---|
| 190 |
|
|---|
| 191 | static struct address_list *
|
|---|
| 192 | address_list_from_addrinfo (const struct addrinfo *ai)
|
|---|
| 193 | {
|
|---|
| 194 | struct address_list *al;
|
|---|
| 195 | const struct addrinfo *ptr;
|
|---|
| 196 | int cnt;
|
|---|
| 197 | ip_address *ip;
|
|---|
| 198 |
|
|---|
| 199 | cnt = 0;
|
|---|
| 200 | for (ptr = ai; ptr != NULL ; ptr = ptr->ai_next)
|
|---|
| 201 | if (ptr->ai_family == AF_INET || ptr->ai_family == AF_INET6)
|
|---|
| 202 | ++cnt;
|
|---|
| 203 | if (cnt == 0)
|
|---|
| 204 | return NULL;
|
|---|
| 205 |
|
|---|
| 206 | al = xnew0 (struct address_list);
|
|---|
| 207 | al->addresses = xnew_array (ip_address, cnt);
|
|---|
| 208 | al->count = cnt;
|
|---|
| 209 | al->refcount = 1;
|
|---|
| 210 |
|
|---|
| 211 | ip = al->addresses;
|
|---|
| 212 | for (ptr = ai; ptr != NULL; ptr = ptr->ai_next)
|
|---|
| 213 | if (ptr->ai_family == AF_INET6)
|
|---|
| 214 | {
|
|---|
| 215 | const struct sockaddr_in6 *sin6 =
|
|---|
| 216 | (const struct sockaddr_in6 *)ptr->ai_addr;
|
|---|
| 217 | ip->type = IPV6_ADDRESS;
|
|---|
| 218 | ADDRESS_IPV6_IN6_ADDR (ip) = sin6->sin6_addr;
|
|---|
| 219 | #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
|
|---|
| 220 | ADDRESS_IPV6_SCOPE (ip) = sin6->sin6_scope_id;
|
|---|
| 221 | #endif
|
|---|
| 222 | ++ip;
|
|---|
| 223 | }
|
|---|
| 224 | else if (ptr->ai_family == AF_INET)
|
|---|
| 225 | {
|
|---|
| 226 | const struct sockaddr_in *sin =
|
|---|
| 227 | (const struct sockaddr_in *)ptr->ai_addr;
|
|---|
| 228 | ip->type = IPV4_ADDRESS;
|
|---|
| 229 | ADDRESS_IPV4_IN_ADDR (ip) = sin->sin_addr;
|
|---|
| 230 | ++ip;
|
|---|
| 231 | }
|
|---|
| 232 | assert (ip - al->addresses == cnt);
|
|---|
| 233 | return al;
|
|---|
| 234 | }
|
|---|
| 235 |
|
|---|
| 236 | #define IS_IPV4(addr) (((const ip_address *) addr)->type == IPV4_ADDRESS)
|
|---|
| 237 |
|
|---|
| 238 | /* Compare two IP addresses by type, giving preference to the IPv4
|
|---|
| 239 | address (sorting it first). In other words, return -1 if ADDR1 is
|
|---|
| 240 | IPv4 and ADDR2 is IPv6, +1 if ADDR1 is IPv6 and ADDR2 is IPv4, and
|
|---|
| 241 | 0 otherwise.
|
|---|
| 242 |
|
|---|
| 243 | This is intended to be used as the comparator arg to a qsort-like
|
|---|
| 244 | sorting function, which is why it accepts generic pointers. */
|
|---|
| 245 |
|
|---|
| 246 | static int
|
|---|
| 247 | cmp_prefer_ipv4 (const void *addr1, const void *addr2)
|
|---|
| 248 | {
|
|---|
| 249 | return !IS_IPV4 (addr1) - !IS_IPV4 (addr2);
|
|---|
| 250 | }
|
|---|
| 251 |
|
|---|
| 252 | #define IS_IPV6(addr) (((const ip_address *) addr)->type == IPV6_ADDRESS)
|
|---|
| 253 |
|
|---|
| 254 | /* Like the above, but give preference to the IPv6 address. */
|
|---|
| 255 |
|
|---|
| 256 | static int
|
|---|
| 257 | cmp_prefer_ipv6 (const void *addr1, const void *addr2)
|
|---|
| 258 | {
|
|---|
| 259 | return !IS_IPV6 (addr1) - !IS_IPV6 (addr2);
|
|---|
| 260 | }
|
|---|
| 261 |
|
|---|
| 262 | #else /* not ENABLE_IPV6 */
|
|---|
| 263 |
|
|---|
| 264 | /* Create an address_list from a NULL-terminated vector of IPv4
|
|---|
| 265 | addresses. This kind of vector is returned by gethostbyname. */
|
|---|
| 266 |
|
|---|
| 267 | static struct address_list *
|
|---|
| 268 | address_list_from_ipv4_addresses (char **vec)
|
|---|
| 269 | {
|
|---|
| 270 | int count, i;
|
|---|
| 271 | struct address_list *al = xnew0 (struct address_list);
|
|---|
| 272 |
|
|---|
| 273 | count = 0;
|
|---|
| 274 | while (vec[count])
|
|---|
| 275 | ++count;
|
|---|
| 276 | assert (count > 0);
|
|---|
| 277 |
|
|---|
| 278 | al->addresses = xnew_array (ip_address, count);
|
|---|
| 279 | al->count = count;
|
|---|
| 280 | al->refcount = 1;
|
|---|
| 281 |
|
|---|
| 282 | for (i = 0; i < count; i++)
|
|---|
| 283 | {
|
|---|
| 284 | ip_address *ip = &al->addresses[i];
|
|---|
| 285 | ip->type = IPV4_ADDRESS;
|
|---|
| 286 | memcpy (ADDRESS_IPV4_DATA (ip), vec[i], 4);
|
|---|
| 287 | }
|
|---|
| 288 |
|
|---|
| 289 | return al;
|
|---|
| 290 | }
|
|---|
| 291 |
|
|---|
| 292 | #endif /* not ENABLE_IPV6 */
|
|---|
| 293 |
|
|---|
| 294 | static void
|
|---|
| 295 | address_list_delete (struct address_list *al)
|
|---|
| 296 | {
|
|---|
| 297 | xfree (al->addresses);
|
|---|
| 298 | xfree (al);
|
|---|
| 299 | }
|
|---|
| 300 |
|
|---|
| 301 | /* Mark the address list as being no longer in use. This will reduce
|
|---|
| 302 | its reference count which will cause the list to be freed when the
|
|---|
| 303 | count reaches 0. */
|
|---|
| 304 |
|
|---|
| 305 | void
|
|---|
| 306 | address_list_release (struct address_list *al)
|
|---|
| 307 | {
|
|---|
| 308 | --al->refcount;
|
|---|
| 309 | DEBUGP (("Releasing 0x%0*lx (new refcount %d).\n", PTR_FORMAT (al),
|
|---|
| 310 | al->refcount));
|
|---|
| 311 | if (al->refcount <= 0)
|
|---|
| 312 | {
|
|---|
| 313 | DEBUGP (("Deleting unused 0x%0*lx.\n", PTR_FORMAT (al)));
|
|---|
| 314 | address_list_delete (al);
|
|---|
| 315 | }
|
|---|
| 316 | }
|
|---|
| 317 | |
|---|
| 318 |
|
|---|
| 319 | /* Versions of gethostbyname and getaddrinfo that support timeout. */
|
|---|
| 320 |
|
|---|
| 321 | #ifndef ENABLE_IPV6
|
|---|
| 322 |
|
|---|
/* Argument block passed through run_with_timeout to the callback
   below.  */
struct ghbnwt_context {
  const char *host_name;        /* in: the name to resolve */
  struct hostent *hptr;         /* out: what gethostbyname returned */
};

/* Callback for run_with_timeout: resolve the host named in the
   ghbnwt_context ARG points to and store the result back into it.  */

static void
gethostbyname_with_timeout_callback (void *arg)
{
  struct ghbnwt_context *const ctx = arg;
  ctx->hptr = gethostbyname (ctx->host_name);
}
|
|---|
| 334 |
|
|---|
| 335 | /* Just like gethostbyname, except it times out after TIMEOUT seconds.
|
|---|
| 336 | In case of timeout, NULL is returned and errno is set to ETIMEDOUT.
|
|---|
| 337 | The function makes sure that when NULL is returned for reasons
|
|---|
| 338 | other than timeout, errno is reset. */
|
|---|
| 339 |
|
|---|
| 340 | static struct hostent *
|
|---|
| 341 | gethostbyname_with_timeout (const char *host_name, double timeout)
|
|---|
| 342 | {
|
|---|
| 343 | struct ghbnwt_context ctx;
|
|---|
| 344 | ctx.host_name = host_name;
|
|---|
| 345 | if (run_with_timeout (timeout, gethostbyname_with_timeout_callback, &ctx))
|
|---|
| 346 | {
|
|---|
| 347 | SET_H_ERRNO (HOST_NOT_FOUND);
|
|---|
| 348 | errno = ETIMEDOUT;
|
|---|
| 349 | return NULL;
|
|---|
| 350 | }
|
|---|
| 351 | if (!ctx.hptr)
|
|---|
| 352 | errno = 0;
|
|---|
| 353 | return ctx.hptr;
|
|---|
| 354 | }
|
|---|
| 355 |
|
|---|
| 356 | /* Print error messages for host errors. */
|
|---|
| 357 | static char *
|
|---|
| 358 | host_errstr (int error)
|
|---|
| 359 | {
|
|---|
| 360 | /* Can't use switch since some of these constants can be equal,
|
|---|
| 361 | which makes the compiler complain about duplicate case
|
|---|
| 362 | values. */
|
|---|
| 363 | if (error == HOST_NOT_FOUND
|
|---|
| 364 | || error == NO_RECOVERY
|
|---|
| 365 | || error == NO_DATA
|
|---|
| 366 | || error == NO_ADDRESS)
|
|---|
| 367 | return _("Unknown host");
|
|---|
| 368 | else if (error == TRY_AGAIN)
|
|---|
| 369 | /* Message modeled after what gai_strerror returns in similar
|
|---|
| 370 | circumstances. */
|
|---|
| 371 | return _("Temporary failure in name resolution");
|
|---|
| 372 | else
|
|---|
| 373 | return _("Unknown error");
|
|---|
| 374 | }
|
|---|
| 375 |
|
|---|
| 376 | #else /* ENABLE_IPV6 */
|
|---|
| 377 |
|
|---|
/* Argument block passed through run_with_timeout to the callback
   below: the four getaddrinfo arguments plus a slot for its return
   value.  */
struct gaiwt_context {
  const char *node;
  const char *service;
  const struct addrinfo *hints;
  struct addrinfo **res;
  int exit_code;                /* out: return value of getaddrinfo */
};

/* Callback for run_with_timeout: call getaddrinfo with the arguments
   stored in the gaiwt_context ARG points to and record its return
   value in the context.  */

static void
getaddrinfo_with_timeout_callback (void *arg)
{
  struct gaiwt_context *const ctx = arg;
  const int rc = getaddrinfo (ctx->node, ctx->service, ctx->hints, ctx->res);
  ctx->exit_code = rc;
}
|
|---|
| 392 |
|
|---|
/* Behave like getaddrinfo, but give up after TIMEOUT seconds.  On
   timeout, EAI_SYSTEM is returned and errno is set to ETIMEDOUT;
   otherwise the return value of getaddrinfo is passed through.  */

static int
getaddrinfo_with_timeout (const char *node, const char *service,
                          const struct addrinfo *hints, struct addrinfo **res,
                          double timeout)
{
  struct gaiwt_context ctx;

  ctx.res = res;
  ctx.hints = hints;
  ctx.service = service;
  ctx.node = node;

  if (!run_with_timeout (timeout, getaddrinfo_with_timeout_callback, &ctx))
    return ctx.exit_code;       /* the lookup ran to completion */

  errno = ETIMEDOUT;
  return EAI_SYSTEM;
}
|
|---|
| 415 |
|
|---|
| 416 | #endif /* ENABLE_IPV6 */
|
|---|
| 417 | |
|---|
| 418 |
|
|---|
| 419 | /* Pretty-print ADDR. When compiled without IPv6, this is the same as
|
|---|
| 420 | inet_ntoa. With IPv6, it either prints an IPv6 address or an IPv4
|
|---|
| 421 | address. */
|
|---|
| 422 |
|
|---|
| 423 | const char *
|
|---|
| 424 | pretty_print_address (const ip_address *addr)
|
|---|
| 425 | {
|
|---|
| 426 | switch (addr->type)
|
|---|
| 427 | {
|
|---|
| 428 | case IPV4_ADDRESS:
|
|---|
| 429 | return inet_ntoa (ADDRESS_IPV4_IN_ADDR (addr));
|
|---|
| 430 | #ifdef ENABLE_IPV6
|
|---|
| 431 | case IPV6_ADDRESS:
|
|---|
| 432 | {
|
|---|
| 433 | static char buf[128];
|
|---|
| 434 | inet_ntop (AF_INET6, &ADDRESS_IPV6_IN6_ADDR (addr), buf, sizeof (buf));
|
|---|
| 435 | #if 0
|
|---|
| 436 | #ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
|
|---|
| 437 | {
|
|---|
| 438 | /* append "%SCOPE_ID" for all ?non-global? addresses */
|
|---|
| 439 | char *p = buf + strlen (buf);
|
|---|
| 440 | *p++ = '%';
|
|---|
| 441 | number_to_string (p, ADDRESS_IPV6_SCOPE (addr));
|
|---|
| 442 | }
|
|---|
| 443 | #endif
|
|---|
| 444 | #endif
|
|---|
| 445 | buf[sizeof (buf) - 1] = '\0';
|
|---|
| 446 | return buf;
|
|---|
| 447 | }
|
|---|
| 448 | #endif
|
|---|
| 449 | }
|
|---|
| 450 | abort ();
|
|---|
| 451 | }
|
|---|
| 452 |
|
|---|
| 453 | /* The following two functions were adapted from glibc. */
|
|---|
| 454 |
|
|---|
/* Return 1 if the characters in [STR, END) form a dotted-decimal IPv4
   address -- exactly four decimal octets, each no larger than 255,
   separated by single dots -- and 0 otherwise.  The text need not be
   NUL-terminated.  Leading zeros within an octet are accepted.  */

static int
is_valid_ipv4_address (const char *str, const char *end)
{
  const char *p;
  int in_octet = 0;             /* non-zero while inside a digit run */
  int noctets = 0;              /* octets started so far */
  int value = 0;                /* value of the current octet */

  for (p = str; p < end; p++)
    {
      const int c = *p;

      if (c == '.')
        {
          /* A dot must terminate an octet, and nothing may follow
             the fourth one.  */
          if (!in_octet || noctets == 4)
            return 0;
          value = 0;
          in_octet = 0;
          continue;
        }

      if (c < '0' || c > '9')
        return 0;

      value = value * 10 + (c - '0');
      if (value > 255)
        return 0;

      if (!in_octet)
        {
          if (++noctets > 4)
            return 0;
          in_octet = 1;
        }
    }

  return noctets >= 4;
}
|
|---|
| 494 |
|
|---|
/* Return 1 if the characters in [STR, END) form a textual IPv6
   address, 0 otherwise.  The text need not be NUL-terminated.

   Accepted are groups of hexadecimal digits (each at most 0xffff)
   separated by colons, with at most one "::" abbreviation, and
   optionally a trailing dotted-decimal IPv4 address in place of the
   last two groups.  TP tracks how many of the 16 address bytes the
   groups seen so far account for.  */

int
is_valid_ipv6_address (const char *str, const char *end)
{
  /* Use lower-case for these to avoid clash with system headers.  */
  enum {
    ns_inaddrsz  = 4,
    ns_in6addrsz = 16,
    ns_int16sz   = 2
  };

  const char *curtok;           /* start of the current group */
  int tp;                       /* address bytes accounted for */
  int saw_double_colon;         /* non-zero once "::" has been seen */
  int saw_xdigit;               /* non-zero inside a hex group */
  unsigned int val;             /* value of the current hex group */

  tp = 0;
  /* Only the fact that "::" occurred matters for validation, so a
     flag is kept rather than a pointer computed from STR and TP,
     which could point past the end of the input.  */
  saw_double_colon = 0;

  if (str == end)
    return 0;

  /* Leading :: requires some special handling.  */
  if (*str == ':')
    {
      ++str;
      if (str == end || *str != ':')
        return 0;
    }

  curtok = str;
  saw_xdigit = 0;
  val = 0;

  while (str < end)
    {
      int ch = *str++;

      /* if ch is a number, add it to val.  */
      if (ISXDIGIT (ch))
        {
          val <<= 4;
          val |= XDIGIT_TO_NUM (ch);
          if (val > 0xffff)
            return 0;
          saw_xdigit = 1;
          continue;
        }

      /* if ch is a colon ...  */
      if (ch == ':')
        {
          curtok = str;
          if (saw_xdigit == 0)
            {
              /* An empty group, i.e. "::"; only one is allowed.  */
              if (saw_double_colon)
                return 0;
              saw_double_colon = 1;
              continue;
            }
          else if (str == end)
            /* A single trailing colon after a group.  */
            return 0;
          if (tp > ns_in6addrsz - ns_int16sz)
            return 0;
          tp += ns_int16sz;
          saw_xdigit = 0;
          val = 0;
          continue;
        }

      /* if ch is a dot, the current group must be the start of an
         embedded IPv4 address that runs to the end of the input.  */
      if (ch == '.' && (tp <= ns_in6addrsz - ns_inaddrsz)
          && is_valid_ipv4_address (curtok, end) == 1)
        {
          tp += ns_inaddrsz;
          saw_xdigit = 0;
          break;
        }

      return 0;
    }

  /* Account for a final hex group not followed by a colon.  */
  if (saw_xdigit == 1)
    {
      if (tp > ns_in6addrsz - ns_int16sz)
        return 0;
      tp += ns_int16sz;
    }

  /* "::" stands for at least one zero group, so it cannot appear in
     an address that is already complete; otherwise it fills up
     whatever is missing.  */
  if (saw_double_colon)
    {
      if (tp == ns_in6addrsz)
        return 0;
      tp = ns_in6addrsz;
    }

  if (tp != ns_in6addrsz)
    return 0;

  return 1;
}
|
|---|
| 596 | |
|---|
| 597 |
|
|---|
| 598 | /* Simple host cache, used by lookup_host to speed up resolving. The
|
|---|
| 599 | cache doesn't handle TTL because Wget is a fairly short-lived
|
|---|
| 600 | application. Refreshing is attempted when connect fails, though --
|
|---|
| 601 | see connect_to_host. */
|
|---|
| 602 |
|
|---|
/* Mapping from known host names to the lists of their addresses.
   Created lazily by cache_store; NULL until then.  */
static struct hash_table *host_name_addresses_map;
|
|---|
| 605 |
|
|---|
| 606 |
|
|---|
| 607 | /* Return the host's resolved addresses from the cache, if
|
|---|
| 608 | available. */
|
|---|
| 609 |
|
|---|
| 610 | static struct address_list *
|
|---|
| 611 | cache_query (const char *host)
|
|---|
| 612 | {
|
|---|
| 613 | struct address_list *al;
|
|---|
| 614 | if (!host_name_addresses_map)
|
|---|
| 615 | return NULL;
|
|---|
| 616 | al = hash_table_get (host_name_addresses_map, host);
|
|---|
| 617 | if (al)
|
|---|
| 618 | {
|
|---|
| 619 | DEBUGP (("Found %s in host_name_addresses_map (%p)\n", host, al));
|
|---|
| 620 | ++al->refcount;
|
|---|
| 621 | return al;
|
|---|
| 622 | }
|
|---|
| 623 | return NULL;
|
|---|
| 624 | }
|
|---|
| 625 |
|
|---|
| 626 | /* Cache the DNS lookup of HOST. Subsequent invocations of
|
|---|
| 627 | lookup_host will return the cached value. */
|
|---|
| 628 |
|
|---|
| 629 | static void
|
|---|
| 630 | cache_store (const char *host, struct address_list *al)
|
|---|
| 631 | {
|
|---|
| 632 | if (!host_name_addresses_map)
|
|---|
| 633 | host_name_addresses_map = make_nocase_string_hash_table (0);
|
|---|
| 634 |
|
|---|
| 635 | ++al->refcount;
|
|---|
| 636 | hash_table_put (host_name_addresses_map, xstrdup_lower (host), al);
|
|---|
| 637 |
|
|---|
| 638 | #ifdef ENABLE_DEBUG
|
|---|
| 639 | if (opt.debug)
|
|---|
| 640 | {
|
|---|
| 641 | int i;
|
|---|
| 642 | debug_logprintf ("Caching %s =>", host);
|
|---|
| 643 | for (i = 0; i < al->count; i++)
|
|---|
| 644 | debug_logprintf (" %s", pretty_print_address (al->addresses + i));
|
|---|
| 645 | debug_logprintf ("\n");
|
|---|
| 646 | }
|
|---|
| 647 | #endif
|
|---|
| 648 | }
|
|---|
| 649 |
|
|---|
| 650 | /* Remove HOST from the DNS cache. Does nothing is HOST is not in
|
|---|
| 651 | the cache. */
|
|---|
| 652 |
|
|---|
| 653 | static void
|
|---|
| 654 | cache_remove (const char *host)
|
|---|
| 655 | {
|
|---|
| 656 | struct address_list *al;
|
|---|
| 657 | if (!host_name_addresses_map)
|
|---|
| 658 | return;
|
|---|
| 659 | al = hash_table_get (host_name_addresses_map, host);
|
|---|
| 660 | if (al)
|
|---|
| 661 | {
|
|---|
| 662 | address_list_release (al);
|
|---|
| 663 | hash_table_remove (host_name_addresses_map, host);
|
|---|
| 664 | }
|
|---|
| 665 | }
|
|---|
| 666 | |
|---|
| 667 |
|
|---|
| 668 | /* Look up HOST in DNS and return a list of IP addresses.
|
|---|
| 669 |
|
|---|
| 670 | This function caches its result so that, if the same host is passed
|
|---|
| 671 | the second time, the addresses are returned without DNS lookup.
|
|---|
| 672 | (Use LH_REFRESH to force lookup, or set opt.dns_cache to 0 to
|
|---|
| 673 | globally disable caching.)
|
|---|
| 674 |
|
|---|
| 675 | The order of the returned addresses is affected by the setting of
|
|---|
| 676 | opt.prefer_family: if it is set to prefer_ipv4, IPv4 addresses are
|
|---|
| 677 | placed at the beginning; if it is prefer_ipv6, IPv6 ones are placed
|
|---|
| 678 | at the beginning; otherwise, the order is left intact. The
|
|---|
| 679 | relative order of addresses with the same family is left
|
|---|
| 680 | undisturbed in either case.
|
|---|
| 681 |
|
|---|
| 682 | FLAGS can be a combination of:
|
|---|
| 683 | LH_SILENT - don't print the "resolving ... done" messages.
|
|---|
| 684 | LH_BIND - resolve addresses for use with bind, which under
|
|---|
| 685 | IPv6 means to use AI_PASSIVE flag to getaddrinfo.
|
|---|
| 686 | Passive lookups are not cached under IPv6.
|
|---|
| 687 | LH_REFRESH - if HOST is cached, remove the entry from the cache
|
|---|
| 688 | and resolve it anew. */
|
|---|
| 689 |
|
|---|
| 690 | struct address_list *
|
|---|
| 691 | lookup_host (const char *host, int flags)
|
|---|
| 692 | {
|
|---|
| 693 | struct address_list *al;
|
|---|
| 694 | int silent = flags & LH_SILENT;
|
|---|
| 695 | int use_cache;
|
|---|
| 696 | int numeric_address = 0;
|
|---|
| 697 | double timeout = opt.dns_timeout;
|
|---|
| 698 |
|
|---|
| 699 | #ifndef ENABLE_IPV6
|
|---|
| 700 | /* If we're not using getaddrinfo, first check if HOST specifies a
|
|---|
| 701 | numeric IPv4 address. Some implementations of gethostbyname
|
|---|
| 702 | (e.g. the Ultrix one and possibly Winsock) don't accept
|
|---|
| 703 | dotted-decimal IPv4 addresses. */
|
|---|
| 704 | {
|
|---|
| 705 | uint32_t addr_ipv4 = (uint32_t)inet_addr (host);
|
|---|
| 706 | if (addr_ipv4 != (uint32_t) -1)
|
|---|
| 707 | {
|
|---|
| 708 | /* No need to cache host->addr relation, just return the
|
|---|
| 709 | address. */
|
|---|
| 710 | char *vec[2];
|
|---|
| 711 | vec[0] = (char *)&addr_ipv4;
|
|---|
| 712 | vec[1] = NULL;
|
|---|
| 713 | return address_list_from_ipv4_addresses (vec);
|
|---|
| 714 | }
|
|---|
| 715 | }
|
|---|
| 716 | #else /* ENABLE_IPV6 */
|
|---|
| 717 | /* If we're using getaddrinfo, at least check whether the address is
|
|---|
| 718 | already numeric, in which case there is no need to print the
|
|---|
| 719 | "Resolving..." output. (This comes at no additional cost since
|
|---|
| 720 | the is_valid_ipv*_address are already required for
|
|---|
| 721 | url_parse.) */
|
|---|
| 722 | {
|
|---|
| 723 | const char *end = host + strlen (host);
|
|---|
| 724 | if (is_valid_ipv4_address (host, end) || is_valid_ipv6_address (host, end))
|
|---|
| 725 | numeric_address = 1;
|
|---|
| 726 | }
|
|---|
| 727 | #endif
|
|---|
| 728 |
|
|---|
| 729 | /* Cache is normally on, but can be turned off with --no-dns-cache.
|
|---|
| 730 | Don't cache passive lookups under IPv6. */
|
|---|
| 731 | use_cache = opt.dns_cache;
|
|---|
| 732 | #ifdef ENABLE_IPV6
|
|---|
| 733 | if ((flags & LH_BIND) || numeric_address)
|
|---|
| 734 | use_cache = 0;
|
|---|
| 735 | #endif
|
|---|
| 736 |
|
|---|
| 737 | /* Try to find the host in the cache so we don't need to talk to the
|
|---|
| 738 | resolver. If LH_REFRESH is requested, remove HOST from the cache
|
|---|
| 739 | instead. */
|
|---|
| 740 | if (use_cache)
|
|---|
| 741 | {
|
|---|
| 742 | if (!(flags & LH_REFRESH))
|
|---|
| 743 | {
|
|---|
| 744 | al = cache_query (host);
|
|---|
| 745 | if (al)
|
|---|
| 746 | return al;
|
|---|
| 747 | }
|
|---|
| 748 | else
|
|---|
| 749 | cache_remove (host);
|
|---|
| 750 | }
|
|---|
| 751 |
|
|---|
| 752 | /* No luck with the cache; resolve HOST. */
|
|---|
| 753 |
|
|---|
| 754 | if (!silent && !numeric_address)
|
|---|
| 755 | logprintf (LOG_VERBOSE, _("Resolving %s... "), escnonprint (host));
|
|---|
| 756 |
|
|---|
| 757 | #ifdef ENABLE_IPV6
|
|---|
| 758 | {
|
|---|
| 759 | int err;
|
|---|
| 760 | struct addrinfo hints, *res;
|
|---|
| 761 |
|
|---|
| 762 | xzero (hints);
|
|---|
| 763 | hints.ai_socktype = SOCK_STREAM;
|
|---|
| 764 | if (opt.ipv4_only)
|
|---|
| 765 | hints.ai_family = AF_INET;
|
|---|
| 766 | else if (opt.ipv6_only)
|
|---|
| 767 | hints.ai_family = AF_INET6;
|
|---|
| 768 | else
|
|---|
| 769 | /* We tried using AI_ADDRCONFIG, but removed it because: it
|
|---|
| 770 | misinterprets IPv6 loopbacks, it is broken on AIX 5.1, and
|
|---|
| 771 | it's unneeded since we sort the addresses anyway. */
|
|---|
| 772 | hints.ai_family = AF_UNSPEC;
|
|---|
| 773 |
|
|---|
| 774 | if (flags & LH_BIND)
|
|---|
| 775 | hints.ai_flags |= AI_PASSIVE;
|
|---|
| 776 |
|
|---|
| 777 | #ifdef AI_NUMERICHOST
|
|---|
| 778 | if (numeric_address)
|
|---|
| 779 | {
|
|---|
| 780 | /* Where available, the AI_NUMERICHOST hint can prevent costly
|
|---|
| 781 | access to DNS servers. */
|
|---|
| 782 | hints.ai_flags |= AI_NUMERICHOST;
|
|---|
| 783 | timeout = 0; /* no timeout needed when "resolving"
|
|---|
| 784 | numeric hosts -- avoid setting up
|
|---|
| 785 | signal handlers and such. */
|
|---|
| 786 | }
|
|---|
| 787 | #endif
|
|---|
| 788 |
|
|---|
| 789 | err = getaddrinfo_with_timeout (host, NULL, &hints, &res, timeout);
|
|---|
| 790 | if (err != 0 || res == NULL)
|
|---|
| 791 | {
|
|---|
| 792 | if (!silent)
|
|---|
| 793 | logprintf (LOG_VERBOSE, _("failed: %s.\n"),
|
|---|
| 794 | err != EAI_SYSTEM ? gai_strerror (err) : strerror (errno));
|
|---|
| 795 | return NULL;
|
|---|
| 796 | }
|
|---|
| 797 | al = address_list_from_addrinfo (res);
|
|---|
| 798 | freeaddrinfo (res);
|
|---|
| 799 | if (!al)
|
|---|
| 800 | {
|
|---|
| 801 | logprintf (LOG_VERBOSE,
|
|---|
| 802 | _("failed: No IPv4/IPv6 addresses for host.\n"));
|
|---|
| 803 | return NULL;
|
|---|
| 804 | }
|
|---|
| 805 |
|
|---|
| 806 | /* Reorder addresses so that IPv4 ones (or IPv6 ones, as per
|
|---|
| 807 | --prefer-family) come first. Sorting is stable so the order of
|
|---|
| 808 | the addresses with the same family is undisturbed. */
|
|---|
| 809 | if (al->count > 1 && opt.prefer_family != prefer_none)
|
|---|
| 810 | stable_sort (al->addresses, al->count, sizeof (ip_address),
|
|---|
| 811 | opt.prefer_family == prefer_ipv4
|
|---|
| 812 | ? cmp_prefer_ipv4 : cmp_prefer_ipv6);
|
|---|
| 813 | }
|
|---|
| 814 | #else /* not ENABLE_IPV6 */
|
|---|
| 815 | {
|
|---|
| 816 | struct hostent *hptr = gethostbyname_with_timeout (host, timeout);
|
|---|
| 817 | if (!hptr)
|
|---|
| 818 | {
|
|---|
| 819 | if (!silent)
|
|---|
| 820 | {
|
|---|
| 821 | if (errno != ETIMEDOUT)
|
|---|
| 822 | logprintf (LOG_VERBOSE, _("failed: %s.\n"),
|
|---|
| 823 | host_errstr (h_errno));
|
|---|
| 824 | else
|
|---|
| 825 | logputs (LOG_VERBOSE, _("failed: timed out.\n"));
|
|---|
| 826 | }
|
|---|
| 827 | return NULL;
|
|---|
| 828 | }
|
|---|
| 829 | /* Do older systems have h_addr_list? */
|
|---|
| 830 | al = address_list_from_ipv4_addresses (hptr->h_addr_list);
|
|---|
| 831 | }
|
|---|
| 832 | #endif /* not ENABLE_IPV6 */
|
|---|
| 833 |
|
|---|
| 834 | /* Print the addresses determined by DNS lookup, but no more than
|
|---|
| 835 | three. */
|
|---|
| 836 | if (!silent && !numeric_address)
|
|---|
| 837 | {
|
|---|
| 838 | int i;
|
|---|
| 839 | int printmax = al->count <= 3 ? al->count : 3;
|
|---|
| 840 | for (i = 0; i < printmax; i++)
|
|---|
| 841 | {
|
|---|
| 842 | logprintf (LOG_VERBOSE, "%s",
|
|---|
| 843 | pretty_print_address (al->addresses + i));
|
|---|
| 844 | if (i < printmax - 1)
|
|---|
| 845 | logputs (LOG_VERBOSE, ", ");
|
|---|
| 846 | }
|
|---|
| 847 | if (printmax != al->count)
|
|---|
| 848 | logputs (LOG_VERBOSE, ", ...");
|
|---|
| 849 | logputs (LOG_VERBOSE, "\n");
|
|---|
| 850 | }
|
|---|
| 851 |
|
|---|
| 852 | /* Cache the lookup information. */
|
|---|
| 853 | if (use_cache)
|
|---|
| 854 | cache_store (host, al);
|
|---|
| 855 |
|
|---|
| 856 | return al;
|
|---|
| 857 | }
|
|---|
| 858 | |
|---|
| 859 |
|
|---|
| 860 | /* Determine whether a URL is acceptable to be followed, according to
|
|---|
| 861 | a list of domains to accept. */
|
|---|
| 862 | int
|
|---|
| 863 | accept_domain (struct url *u)
|
|---|
| 864 | {
|
|---|
| 865 | assert (u->host != NULL);
|
|---|
| 866 | if (opt.domains)
|
|---|
| 867 | {
|
|---|
| 868 | if (!sufmatch ((const char **)opt.domains, u->host))
|
|---|
| 869 | return 0;
|
|---|
| 870 | }
|
|---|
| 871 | if (opt.exclude_domains)
|
|---|
| 872 | {
|
|---|
| 873 | if (sufmatch ((const char **)opt.exclude_domains, u->host))
|
|---|
| 874 | return 0;
|
|---|
| 875 | }
|
|---|
| 876 | return 1;
|
|---|
| 877 | }
|
|---|
| 878 |
|
|---|
/* Check whether the host name WHAT is matched by LIST, a
   NULL-terminated array of domain patterns.  Patterns are compared
   against the end of WHAT, ignoring case.

   A pattern matches when it equals WHAT, or when it is a suffix of
   WHAT that starts on a domain-label boundary -- i.e. the character
   of WHAT just before the matched suffix is a dot, or the pattern
   itself begins with a dot.  This way "example.com" matches
   "example.com" and "www.example.com", but not "notexample.com".

   If an element of LIST matched, 1 is returned, 0 otherwise.  */
int
sufmatch (const char **list, const char *what)
{
  int i, j, k, lw;

  lw = strlen (what);
  for (i = 0; list[i]; i++)
    {
      j = strlen (list[i]);
      if (lw < j)
        continue;               /* WHAT is too short to end in list[i] */

      /* Compare backwards, starting at the terminating NULs.  */
      for (k = lw; j >= 0 && k >= 0; j--, k--)
        if (TOLOWER (list[i][j]) != TOLOWER (what[k]))
          break;

      /* j == -1 means all of list[i] was matched.  Then:
         k == -1            -- WHAT is exactly the pattern;
         what[k] == '.'     -- WHAT is a subdomain of the pattern;
         list[i][0] == '.'  -- the pattern supplies the boundary dot.  */
      if (j == -1 && (k == -1 || what[k] == '.' || list[i][0] == '.'))
        return 1;
    }
  return 0;
}
|
|---|
| 901 |
|
|---|
| 902 | static int
|
|---|
| 903 | host_cleanup_mapper (void *key, void *value, void *arg_ignored)
|
|---|
| 904 | {
|
|---|
| 905 | struct address_list *al;
|
|---|
| 906 |
|
|---|
| 907 | xfree (key); /* host */
|
|---|
| 908 |
|
|---|
| 909 | al = (struct address_list *)value;
|
|---|
| 910 | assert (al->refcount == 1);
|
|---|
| 911 | address_list_delete (al);
|
|---|
| 912 |
|
|---|
| 913 | return 0;
|
|---|
| 914 | }
|
|---|
| 915 |
|
|---|
| 916 | void
|
|---|
| 917 | host_cleanup (void)
|
|---|
| 918 | {
|
|---|
| 919 | if (host_name_addresses_map)
|
|---|
| 920 | {
|
|---|
| 921 | hash_table_map (host_name_addresses_map, host_cleanup_mapper, NULL);
|
|---|
| 922 | hash_table_destroy (host_name_addresses_map);
|
|---|
| 923 | host_name_addresses_map = NULL;
|
|---|
| 924 | }
|
|---|
| 925 | }
|
|---|