source: trunk/essentials/net-misc/wget/src/utils.c

Last change on this file was 3440, checked in by bird, 18 years ago

wget 1.10.2

File size: 58.9 KB
Line 
1/* Various utility functions.
2 Copyright (C) 2005 Free Software Foundation, Inc.
3
4This file is part of GNU Wget.
5
6GNU Wget is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
8the Free Software Foundation; either version 2 of the License, or
9(at your option) any later version.
10
11GNU Wget is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with Wget; if not, write to the Free Software
18Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19
20In addition, as a special exception, the Free Software Foundation
21gives permission to link the code of its release of Wget with the
22OpenSSL project's "OpenSSL" library (or with modified versions of it
23that use the same license as the "OpenSSL" library), and distribute
24the linked executables. You must obey the GNU General Public License
25in all respects for all of the code used other than "OpenSSL". If you
26modify this file, you may extend this exception to your version of the
27file, but you are not obligated to do so. If you do not wish to do
28so, delete this exception statement from your version. */
29
30#include <config.h>
31
32#include <stdio.h>
33#include <stdlib.h>
34#ifdef HAVE_STRING_H
35# include <string.h>
36#else /* not HAVE_STRING_H */
37# include <strings.h>
38#endif /* not HAVE_STRING_H */
39#include <sys/types.h>
40#ifdef HAVE_UNISTD_H
41# include <unistd.h>
42#endif
43#ifdef HAVE_MMAP
44# include <sys/mman.h>
45#endif
46#ifdef HAVE_PWD_H
47# include <pwd.h>
48#endif
49#ifdef HAVE_LIMITS_H
50# include <limits.h>
51#endif
52#ifdef HAVE_UTIME_H
53# include <utime.h>
54#endif
55#ifdef HAVE_SYS_UTIME_H
56# include <sys/utime.h>
57#endif
58#include <errno.h>
59#ifdef NeXT
60# include <libc.h> /* for access() */
61#endif
62#include <fcntl.h>
63#include <assert.h>
64#ifdef WGET_USE_STDARG
65# include <stdarg.h>
66#else
67# include <varargs.h>
68#endif
69#ifdef HAVE_LOCALE_H
70# include <locale.h>
71#endif
72
73/* For TIOCGWINSZ and friends: */
74#ifdef HAVE_SYS_IOCTL_H
75# include <sys/ioctl.h>
76#endif
77#ifdef HAVE_TERMIOS_H
78# include <termios.h>
79#endif
80
81/* Needed for run_with_timeout. */
82#undef USE_SIGNAL_TIMEOUT
83#ifdef HAVE_SIGNAL_H
84# include <signal.h>
85#endif
86#ifdef HAVE_SETJMP_H
87# include <setjmp.h>
88#endif
89
90#ifndef HAVE_SIGSETJMP
91/* If sigsetjmp is a macro, configure won't pick it up. */
92# ifdef sigsetjmp
93# define HAVE_SIGSETJMP
94# endif
95#endif
96
97#ifdef HAVE_SIGNAL
98# ifdef HAVE_SIGSETJMP
99# define USE_SIGNAL_TIMEOUT
100# endif
101# ifdef HAVE_SIGBLOCK
102# define USE_SIGNAL_TIMEOUT
103# endif
104#endif
105
106#include "wget.h"
107#include "utils.h"
108#include "hash.h"
109
110#ifndef errno
111extern int errno;
112#endif
113
/* Duplicate S with xstrdup and lowercase the duplicate in place, one
   character at a time, using TOLOWER.  The duplicate is returned; S
   itself is not modified.  */

char *
xstrdup_lower (const char *s)
{
  char *result = xstrdup (s);
  char *cursor;

  for (cursor = result; *cursor != '\0'; ++cursor)
    *cursor = TOLOWER (*cursor);

  return result;
}
125
/* Duplicate the character range [BEG, END): BEG points at the first
   character to copy and END at the character just past the last one.
   The copy is placed in storage obtained from xmalloc and is always
   zero-terminated.  */
char *
strdupdelim (const char *beg, const char *end)
{
  size_t span = end - beg;
  char *copy = (char *) xmalloc (span + 1);

  memcpy (copy, beg, span);
  copy[span] = '\0';
  return copy;
}
137
/* Split the comma-separated string S into a NULL-terminated vector of
   freshly allocated strings (one strdupdelim copy per element).
   Whitespace (per ISSPACE) directly after a comma is skipped and is
   not part of the following element.  An empty element is still
   stored, e.g. for a trailing comma.

   Returns NULL when S is NULL or the empty string.  */
char **
sepstring (const char *s)
{
  char **vec = NULL;
  const char *start;
  int count = 0;

  if (s == NULL || *s == '\0')
    return NULL;

  start = s;
  for (;;)
    {
      /* Keep scanning until an element boundary: a comma or the end
         of the input.  */
      if (*s != ',' && *s != '\0')
        {
          ++s;
          continue;
        }

      /* [START, S) is a complete element.  Make room for it plus the
         terminating NULL and store a copy.  */
      vec = (char **) xrealloc (vec, (count + 2) * sizeof (char *));
      vec[count++] = strdupdelim (start, s);
      vec[count] = NULL;

      if (*s == '\0')
        break;

      /* Step over the comma and any blanks that follow it.  */
      ++s;
      while (ISSPACE (*s))
        ++s;
      start = s;
    }
  return vec;
}
173
174
/* VA_START papers over the two va_start spellings this file can be
   built with: when WGET_USE_STDARG is defined <stdarg.h> is included
   and va_start takes the last named parameter; otherwise <varargs.h>
   is included and va_start takes only the va_list.  */
#ifndef WGET_USE_STDARG
# define VA_START(args, ignored) va_start (args)
#else
# define VA_START(args, arg1) va_start (args, arg1)
#endif
180
/* Format FMT and the following arguments, printf-style, into a buffer
   obtained from xmalloc/xrealloc that is grown until the output fits,
   and return that buffer.  Similar in spirit to GNU libc's asprintf,
   except that the string is the return value rather than an output
   parameter.  */

char *
aprintf (const char *fmt, ...)
{
  /* vsnprintf does the real work.  The original author notes that a
     replacement is provided for systems lacking it, and that
     log_vprintf_internal explains why its return value can be relied
     on.  */
  int capacity = 32;
  char *buf = xmalloc (capacity);

  for (;;)
    {
      va_list ap;
      int written;

      VA_START (ap, fmt);
      written = vsnprintf (buf, capacity, fmt, ap);
      va_end (ap);

      /* Everything, including the terminating zero, fit.  */
      if (written >= 0 && written < capacity)
        return buf;

      if (written >= 0)
        /* C99 behavior: vsnprintf reported the length it needs.  */
        capacity = written + 1;
      else
        /* No usable length was reported; double and try again.  */
        capacity *= 2;
      buf = xrealloc (buf, capacity);
    }
}
219
/* Concatenate STR0 and the following string arguments, up to the
   terminating NULL argument, into one zero-terminated string held in
   storage obtained from xmalloc.  */

char *
concat_strings (const char *str0, ...)
{
  va_list ap;
  /* Lengths of the first few arguments are remembered from the sizing
     pass so the copying pass need not call strlen on them again (the
     original credits Apache's apr_pstrcat for the idea).  */
  int cached[5];
  const char *arg;
  char *result, *out;
  int total = 0;
  int idx = 0;

  /* Pass 1: add up the lengths.  */
  VA_START (ap, str0);
  arg = str0;
  while (arg != NULL)
    {
      int n = strlen (arg);
      if (idx < countof (cached))
        cached[idx++] = n;
      total += n;
      arg = va_arg (ap, char *);
    }
  va_end (ap);

  result = xmalloc (total + 1);
  out = result;

  /* Pass 2: copy each argument to the end of the result.  */
  idx = 0;
  VA_START (ap, str0);
  arg = str0;
  while (arg != NULL)
    {
      int n;
      if (idx < countof (cached))
        n = cached[idx++];
      else
        n = strlen (arg);
      memcpy (out, arg, n);
      out += n;
      arg = va_arg (ap, char *);
    }
  va_end (ap);
  *out = '\0';

  return result;
}
267
268
/* Format the time *TM as "hh:mm:ss" (local time) and return a pointer
   to the result.  If TM is NULL, the current time is used.

   The result lives in a static buffer that is overwritten by every
   call, so it must be used or copied before calling again.

   The empty string is returned if the time is unavailable (value -1)
   or if localtime() cannot convert it.  */

char *
time_str (time_t *tm)
{
  static char output[15];
  struct tm *ptm;
  time_t secs = tm ? *tm : time (NULL);

  /* Start from the empty string so every failure path returns it.  */
  *output = '\0';
  if (secs == (time_t) -1)
    return output;

  /* localtime returns NULL when the value cannot be represented in a
     struct tm; dereferencing the result unchecked would crash.  */
  ptm = localtime (&secs);
  if (ptm == NULL)
    return output;

  /* Bounded write: the fields are ints, so guard the static buffer
     against out-of-range values.  */
  snprintf (output, sizeof (output), "%02d:%02d:%02d",
            ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
  return output;
}
292
/* Format the time *TM as "YYYY-MM-DD hh:mm:ss" (local time) and
   return a pointer to the result.  If TM is NULL, the current time is
   used.

   The result lives in a static buffer that is overwritten by every
   call.  The empty string is returned if the time is unavailable
   (value -1) or if localtime() cannot convert it.  */

char *
datetime_str (time_t *tm)
{
  static char output[20];       /* "YYYY-MM-DD hh:mm:ss" + \0 */
  struct tm *ptm;
  time_t secs = tm ? *tm : time (NULL);

  /* Start from the empty string so every failure path returns it.  */
  *output = '\0';
  if (secs == (time_t) -1)
    return output;

  /* localtime returns NULL when the value cannot be represented in a
     struct tm; dereferencing the result unchecked would crash.  */
  ptm = localtime (&secs);
  if (ptm == NULL)
    return output;

  /* Bounded write: a year with more than four digits would otherwise
     run past the end of the 20-byte buffer.  */
  snprintf (output, sizeof (output), "%04d-%02d-%02d %02d:%02d:%02d",
            ptm->tm_year + 1900, ptm->tm_mon + 1, ptm->tm_mday,
            ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
  return output;
}
315
316
317/* The Windows versions of the following two functions are defined in
318 mswindows.c. */
319
320#ifndef WINDOWS
321void
322fork_to_background (void)
323{
324 pid_t pid;
325 /* Whether we arrange our own version of opt.lfilename here. */
326 int logfile_changed = 0;
327
328 if (!opt.lfilename)
329 {
330 /* We must create the file immediately to avoid either a race
331 condition (which arises from using unique_name and failing to
332 use fopen_excl) or lying to the user about the log file name
333 (which arises from using unique_name, printing the name, and
334 using fopen_excl later on.) */
335 FILE *new_log_fp = unique_create (DEFAULT_LOGFILE, 0, &opt.lfilename);
336 if (new_log_fp)
337 {
338 logfile_changed = 1;
339 fclose (new_log_fp);
340 }
341 }
342 pid = fork ();
343 if (pid < 0)
344 {
345 /* parent, error */
346 perror ("fork");
347 exit (1);
348 }
349 else if (pid != 0)
350 {
351 /* parent, no error */
352 printf (_("Continuing in background, pid %d.\n"), (int)pid);
353 if (logfile_changed)
354 printf (_("Output will be written to `%s'.\n"), opt.lfilename);
355 exit (0); /* #### should we use _exit()? */
356 }
357
358 /* child: give up the privileges and keep running. */
359 setsid ();
360 freopen ("/dev/null", "r", stdin);
361 freopen ("/dev/null", "w", stdout);
362 freopen ("/dev/null", "w", stderr);
363}
364#endif /* not WINDOWS */
365
366
367/* "Touch" FILE, i.e. make its mtime ("modified time") equal the time
368 specified with TM. The atime ("access time") is set to the current
369 time. */
370
371void
372touch (const char *file, time_t tm)
373{
374#ifdef HAVE_STRUCT_UTIMBUF
375 struct utimbuf times;
376#else
377 struct {
378 time_t actime;
379 time_t modtime;
380 } times;
381#endif
382 times.modtime = tm;
383 times.actime = time (NULL);
384 if (utime (file, &times) == -1)
385 logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
386}
387
388/* Checks if FILE is a symbolic link, and removes it if it is. Does
389 nothing under MS-Windows. */
390int
391remove_link (const char *file)
392{
393 int err = 0;
394 struct_stat st;
395
396 if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
397 {
398 DEBUGP (("Unlinking %s (symlink).\n", file));
399 err = unlink (file);
400 if (err != 0)
401 logprintf (LOG_VERBOSE, _("Failed to unlink symlink `%s': %s\n"),
402 file, strerror (errno));
403 }
404 return err;
405}
406
407/* Does FILENAME exist? This is quite a lousy implementation, since
408 it supplies no error codes -- only a yes-or-no answer. Thus it
409 will return that a file does not exist if, e.g., the directory is
410 unreadable. I don't mind it too much currently, though. The
411 proper way should, of course, be to have a third, error state,
412 other than true/false, but that would introduce uncalled-for
413 additional complexity to the callers. */
414int
415file_exists_p (const char *filename)
416{
417#ifdef HAVE_ACCESS
418 return access (filename, F_OK) >= 0;
419#else
420 struct_stat buf;
421 return stat (filename, &buf) >= 0;
422#endif
423}
424
425/* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
426 Returns 0 on error. */
427int
428file_non_directory_p (const char *path)
429{
430 struct_stat buf;
431 /* Use lstat() rather than stat() so that symbolic links pointing to
432 directories can be identified correctly. */
433 if (lstat (path, &buf) != 0)
434 return 0;
435 return S_ISDIR (buf.st_mode) ? 0 : 1;
436}
437
438/* Return the size of file named by FILENAME, or -1 if it cannot be
439 opened or seeked into. */
440wgint
441file_size (const char *filename)
442{
443#if defined(HAVE_FSEEKO) && defined(HAVE_FTELLO)
444 wgint size;
445 /* We use fseek rather than stat to determine the file size because
446 that way we can also verify that the file is readable without
447 explicitly checking for permissions. Inspired by the POST patch
448 by Arnaud Wylie. */
449 FILE *fp = fopen (filename, "rb");
450 if (!fp)
451 return -1;
452 fseeko (fp, 0, SEEK_END);
453 size = ftello (fp);
454 fclose (fp);
455 return size;
456#else
457 struct_stat st;
458 if (stat (filename, &st) < 0)
459 return -1;
460 return st.st_size;
461#endif
462}
463
/* Probe PREFIX.1, PREFIX.2, ... with file_exists_p until a name is
   found that does not exist, and return an xstrdup'ed copy of that
   name.  */

static char *
unique_name_1 (const char *prefix)
{
  int plen = strlen (prefix);
  /* Stack scratch buffer: PREFIX, the '.', and 24 bytes for the
     number written by number_to_string (presumably including its
     terminating zero).  */
  char *candidate = (char *) alloca (plen + 1 + 24);
  char *digits = candidate + plen + 1;
  int n;

  memcpy (candidate, prefix, plen);
  candidate[plen] = '.';

  for (n = 1; ; n++)
    {
      number_to_string (digits, n);
      if (!file_exists_p (candidate))
        break;
    }

  return xstrdup (candidate);
}
485
/* Return a file name based on FILE that did not exist when checked.

   If FILE itself does not exist, it is the answer.  Otherwise FILE.1,
   FILE.2, ... are tried in turn and the first one that does not exist
   is returned.

   Nothing is created: the name was merely unused at the moment of the
   check.  Where security matters, do not assume it is still unused
   later -- open it with O_EXCL or an equivalent.

   When ALLOW_PASSTHROUGH is 0 the result is always freshly allocated.
   When it is non-zero and FILE does not exist, FILE itself is
   returned (with const cast away) instead of a copy.  */

char *
unique_name (const char *file, int allow_passthrough)
{
  /* FILE is taken: go looking for a free numeric suffix.  */
  if (file_exists_p (file))
    return unique_name_1 (file);

  /* FILE is free: hand it back as is, or as a copy.  */
  if (allow_passthrough)
    return (char *)file;
  return xstrdup (file);
}
514
/* Create and open a new file whose name is based on NAME, never
   overwriting an existing file.  BINARY is passed on to fopen_excl.

   Candidate names come from unique_name and are opened with
   fopen_excl; if the open fails with EEXIST (someone created the
   file between the check and the open) a new candidate is tried.
   Provided O_EXCL is correctly implemented, this avoids the race
   inherent in opening the name returned by unique_name.

   Returns the open stream, or NULL if opening failed for a reason
   other than EEXIST.  If OPENED_NAME is non-NULL, *OPENED_NAME
   receives the allocated name of the opened file on success and NULL
   on failure, so the caller never sees a stale or uninitialized
   pointer.  */

FILE *
unique_create (const char *name, int binary, char **opened_name)
{
  /* unique file name, based on NAME */
  char *uname = unique_name (name, 0);
  FILE *fp;

  while ((fp = fopen_excl (uname, binary)) == NULL && errno == EEXIST)
    {
      xfree (uname);
      uname = unique_name (name, 0);
    }

  if (opened_name != NULL)
    *opened_name = fp != NULL ? uname : NULL;

  /* The name is handed to the caller only when the open succeeded
     and the caller asked for it; otherwise release it here.  */
  if (fp == NULL || opened_name == NULL)
    xfree (uname);
  return fp;
}
545
/* Open FNAME for writing "exclusively": if the file already exists
   the call fails and errno is EEXIST.  If BINARY is non-zero the file
   is opened in binary mode, as with fopen's "wb".

   Returns the stream, or NULL with errno set if opening fails for any
   reason, including the file already existing.

   Where O_EXCL is not available the existence check is done by hand
   with file_exists_p, which is open to a race between the check and
   the fopen.  */

FILE *
fopen_excl (const char *fname, int binary)
{
#ifdef O_EXCL
  int fd;
  FILE *fp;
  int flags = O_WRONLY | O_CREAT | O_EXCL;
# ifdef O_BINARY
  if (binary)
    flags |= O_BINARY;
# endif
  fd = open (fname, flags, 0666);
  if (fd < 0)
    return NULL;
  fp = fdopen (fd, binary ? "wb" : "w");
  if (fp == NULL)
    {
      /* fdopen does not take ownership of FD when it fails, so close
         it here.  Keep fdopen's errno for the caller, since close
         may overwrite it.  */
      int saved_errno = errno;
      close (fd);
      errno = saved_errno;
    }
  return fp;
#else /* not O_EXCL */
  /* Manually check whether the file exists.  This is prone to race
     conditions, but systems without O_EXCL haven't deserved
     better.  */
  if (file_exists_p (fname))
    {
      errno = EEXIST;
      return NULL;
    }
  return fopen (fname, binary ? "wb" : "w");
#endif /* not O_EXCL */
}
582
583
/* Create DIRECTORY, first creating any missing leading path
   components, in the manner of `mkdir -p'.

   Each prefix of the path that ends at a '/' (and finally the whole
   path) is checked with file_exists_p and created with mkdir (mode
   0777) if absent.  Results for the intermediate components are
   ignored; the value returned is that of the final component: 0 if
   it already existed, otherwise whatever mkdir() returned for it.  */
int
make_directory (const char *directory)
{
  int pos, status;
  char *dir;

  /* Work on a writable stack copy, since components are temporarily
     zero-terminated in place.  */
  STRDUP_ALLOCA (dir, directory);

  /* A leading '/' is stepped over so that absolute paths do not
     yield an empty first component.  */
  pos = (*dir == '/');
  for (;;)
    {
      int at_end;

      /* Advance to the end of the current component.  */
      while (dir[pos] != '\0' && dir[pos] != '/')
        ++pos;
      at_end = (dir[pos] == '\0');

      /* Cut the path here and make sure this prefix exists.
         Intermediate creations are allowed to fail: leading
         components are not necessarily directories.  */
      dir[pos] = '\0';
      status = file_exists_p (dir) ? 0 : mkdir (dir, 0777);

      if (at_end)
        break;

      /* Restore the separator and move on to the next component.  */
      dir[pos] = '/';
      ++pos;
    }
  return status;
}
623
/* Combine the directory part of BASE with FILE and return the result
   in freshly allocated storage.  Everything in BASE after its last
   '/' is dropped and replaced with FILE; if BASE has no '/', the
   result is just a copy of FILE.

     file_merge("/foo/bar", "baz")  => "/foo/baz"
     file_merge("/foo/bar/", "baz") => "/foo/bar/baz"
     file_merge("foo", "bar")       => "bar"

   The original describes it as a simpler and gentler version of
   uri_merge_1.  */

char *
file_merge (const char *base, const char *file)
{
  const char *slash = (const char *)strrchr (base, '/');
  size_t dirlen;
  char *merged;

  if (slash == NULL)
    return xstrdup (file);

  /* DIRLEN bytes of directory, one '/', FILE, and the final zero.  */
  dirlen = slash - base;
  merged = (char *)xmalloc (dirlen + 1 + strlen (file) + 1);
  memcpy (merged, base, dirlen);
  merged[dirlen] = '/';
  strcpy (merged + dirlen + 1, file);

  return merged;
}
649
650
651static int in_acclist PARAMS ((const char *const *, const char *, int));
652
653/* Determine whether a file is acceptable to be followed, according to
654 lists of patterns to accept/reject. */
655int
656acceptable (const char *s)
657{
658 int l = strlen (s);
659
660 while (l && s[l] != '/')
661 --l;
662 if (s[l] == '/')
663 s += (l + 1);
664 if (opt.accepts)
665 {
666 if (opt.rejects)
667 return (in_acclist ((const char *const *)opt.accepts, s, 1)
668 && !in_acclist ((const char *const *)opt.rejects, s, 1));
669 else
670 return in_acclist ((const char *const *)opt.accepts, s, 1);
671 }
672 else if (opt.rejects)
673 return !in_acclist ((const char *const *)opt.rejects, s, 1);
674 return 1;
675}
676
/* Prefix test: return 1 if S2 begins with S1, 0 otherwise.  For
   example, with S1 `/something' the result is 1 exactly when S2
   starts with `/something'.  An empty S1 is a prefix of anything.  */
int
frontcmp (const char *s1, const char *s2)
{
  while (*s1 != '\0' && *s2 != '\0' && *s1 == *s2)
    {
      ++s1;
      ++s2;
    }
  /* S1 was a prefix exactly when all of it was consumed.  */
  return *s1 == '\0';
}
686
687/* Iterate through STRLIST, and return the first element that matches
688 S, through wildcards or front comparison (as appropriate). */
689static char *
690proclist (char **strlist, const char *s, enum accd flags)
691{
692 char **x;
693 for (x = strlist; *x; x++)
694 {
695 /* Remove leading '/' if ALLABS */
696 char *p = *x + ((flags & ALLABS) && (**x == '/'));
697 if (has_wildcards_p (p))
698 {
699 if (fnmatch (p, s, FNM_PATHNAME) == 0)
700 break;
701 }
702 else
703 {
704 if (frontcmp (p, s))
705 break;
706 }
707 }
708 return *x;
709}
710
711/* Returns whether DIRECTORY is acceptable for download, wrt the
712 include/exclude lists.
713
714 If FLAGS is ALLABS, the leading `/' is ignored in paths; relative
715 and absolute paths may be freely intermixed. */
716int
717accdir (const char *directory, enum accd flags)
718{
719 /* Remove starting '/'. */
720 if (flags & ALLABS && *directory == '/')
721 ++directory;
722 if (opt.includes)
723 {
724 if (!proclist (opt.includes, directory, flags))
725 return 0;
726 }
727 if (opt.excludes)
728 {
729 if (proclist (opt.excludes, directory, flags))
730 return 0;
731 }
732 return 1;
733}
734
/* Return 1 if STRING ends with TAIL, 0 otherwise.  For instance:

     match_tail ("abc", "bc", 0)  -> 1
     match_tail ("abc", "ab", 0)  -> 0
     match_tail ("abc", "abc", 0) -> 1

   If FOLD_CASE_P is non-zero, characters are compared after being
   passed through TOLOWER, making the comparison
   case-insensitive.  */

int
match_tail (const char *string, const char *tail, int fold_case_p)
{
  /* Both indices start on the terminating zero and move toward the
     front in lockstep for as long as the characters agree.  */
  int si = strlen (string);
  int ti = strlen (tail);

  /* Two separate loops, as in the original, so that the common
     case-sensitive path does no folding work.  */
  if (fold_case_p)
    {
      while (si >= 0 && ti >= 0
             && TOLOWER (string[si]) == TOLOWER (tail[ti]))
        --si, --ti;
    }
  else
    {
      while (si >= 0 && ti >= 0 && string[si] == tail[ti])
        --si, --ti;
    }

  /* TI runs off the front exactly when all of TAIL was matched.  */
  return ti == -1;
}
771
/* Return 1 if S matches at least one element of the NULL-terminated
   list ACCEPTS, 0 otherwise.

   An element containing wildcards is matched with fnmatch().  Any
   other element is compared as a suffix of S with match_tail() when
   BACKWARD is non-zero, and for exact equality with strcmp() when
   BACKWARD is 0.  */
static int
in_acclist (const char *const *accepts, const char *s, int backward)
{
  const char *const *pat;

  for (pat = accepts; *pat != NULL; pat++)
    {
      int hit;

      if (has_wildcards_p (*pat))
        /* fnmatch returns 0 when the pattern *does* match.  */
        hit = (fnmatch (*pat, s, 0) == 0);
      else if (backward)
        hit = match_tail (s, *pat, 0);
      else
        hit = (strcmp (s, *pat) == 0);

      if (hit)
        return 1;
    }
  return 0;
}
806
/* Return a pointer into STR just past the last '.' of its final path
   component (the file extension), or NULL if a '/' is met before any
   '.' or there is no '.' at all.  Examples:

     suffix ("foo.bar")      -> "bar"
     suffix ("foo.bar.baz")  -> "baz"
     suffix ("/foo/bar")     -> NULL
     suffix ("/foo.bar/baz") -> NULL  */
char *
suffix (const char *str)
{
  int pos = strlen (str);

  /* Scan backwards from the terminating zero until a '.', a '/', or
     the first character is reached.  */
  while (pos > 0 && str[pos] != '/' && str[pos] != '.')
    --pos;

  return str[pos] == '.' ? (char *)str + pos + 1 : NULL;
}
825
/* Return 1 if S contains any of the globbing characters `*', `?',
   `[' or `]', and 0 otherwise.  */

int
has_wildcards_p (const char *s)
{
  const char *p;

  for (p = s; *p != '\0'; p++)
    switch (*p)
      {
      case '*':
      case '?':
      case '[':
      case ']':
        return 1;
      default:
        break;
      }
  return 0;
}
837
/* Return 1 if the suffix of FNAME (as found by suffix()) looks like
   an HTML one, 0 otherwise.  Compared case-insensitively, these
   suffixes qualify:

     html
     htm
     ?html  (`?' stands for any single character)

   #### CAVEAT.  A matching suffix does not guarantee that the file
   actually contains HTML!  */
int
has_html_suffix_p (const char *fname)
{
  const char *ext = suffix (fname);

  if (ext == NULL)
    return 0;

  return (strcasecmp (ext, "html") == 0
          || strcasecmp (ext, "htm") == 0
          /* One arbitrary leading character followed by "html".  */
          || (ext[0] != '\0' && strcasecmp (ext + 1, "html") == 0));
}
862
/* Read one line from FP and return it in freshly allocated storage
   (obtained through xmalloc/xrealloc), which the caller releases
   when done with it.

   The line length is limited only by available memory.  The trailing
   newline, if one was read, is kept, and the line is terminated with
   a zero character.

   NULL is returned when nothing was read before end-of-file, and
   also on error; use ferror() to tell the two apart.  */

char *
read_whole_line (FILE *fp)
{
  int used = 0;                 /* characters stored so far */
  int capacity = 82;            /* current size of BUF */
  char *buf = (char *)xmalloc (capacity);

  for (;;)
    {
      if (fgets (buf + used, capacity - used, fp) == NULL)
        break;
      used += strlen (buf + used);

      /* USED can still be 0 here, for example when a line of a
         binary file begins with \0; just read again.  */
      if (used > 0)
        {
          if (buf[used - 1] == '\n')
            break;

          /* fgets() either read the whole line or filled the space
             it was given, so doubling the buffer unconditionally is
             fine.  */
          capacity *= 2;
          buf = xrealloc (buf, capacity);
        }
    }

  if (used == 0 || ferror (fp))
    {
      xfree (buf);
      return NULL;
    }

  /* Give back what the exponential growth over-allocated.  `used + 1'
     accounts for the terminating \0, which fgets() already wrote.  */
  if (used + 1 < capacity)
    buf = xrealloc (buf, used + 1);
  return buf;
}
912
913
914/* Read FILE into memory. A pointer to `struct file_memory' are
915 returned; use struct element `content' to access file contents, and
916 the element `length' to know the file length. `content' is *not*
917 zero-terminated, and you should *not* read or write beyond the [0,
918 length) range of characters.
919
920 After you are done with the file contents, call read_file_free to
921 release the memory.
922
923 Depending on the operating system and the type of file that is
924 being read, read_file() either mmap's the file into memory, or
925 reads the file into the core using read().
926
927 If file is named "-", fileno(stdin) is used for reading instead.
928 If you want to read from a real file named "-", use "./-" instead. */
929
930struct file_memory *
931read_file (const char *file)
932{
933 int fd;
934 struct file_memory *fm;
935 long size;
936 int inhibit_close = 0;
937
938 /* Some magic in the finest tradition of Perl and its kin: if FILE
939 is "-", just use stdin. */
940 if (HYPHENP (file))
941 {
942 fd = fileno (stdin);
943 inhibit_close = 1;
944 /* Note that we don't inhibit mmap() in this case. If stdin is
945 redirected from a regular file, mmap() will still work. */
946 }
947 else
948 fd = open (file, O_RDONLY);
949 if (fd < 0)
950 return NULL;
951 fm = xnew (struct file_memory);
952
953#ifdef HAVE_MMAP
954 {
955 struct_fstat buf;
956 if (fstat (fd, &buf) < 0)
957 goto mmap_lose;
958 fm->length = buf.st_size;
959 /* NOTE: As far as I know, the callers of this function never
960 modify the file text. Relying on this would enable us to
961 specify PROT_READ and MAP_SHARED for a marginal gain in
962 efficiency, but at some cost to generality. */
963 fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
964 MAP_PRIVATE, fd, 0);
965 if (fm->content == (char *)MAP_FAILED)
966 goto mmap_lose;
967 if (!inhibit_close)
968 close (fd);
969
970 fm->mmap_p = 1;
971 return fm;
972 }
973
974 mmap_lose:
975 /* The most common reason why mmap() fails is that FD does not point
976 to a plain file. However, it's also possible that mmap() doesn't
977 work for a particular type of file. Therefore, whenever mmap()
978 fails, we just fall back to the regular method. */
979#endif /* HAVE_MMAP */
980
981 fm->length = 0;
982 size = 512; /* number of bytes fm->contents can
983 hold at any given time. */
984 fm->content = xmalloc (size);
985 while (1)
986 {
987 wgint nread;
988 if (fm->length > size / 2)
989 {
990 /* #### I'm not sure whether the whole exponential-growth
991 thing makes sense with kernel read. On Linux at least,
992 read() refuses to read more than 4K from a file at a
993 single chunk anyway. But other Unixes might optimize it
994 better, and it doesn't *hurt* anything, so I'm leaving
995 it. */
996
997 /* Normally, we grow SIZE exponentially to make the number
998 of calls to read() and realloc() logarithmic in relation
999 to file size. However, read() can read an amount of data
1000 smaller than requested, and it would be unreasonable to
1001 double SIZE every time *something* was read. Therefore,
1002 we double SIZE only when the length exceeds half of the
1003 entire allocated size. */
1004 size <<= 1;
1005 fm->content = xrealloc (fm->content, size);
1006 }
1007 nread = read (fd, fm->content + fm->length, size - fm->length);
1008 if (nread > 0)
1009 /* Successful read. */
1010 fm->length += nread;
1011 else if (nread < 0)
1012 /* Error. */
1013 goto lose;
1014 else
1015 /* EOF */
1016 break;
1017 }
1018 if (!inhibit_close)
1019 close (fd);
1020 if (size > fm->length && fm->length != 0)
1021 /* Due to exponential growth of fm->content, the allocated region
1022 might be much larger than what is actually needed. */
1023 fm->content = xrealloc (fm->content, fm->length);
1024 fm->mmap_p = 0;
1025 return fm;
1026
1027 lose:
1028 if (!inhibit_close)
1029 close (fd);
1030 xfree (fm->content);
1031 xfree (fm);
1032 return NULL;
1033}
1034
1035/* Release the resources held by FM. Specifically, this calls
1036 munmap() or xfree() on fm->content, depending whether mmap or
1037 malloc/read were used to read in the file. It also frees the
1038 memory needed to hold the FM structure itself. */
1039
1040void
1041read_file_free (struct file_memory *fm)
1042{
1043#ifdef HAVE_MMAP
1044 if (fm->mmap_p)
1045 {
1046 munmap (fm->content, fm->length);
1047 }
1048 else
1049#endif
1050 {
1051 xfree (fm->content);
1052 }
1053 xfree (fm);
1054}
1055
1056
/* Free every string in the NULL-terminated vector VEC and then the
   vector itself.  A NULL VEC is accepted and ignored.  */
void
free_vec (char **vec)
{
  char **p;

  if (vec == NULL)
    return;

  for (p = vec; *p != NULL; p++)
    xfree (*p);
  xfree (vec);
}
1070
/* Append the NULL-terminated vector V2 to the NULL-terminated vector
   V1 and return the merged vector.  V1 is reallocated and V2 itself
   (not the strings it points to, which now belong to the result) is
   freed, so neither pointer may be used after the call.

   If V1 is NULL, V2 is returned unchanged; if V2 is NULL, V1 is
   returned unchanged.  */
char **
merge_vecs (char **v1, char **v2)
{
  int i, j;

  if (!v1)
    return v2;
  if (!v2)
    return v1;
  if (!*v2)
    {
      /* To avoid j == 0 */
      xfree (v2);
      return v1;
    }
  /* Count v1. */
  for (i = 0; v1[i]; i++)
    ;
  /* Count v2. */
  for (j = 0; v2[j]; j++)
    ;
  /* Reallocate v1 to hold both sets of elements plus the NULL.  The
     element size is taken from the vector itself (char *), not from
     the unrelated pointer-to-vector type.  */
  v1 = (char **)xrealloc (v1, (i + j + 1) * sizeof *v1);
  /* Copy V2's elements together with its terminating NULL.  */
  memcpy (v1 + i, v2, (j + 1) * sizeof *v2);
  xfree (v2);
  return v1;
}
1099
/* Append an xstrdup'ed copy of STR to the NULL-terminated vector VEC
   and return the (re)allocated vector.  VEC may be NULL, in which
   case a new one-element vector is created.  */

char **
vec_append (char **vec, const char *str)
{
  int used = 0;                 /* elements already in VEC */

  if (vec != NULL)
    while (vec[used] != NULL)
      ++used;

  /* Room for the existing elements, the new one, and the NULL.  */
  vec = xrealloc (vec, (used + 2) * sizeof (char *));
  vec[used] = xstrdup (str);
  vec[used + 1] = NULL;
  return vec;
}
1123
1124
1125/* Sometimes it's useful to create "sets" of strings, i.e. special
1126 hash tables where you want to store strings as keys and merely
1127 query for their existence. Here is a set of utility routines that
1128 makes that transparent. */
1129
/* Add the string S to the set HT.  If S is already present nothing
   is done, which avoids freeing the old key only to strdup a new
   one.  Otherwise a copy of S becomes the key.  */
void
string_set_add (struct hash_table *ht, const char *s)
{
  /* The value is the literal "1": an arbitrary, recognizable marker
     that costs no extra memory because the same literal is shared by
     every entry of every such set.  */
  if (!hash_table_contains (ht, s))
    hash_table_put (ht, xstrdup (s), "1");
}
1145
/* Set-flavored name for hash_table_contains: returns whatever that
   function returns for the key S in HT.  */

int
string_set_contains (struct hash_table *ht, const char *s)
{
  int present = hash_table_contains (ht, s);
  return present;
}
1153
/* Callback used by string_set_to_array.  ARG points to a cursor (a
   char ** lvalue) into the output array; KEY is stored at the cursor
   and the cursor is advanced by one slot.  The value is ignored.
   Always returns 0.  */
static int
string_set_to_array_mapper (void *key, void *value_ignored, void *arg)
{
  char ***cursor = (char ***) arg;
  char **slot = *cursor;

  *slot = (char *) key;
  *cursor = slot + 1;
  return 0;
}
1161
1162/* Convert the specified string set to array. ARRAY should be large
1163 enough to hold hash_table_count(ht) char pointers. */
1164
1165void string_set_to_array (struct hash_table *ht, char **array)
1166{
1167 hash_table_map (ht, string_set_to_array_mapper, &array);
1168}
1169
/* hash_table_map callback used by string_set_free: release KEY with
   xfree.  The value and the extra argument are not used.  Always
   returns 0.  */

static int
string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored)
{
  (void) value_ignored;
  (void) arg_ignored;
  xfree (key);
  return 0;
}

/* Free every key stored in the string set HT, then destroy HT
   itself.  */

void
string_set_free (struct hash_table *ht)
{
  /* Keys first: they are unreachable once the table is gone.  */
  hash_table_map (ht, string_set_free_mapper, NULL);
  hash_table_destroy (ht);
}
1183
/* hash_table_map callback used by free_keys_and_values: release both
   KEY and VALUE with xfree.  The extra argument is not used.  Always
   returns 0.  */

static int
free_keys_and_values_mapper (void *key, void *value, void *arg_ignored)
{
  (void) arg_ignored;
  xfree (key);
  xfree (value);
  return 0;
}

/* Call xfree on every key and every value stored in HT.  The table
   itself is not destroyed here.  */

void
free_keys_and_values (struct hash_table *ht)
{
  hash_table_map (ht, free_keys_and_values_mapper, NULL);
}
1199
1200
/* Store in *SEP the thousands separator and in *GROUPING the digit
   grouping string (in the format of struct lconv's "grouping"
   member) to be used when printing numbers.

   The data is computed on the first call and cached in static
   storage; later calls just return the cached pointers.  When
   LC_NUMERIC is available the values come from the user's locale;
   locales that specify no separator, and builds without LC_NUMERIC,
   get "," and groups of three digits.  */

static void
get_grouping_data (const char **sep, const char **grouping)
{
  static const char *cached_sep;
  static const char *cached_grouping;
  static int initialized;
  if (!initialized)
    {
      /* Start with the forced defaults, used for locales that
         specify no separators ("C", "hr", and probably many
         more.)  */
      cached_sep = ",";
      cached_grouping = "\x03";

      /* If locale.h is present and defines LC_NUMERIC, assume C89
         struct lconv with "thousands_sep" and "grouping" members.  */
#ifdef LC_NUMERIC
      {
        struct lconv *lconv;
        char *oldlocale = NULL;
        const char *current = setlocale (LC_NUMERIC, NULL);

        /* The string returned by setlocale may be overwritten by the
           next setlocale call, so keep a private copy to restore
           from.  */
        if (current != NULL)
          oldlocale = xstrdup (current);

        /* Temporarily switch to the locale from the environment.  */
        setlocale (LC_NUMERIC, "");
        lconv = localeconv ();
        if (*lconv->thousands_sep)
          {
            /* localeconv's storage may be overwritten later as well,
               hence the copies.  They are made only when actually
               used, so nothing leaks on the fallback path.  */
            cached_sep = xstrdup (lconv->thousands_sep);
            cached_grouping = xstrdup (lconv->grouping);
          }

        /* Restore the locale to its previous setting.  */
        if (oldlocale != NULL)
          {
            setlocale (LC_NUMERIC, oldlocale);
            xfree (oldlocale);
          }
      }
#endif
      initialized = 1;
    }
  *sep = cached_sep;
  *grouping = cached_grouping;
}
1232
/* Add thousand separators to a number already in string form.  Used
   by with_thousand_seps and with_thousand_seps_sum.

   REPR is a string of digits, optionally preceded by '-'; the sign
   is never grouped.  The separator and the group sizes come from
   get_grouping_data.

   The result is built back to front in a static buffer and a pointer
   into that buffer is returned, so it is overwritten by the next
   call.  The buffer is never overrun: a separator that would not
   leave room for the remaining digits and the sign is left out, and
   if the digits alone do not fit, the most significant ones are
   dropped.  A REPR without digits ("" or "-") is returned
   unchanged.  */

char *
add_thousand_seps (const char *repr)
{
  static char outbuf[48];
  char *p = outbuf + sizeof outbuf;

  const int negative = (*repr == '-');
  const char *in = strchr (repr, '\0');
  const char *instart = repr + negative; /* don't group sign */

  /* Info received from locale */
  const char *grouping, *sep;
  int seplen;

  /* State information */
  int i = 0, groupsize;
  const char *atgroup;

  /* Initialize grouping data. */
  get_grouping_data (&sep, &grouping);
  seplen = strlen (sep);
  atgroup = grouping;
  groupsize = *atgroup++;

  /* Write the number into the buffer, backwards, inserting the
     separators as necessary.  Each pass needs room for one digit
     plus the sign that is still to be written.  */
  *--p = '\0';
  while (in > instart && p - outbuf > negative)
    {
      *--p = *--in;
      if (in == instart)
        break;
      /* Prepend SEP to every groupsize'd digit and get new groupsize. */
      if (++i == groupsize)
        {
          /* Insert the separator only if the digits still to be
             copied and the sign will fit in front of it.  */
          if (seplen + (in - instart) + negative <= p - outbuf)
            memcpy (p -= seplen, sep, seplen);
          i = 0;
          if (*atgroup)
            groupsize = *atgroup++;
        }
    }
  if (negative)
    *--p = '-';

  return p;
}
1284
1285/* Return a printed representation of N with thousand separators.
1286 This should respect locale settings, with the exception of the "C"
1287 locale which mandates no separator, but we use one anyway.
1288
1289 Unfortunately, we cannot use %'d (in fact it would be %'j) to get
1290 the separators because it's too non-portable, and it's hard to test
1291 for this feature at configure time. Besides, it wouldn't work in
1292 the "C" locale, which many Unix users still work in. */
1293
1294char *
1295with_thousand_seps (wgint l)
1296{
1297 char inbuf[24];
1298 /* Print the number into the buffer. */
1299 number_to_string (inbuf, l);
1300 return add_thousand_seps (inbuf);
1301}
1302
1303/* When SUM_SIZE_INT is wgint, with_thousand_seps_large is #defined to
1304 with_thousand_seps. The function below is used on non-LFS systems
1305 where SUM_SIZE_INT typedeffed to double. */
1306
1307#ifndef with_thousand_seps_sum
1308char *
1309with_thousand_seps_sum (SUM_SIZE_INT l)
1310{
1311 char inbuf[32];
1312 snprintf (inbuf, sizeof (inbuf), "%.0f", l);
1313 return add_thousand_seps (inbuf);
1314}
1315#endif /* not with_thousand_seps_sum */
1316
1317/* N, a byte quantity, is converted to a human-readable abberviated
1318 form a la sizes printed by `ls -lh'. The result is written to a
1319 static buffer, a pointer to which is returned.
1320
1321 Unlike `with_thousand_seps', this approximates to the nearest unit.
1322 Quoting GNU libit: "Most people visually process strings of 3-4
1323 digits effectively, but longer strings of digits are more prone to
1324 misinterpretation. Hence, converting to an abbreviated form
1325 usually improves readability."
1326
1327 This intentionally uses kilobyte (KB), megabyte (MB), etc. in their
1328 original computer science meaning of "powers of 1024". Powers of
1329 1000 would be useless since Wget already displays sizes with
1330 thousand separators. We don't use the "*bibyte" names invented in
1331 1998, and seldom used in practice. Wikipedia's entry on kilobyte
1332 discusses this in some detail. */
1333
1334char *
1335human_readable (wgint n)
1336{
1337 /* These suffixes are compatible with those of GNU `ls -lh'. */
1338 static char powers[] =
1339 {
1340 'K', /* kilobyte, 2^10 bytes */
1341 'M', /* megabyte, 2^20 bytes */
1342 'G', /* gigabyte, 2^30 bytes */
1343 'T', /* terabyte, 2^40 bytes */
1344 'P', /* petabyte, 2^50 bytes */
1345 'E', /* exabyte, 2^60 bytes */
1346 };
1347 static char buf[8];
1348 int i;
1349
1350 /* If the quantity is smaller than 1K, just print it. */
1351 if (n < 1024)
1352 {
1353 snprintf (buf, sizeof (buf), "%d", (int) n);
1354 return buf;
1355 }
1356
1357 /* Loop over powers, dividing N with 1024 in each iteration. This
1358 works unchanged for all sizes of wgint, while still avoiding
1359 non-portable `long double' arithmetic. */
1360 for (i = 0; i < countof (powers); i++)
1361 {
1362 /* At each iteration N is greater than the *subsequent* power.
1363 That way N/1024.0 produces a decimal number in the units of
1364 *this* power. */
1365 if ((n >> 10) < 1024 || i == countof (powers) - 1)
1366 {
1367 double val = n / 1024.0;
1368 /* Print values smaller than 10 with one decimal digits, and
1369 others without any decimals. */
1370 snprintf (buf, sizeof (buf), "%.*f%c",
1371 val < 10 ? 1 : 0, val, powers[i]);
1372 return buf;
1373 }
1374 n >>= 10;
1375 }
1376 return NULL; /* unreached */
1377}
1378
1379/* Count the digits in the provided number. Used to allocate space
1380 when printing numbers. */
1381
1382int
1383numdigit (wgint number)
1384{
1385 int cnt = 1;
1386 if (number < 0)
1387 ++cnt; /* accomodate '-' */
1388 while ((number /= 10) != 0)
1389 ++cnt;
1390 return cnt;
1391}
1392
1393#define PR(mask) *p++ = n / (mask) + '0'
1394
1395/* DIGITS_<D> is used to print a D-digit number and should be called
1396 with mask==10^(D-1). It prints n/mask (the first digit), reducing
1397 n to n%mask (the remaining digits), and calling DIGITS_<D-1>.
1398 Recursively this continues until DIGITS_1 is invoked. */
1399
1400#define DIGITS_1(mask) PR (mask)
1401#define DIGITS_2(mask) PR (mask), n %= (mask), DIGITS_1 ((mask) / 10)
1402#define DIGITS_3(mask) PR (mask), n %= (mask), DIGITS_2 ((mask) / 10)
1403#define DIGITS_4(mask) PR (mask), n %= (mask), DIGITS_3 ((mask) / 10)
1404#define DIGITS_5(mask) PR (mask), n %= (mask), DIGITS_4 ((mask) / 10)
1405#define DIGITS_6(mask) PR (mask), n %= (mask), DIGITS_5 ((mask) / 10)
1406#define DIGITS_7(mask) PR (mask), n %= (mask), DIGITS_6 ((mask) / 10)
1407#define DIGITS_8(mask) PR (mask), n %= (mask), DIGITS_7 ((mask) / 10)
1408#define DIGITS_9(mask) PR (mask), n %= (mask), DIGITS_8 ((mask) / 10)
1409#define DIGITS_10(mask) PR (mask), n %= (mask), DIGITS_9 ((mask) / 10)
1410
1411/* DIGITS_<11-20> are only used on machines with 64-bit wgints. */
1412
1413#define DIGITS_11(mask) PR (mask), n %= (mask), DIGITS_10 ((mask) / 10)
1414#define DIGITS_12(mask) PR (mask), n %= (mask), DIGITS_11 ((mask) / 10)
1415#define DIGITS_13(mask) PR (mask), n %= (mask), DIGITS_12 ((mask) / 10)
1416#define DIGITS_14(mask) PR (mask), n %= (mask), DIGITS_13 ((mask) / 10)
1417#define DIGITS_15(mask) PR (mask), n %= (mask), DIGITS_14 ((mask) / 10)
1418#define DIGITS_16(mask) PR (mask), n %= (mask), DIGITS_15 ((mask) / 10)
1419#define DIGITS_17(mask) PR (mask), n %= (mask), DIGITS_16 ((mask) / 10)
1420#define DIGITS_18(mask) PR (mask), n %= (mask), DIGITS_17 ((mask) / 10)
1421#define DIGITS_19(mask) PR (mask), n %= (mask), DIGITS_18 ((mask) / 10)
1422
1423/* SPRINTF_WGINT is used by number_to_string to handle pathological
1424 cases and to portably support strange sizes of wgint. Ideally this
1425 would just use "%j" and intmax_t, but many systems don't support
1426 it, so it's used only if nothing else works. */
1427#if SIZEOF_LONG >= SIZEOF_WGINT
1428# define SPRINTF_WGINT(buf, n) sprintf (buf, "%ld", (long) (n))
1429#else
1430# if SIZEOF_LONG_LONG >= SIZEOF_WGINT
1431# define SPRINTF_WGINT(buf, n) sprintf (buf, "%lld", (long long) (n))
1432# else
1433# ifdef WINDOWS
1434# define SPRINTF_WGINT(buf, n) sprintf (buf, "%I64d", (__int64) (n))
1435# else
1436# define SPRINTF_WGINT(buf, n) sprintf (buf, "%j", (intmax_t) (n))
1437# endif
1438# endif
1439#endif
1440
1441/* Shorthand for casting to wgint. */
1442#define W wgint
1443
1444/* Print NUMBER to BUFFER in base 10. This is equivalent to
1445 `sprintf(buffer, "%lld", (long long) number)', only typically much
1446 faster and portable to machines without long long.
1447
1448 The speedup may make a difference in programs that frequently
1449 convert numbers to strings. Some implementations of sprintf,
1450 particularly the one in GNU libc, have been known to be extremely
1451 slow when converting integers to strings.
1452
1453 Return the pointer to the location where the terminating zero was
1454 printed. (Equivalent to calling buffer+strlen(buffer) after the
1455 function is done.)
1456
1457 BUFFER should be big enough to accept as many bytes as you expect
1458 the number to take up. On machines with 64-bit longs the maximum
1459 needed size is 24 bytes. That includes the digits needed for the
1460 largest 64-bit number, the `-' sign in case it's negative, and the
1461 terminating '\0'. */
1462
1463char *
1464number_to_string (char *buffer, wgint number)
1465{
1466 char *p = buffer;
1467 wgint n = number;
1468
1469#if (SIZEOF_WGINT != 4) && (SIZEOF_WGINT != 8)
1470 /* We are running in a strange or misconfigured environment. Let
1471 sprintf cope with it. */
1472 SPRINTF_WGINT (buffer, n);
1473 p += strlen (buffer);
1474#else /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */
1475
1476 if (n < 0)
1477 {
1478 if (n < -WGINT_MAX)
1479 {
1480 /* -n would overflow. Have sprintf deal with this. */
1481 SPRINTF_WGINT (buffer, n);
1482 p += strlen (buffer);
1483 return p;
1484 }
1485
1486 *p++ = '-';
1487 n = -n;
1488 }
1489
1490 /* Use the DIGITS_ macro appropriate for N's number of digits. That
1491 way printing any N is fully open-coded without a loop or jump.
1492 (Also see description of DIGITS_*.) */
1493
1494 if (n < 10) DIGITS_1 (1);
1495 else if (n < 100) DIGITS_2 (10);
1496 else if (n < 1000) DIGITS_3 (100);
1497 else if (n < 10000) DIGITS_4 (1000);
1498 else if (n < 100000) DIGITS_5 (10000);
1499 else if (n < 1000000) DIGITS_6 (100000);
1500 else if (n < 10000000) DIGITS_7 (1000000);
1501 else if (n < 100000000) DIGITS_8 (10000000);
1502 else if (n < 1000000000) DIGITS_9 (100000000);
1503#if SIZEOF_WGINT == 4
1504 /* wgint is 32 bits wide: no number has more than 10 digits. */
1505 else DIGITS_10 (1000000000);
1506#else
1507 /* wgint is 64 bits wide: handle numbers with more than 9 decimal
1508 digits. Constants are constructed by compile-time multiplication
1509 to avoid dealing with different notations for 64-bit constants
1510 (nnnL, nnnLL, and nnnI64, depending on the compiler). */
1511 else if (n < 10*(W)1000000000) DIGITS_10 (1000000000);
1512 else if (n < 100*(W)1000000000) DIGITS_11 (10*(W)1000000000);
1513 else if (n < 1000*(W)1000000000) DIGITS_12 (100*(W)1000000000);
1514 else if (n < 10000*(W)1000000000) DIGITS_13 (1000*(W)1000000000);
1515 else if (n < 100000*(W)1000000000) DIGITS_14 (10000*(W)1000000000);
1516 else if (n < 1000000*(W)1000000000) DIGITS_15 (100000*(W)1000000000);
1517 else if (n < 10000000*(W)1000000000) DIGITS_16 (1000000*(W)1000000000);
1518 else if (n < 100000000*(W)1000000000) DIGITS_17 (10000000*(W)1000000000);
1519 else if (n < 1000000000*(W)1000000000) DIGITS_18 (100000000*(W)1000000000);
1520 else DIGITS_19 (1000000000*(W)1000000000);
1521#endif
1522
1523 *p = '\0';
1524#endif /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */
1525
1526 return p;
1527}
1528
1529#undef PR
1530#undef W
1531#undef DIGITS_1
1532#undef DIGITS_2
1533#undef DIGITS_3
1534#undef DIGITS_4
1535#undef DIGITS_5
1536#undef DIGITS_6
1537#undef DIGITS_7
1538#undef DIGITS_8
1539#undef DIGITS_9
1540#undef DIGITS_10
1541#undef DIGITS_11
1542#undef DIGITS_12
1543#undef DIGITS_13
1544#undef DIGITS_14
1545#undef DIGITS_15
1546#undef DIGITS_16
1547#undef DIGITS_17
1548#undef DIGITS_18
1549#undef DIGITS_19
1550
1551#define RING_SIZE 3
1552
1553/* Print NUMBER to a statically allocated string and return a pointer
1554 to the printed representation.
1555
1556 This function is intended to be used in conjunction with printf.
1557 It is hard to portably print wgint values:
1558 a) you cannot use printf("%ld", number) because wgint can be long
1559 long on 32-bit machines with LFS.
1560 b) you cannot use printf("%lld", number) because NUMBER could be
1561 long on 32-bit machines without LFS, or on 64-bit machines,
1562 which do not require LFS. Also, Windows doesn't support %lld.
1563 c) you cannot use printf("%j", (int_max_t) number) because not all
1564 versions of printf support "%j", the most notable being the one
1565 on Windows.
1566 d) you cannot #define WGINT_FMT to the appropriate format and use
1567 printf(WGINT_FMT, number) because that would break translations
1568 for user-visible messages, such as printf("Downloaded: %d
1569 bytes\n", number).
1570
1571 What you should use instead is printf("%s", number_to_static_string
1572 (number)).
1573
1574 CAVEAT: since the function returns pointers to static data, you
1575 must be careful to copy its result before calling it again.
1576 However, to make it more useful with printf, the function maintains
1577 an internal ring of static buffers to return. That way things like
1578 printf("%s %s", number_to_static_string (num1),
1579 number_to_static_string (num2)) work as expected. Three buffers
1580 are currently used, which means that "%s %s %s" will work, but "%s
1581 %s %s %s" won't. If you need to print more than three wgints,
1582 bump the RING_SIZE (or rethink your message.) */
1583
1584char *
1585number_to_static_string (wgint number)
1586{
1587 static char ring[RING_SIZE][24];
1588 static int ringpos;
1589 char *buf = ring[ringpos];
1590 number_to_string (buf, number);
1591 ringpos = (ringpos + 1) % RING_SIZE;
1592 return buf;
1593}
1594
1595
/* Return the width, in columns, of the terminal attached to stderr.
   Return 0 when the width cannot be determined.  */

int
determine_screen_width (void)
{
  /* If there's a way to get the terminal size using POSIX
     tcgetattr(), somebody please tell me.  */
#ifdef TIOCGWINSZ
  struct winsize wsz;

  /* Report no width whenever opt.lfilename is set (presumably output
     then goes to a log file rather than the terminal).  */
  if (opt.lfilename != NULL)
    return 0;

  if (ioctl (fileno (stderr), TIOCGWINSZ, &wsz) < 0)
    return 0;                   /* most likely ENOTTY */

  return wsz.ws_col;
#elif defined WINDOWS
  CONSOLE_SCREEN_BUFFER_INFO csbi;

  if (GetConsoleScreenBufferInfo (GetStdHandle (STD_ERROR_HANDLE), &csbi))
    return csbi.dwSize.X;
  return 0;
#else  /* neither WINDOWS nor TIOCGWINSZ */
  return 0;
#endif
}
1627
/* Return a random number between 0 and MAX-1, inclusive.

   The result is rand() scaled down to the requested range, so it can
   take at most RAND_MAX+1 distinct values no matter how large MAX
   is.

   The random number generator is seeded from the current time the
   first time this function is called.

   rand() is used unconditionally, for portability and simplicity.
   random() has been suggested as offering better randomness, but
   Wget does not need it.

   DO NOT use this for cryptographic purposes.  It is only meant to be
   used in situations where quality of the random numbers returned
   doesn't really matter.  */

int
random_number (int max)
{
  static int seeded;
  double scaled;
  int raw;

  if (!seeded)
    {
      srand (time (NULL));
      seeded = 1;
    }
  raw = rand ();

  /* On systems that don't define RAND_MAX, assume it to be 2**15 - 1,
     and enforce that assumption by masking other bits.  */
#ifndef RAND_MAX
# define RAND_MAX 32767
  raw &= RAND_MAX;
#endif

  /* Same purpose as rand() % max, but the result is determined by
     the high-order bits, for better randomness on architectures
     where rand() is implemented using a simple congruential
     generator.  */
  scaled = (double) max * raw / (RAND_MAX + 1.0);
  return (int) scaled;
}
1674
/* Return a random floating point number in the [0, 1) range, built
   from three random_number (1000) draws that supply three decimal
   digits each, giving 9 digits of precision.

   Modify this to use erand48() where available!  */

double
random_float (void)
{
  /* We can't rely on any specific value of RAND_MAX, but it is
     pretty surely greater than 1000, so each draw is good for three
     decimal digits.  */
  int thousandths = random_number (1000);
  int millionths = random_number (1000);
  int billionths = random_number (1000);

  return (thousandths / 1000.0
          + millionths / 1000000.0
          + billionths / 1000000000.0);
}
1690
1691
1692/* Implementation of run_with_timeout, a generic timeout-forcing
1693 routine for systems with Unix-like signal handling. */
1694
1695#ifdef USE_SIGNAL_TIMEOUT
1696# ifdef HAVE_SIGSETJMP
1697# define SETJMP(env) sigsetjmp (env, 1)
1698
1699static sigjmp_buf run_with_timeout_env;
1700
1701static RETSIGTYPE
1702abort_run_with_timeout (int sig)
1703{
1704 assert (sig == SIGALRM);
1705 siglongjmp (run_with_timeout_env, -1);
1706}
1707# else /* not HAVE_SIGSETJMP */
1708# define SETJMP(env) setjmp (env)
1709
1710static jmp_buf run_with_timeout_env;
1711
1712static RETSIGTYPE
1713abort_run_with_timeout (int sig)
1714{
1715 assert (sig == SIGALRM);
1716 /* We don't have siglongjmp to preserve the set of blocked signals;
1717 if we longjumped out of the handler at this point, SIGALRM would
1718 remain blocked. We must unblock it manually. */
1719 int mask = siggetmask ();
1720 mask &= ~sigmask (SIGALRM);
1721 sigsetmask (mask);
1722
1723 /* Now it's safe to longjump. */
1724 longjmp (run_with_timeout_env, -1);
1725}
1726# endif /* not HAVE_SIGSETJMP */
1727
/* Arrange for SIGALRM to be delivered in TIMEOUT seconds, using
   setitimer where ITIMER_REAL is defined and alarm otherwise.

   TIMEOUT should be non-zero.  A value so small that it would be
   rounded to zero is rounded up to the least legal value instead
   (1us for setitimer, 1s for alarm), so that SIGALRM is delivered in
   all cases.  */

static void
alarm_set (double timeout)
{
#ifdef ITIMER_REAL
  /* Use the modern itimer interface. */
  struct itimerval itv;
  long whole_secs = (long) timeout;

  xzero (itv);
  itv.it_value.tv_sec = whole_secs;
  itv.it_value.tv_usec = 1000000 * (timeout - whole_secs);
  if (itv.it_value.tv_sec == 0 && itv.it_value.tv_usec == 0)
    {
      /* Ensure that we wait for at least the minimum interval.
         Specifying zero would mean "wait forever".  */
      itv.it_value.tv_usec = 1;
    }
  setitimer (ITIMER_REAL, &itv, NULL);
#else  /* not ITIMER_REAL */
  /* Use the old alarm() interface.  TIMEOUTs smaller than 1 are
     rounded to 1, not to zero: alarm(0) means "never deliver the
     alarm", i.e. "wait forever", which is not what someone who
     specifies a 0.5s timeout would expect.  */
  int secs = (int) timeout;

  alarm (secs != 0 ? secs : 1);
#endif /* not ITIMER_REAL */
}
1762
/* Cancel the alarm set with alarm_set, using the interface that
   alarm_set uses.  */

static void
alarm_cancel (void)
{
#ifdef ITIMER_REAL
  /* Setting an all-zero timer value turns the timer off.  */
  struct itimerval off;

  xzero (off);
  setitimer (ITIMER_REAL, &off, NULL);
#else  /* not ITIMER_REAL */
  alarm (0);
#endif /* not ITIMER_REAL */
}
1776
1777/* Call FUN(ARG), but don't allow it to run for more than TIMEOUT
1778 seconds. Returns non-zero if the function was interrupted with a
1779 timeout, zero otherwise.
1780
1781 This works by setting up SIGALRM to be delivered in TIMEOUT seconds
1782 using setitimer() or alarm(). The timeout is enforced by
1783 longjumping out of the SIGALRM handler. This has several
1784 advantages compared to the traditional approach of relying on
1785 signals causing system calls to exit with EINTR:
1786
1787 * The callback function is *forcibly* interrupted after the
1788 timeout expires, (almost) regardless of what it was doing and
1789 whether it was in a syscall. For example, a calculation that
1790 takes a long time is interrupted as reliably as an IO
1791 operation.
1792
1793 * It works with both SYSV and BSD signals because it doesn't
1794 depend on the default setting of SA_RESTART.
1795
1796 * It doesn't require special handler setup beyond a simple call
1797 to signal(). (It does use sigsetjmp/siglongjmp, but they're
1798 optional.)
1799
1800 The only downside is that, if FUN allocates internal resources that
1801 are normally freed prior to exit from the functions, they will be
1802 lost in case of timeout. */
1803
1804int
1805run_with_timeout (double timeout, void (*fun) (void *), void *arg)
1806{
1807 int saved_errno;
1808
1809 if (timeout == 0)
1810 {
1811 fun (arg);
1812 return 0;
1813 }
1814
1815 signal (SIGALRM, abort_run_with_timeout);
1816 if (SETJMP (run_with_timeout_env) != 0)
1817 {
1818 /* Longjumped out of FUN with a timeout. */
1819 signal (SIGALRM, SIG_DFL);
1820 return 1;
1821 }
1822 alarm_set (timeout);
1823 fun (arg);
1824
1825 /* Preserve errno in case alarm() or signal() modifies it. */
1826 saved_errno = errno;
1827 alarm_cancel ();
1828 signal (SIGALRM, SIG_DFL);
1829 errno = saved_errno;
1830
1831 return 0;
1832}
1833
1834#else /* not USE_SIGNAL_TIMEOUT */
1835
#ifndef WINDOWS
/* Stub version of run_with_timeout for systems without signal-based
   timeouts: TIMEOUT is ignored, FUN(ARG) is called, and zero ("not
   interrupted") is returned.  It is not defined under Windows,
   because Windows has its own version of run_with_timeout that uses
   threads.  */

int
run_with_timeout (double timeout, void (*fun) (void *), void *arg)
{
  (void) timeout;
  (*fun) (arg);
  return 0;
}
#endif /* not WINDOWS */
1848#endif /* not USE_SIGNAL_TIMEOUT */
1849
1850
1851#ifndef WINDOWS
1852
/* Sleep for SECONDS seconds, which may include a fraction, using the
   best interface available: nanosleep, then usleep, then select,
   then plain sleep.  Only the nanosleep variant goes back to sleep
   after being interrupted by a signal; the others may sleep
   shorter.  */

void
xsleep (double seconds)
{
#if defined HAVE_NANOSLEEP
  /* nanosleep is the preferred interface because it offers high
     accuracy and, more importantly, because it reports the time left
     when interrupted by a signal such as SIGWINCH, which lets us
     resume reliably.  (There was an actual Debian bug report about
     --limit-rate malfunctioning while the terminal was being
     resized.)  */
  struct timespec req, left;

  req.tv_sec = (long) seconds;
  req.tv_nsec = 1000000000 * (seconds - (long) seconds);
  for (;;)
    {
      if (nanosleep (&req, &left) >= 0)
        break;
      if (errno != EINTR)
        break;
      /* Interrupted by a signal: sleep for what remains.  */
      req = left;
    }
#elif defined HAVE_USLEEP
  /* If usleep is available, use it in preference to select.  */
  if (seconds >= 1)
    {
      /* On some systems, usleep cannot handle values larger than
         1,000,000.  If the period is larger than that, use sleep
         first, then add usleep for subsecond accuracy.  */
      sleep (seconds);
      seconds -= (long) seconds;
    }
  usleep (seconds * 1000000);
#elif defined HAVE_SELECT
  /* Note that, although Windows supports select, this sleeping
     strategy doesn't work there because Winsock's select doesn't
     implement timeout when it is passed NULL pointers for all fd
     sets.  (But it does work under Cygwin, which implements its own
     select.)  */
  struct timeval tv;

  tv.tv_sec = (long) seconds;
  tv.tv_usec = 1000000 * (seconds - (long) seconds);
  select (0, NULL, NULL, NULL, &tv);
  /* If select returns -1 and errno is EINTR, it means we were
     interrupted by a signal.  But without knowing how long we've
     actually slept, we can't return to sleep.  Using gettimeofday to
     track sleeps is slow and unreliable due to clock skew.  */
#else  /* none of the above */
  sleep (seconds);
#endif
}
1905
1906#endif /* not WINDOWS */
1907
/* Encode the LENGTH bytes starting at STR to base64 format and place
   the result in B64STORE.  The output is \0-terminated; B64STORE must
   point to a writable buffer of at least 1+BASE64_LENGTH(length)
   bytes.  Returns the length of the resulting base64 data, not
   counting the terminating zero.

   Exactly LENGTH bytes of STR are read, so STR need not be
   NUL-terminated and may contain arbitrary binary data.  A LENGTH of
   zero or less produces the empty string.

   This implementation will not emit newlines after 76 characters of
   base64 data.  */

int
base64_encode (const char *str, int length, char *b64store)
{
  /* Conversion table.  */
  static const char tbl[64] = {
    'A','B','C','D','E','F','G','H',
    'I','J','K','L','M','N','O','P',
    'Q','R','S','T','U','V','W','X',
    'Y','Z','a','b','c','d','e','f',
    'g','h','i','j','k','l','m','n',
    'o','p','q','r','s','t','u','v',
    'w','x','y','z','0','1','2','3',
    '4','5','6','7','8','9','+','/'
  };
  const unsigned char *s = (const unsigned char *) str;
  char *p = b64store;
  int i = 0;
  int rest;

  /* Transform each complete group of 3x8 bits to 4x6 bits, as
     required by base64.  */
  while (length - i >= 3)
    {
      *p++ = tbl[s[i] >> 2];
      *p++ = tbl[((s[i] & 3) << 4) | (s[i + 1] >> 4)];
      *p++ = tbl[((s[i + 1] & 0xf) << 2) | (s[i + 2] >> 6)];
      *p++ = tbl[s[i + 2] & 0x3f];
      i += 3;
    }

  /* Encode the one or two bytes left over, if any.  The bytes that
     are missing from the group count as zero bits and are never read
     from the input; the unused output positions are padded with
     '='.  */
  rest = length - i;
  if (rest == 1 || rest == 2)
    {
      unsigned int b0 = s[i];
      unsigned int b1 = (rest == 2) ? s[i + 1] : 0;

      *p++ = tbl[b0 >> 2];
      *p++ = tbl[((b0 & 3) << 4) | (b1 >> 4)];
      *p++ = (rest == 2) ? tbl[(b1 & 0xf) << 2] : '=';
      *p++ = '=';
    }

  /* Zero-terminate the result.  */
  *p = '\0';

  return p - b64store;
}
1956
/* Helper macros for base64_decode.  IS_BASE64 and NEXT_BASE64_CHAR
   refer to the `base64_char_to_value' table declared inside that
   function.  */

/* True if C has its high bit clear, i.e. is a valid table index.  */
#define IS_ASCII(c) (((c) & 0x80) == 0)

/* True if C belongs to the base64 alphabet or is the padding
   character '='.  */
#define IS_BASE64(c) \
  ((IS_ASCII (c) && base64_char_to_value[c] >= 0) || c == '=')

/* Get next character from the string, except that non-base64
   characters are ignored, as mandated by rfc2045.  C becomes '\0'
   when the end of the string is reached.  */
#define NEXT_BASE64_CHAR(c, p) do {			\
  c = *p++;						\
} while (c != '\0' && !IS_BASE64 (c))

/* Decode the string BASE64 (assumed to be encoded as base64) into
   the memory pointed to by TO.  TO should be large enough to
   accommodate the decoded data, which is guaranteed to be shorter
   than strlen(base64).

   Since TO is assumed to contain binary data, it is not
   NUL-terminated.  The function returns the length of the data
   written to TO, or -1 in case of error caused by malformed base64
   input.  Bytes decoded before the error was detected have already
   been stored in TO by then.  */

int
base64_decode (const char *base64, char *to)
{
  /* Table of base64 values for first 128 characters.  Note that this
     assumes ASCII (but so does Wget in other places).  */
  static short base64_char_to_value[128] =
    {
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,	/*   0-  9 */
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,	/*  10- 19 */
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,	/*  20- 29 */
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,	/*  30- 39 */
      -1,  -1,  -1,  62,  -1,  -1,  -1,  63,  52,  53,	/*  40- 49 */
      54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,	/*  50- 59 */
      -1,  -1,  -1,  -1,  -1,  0,   1,   2,   3,   4,	/*  60- 69 */
      5,   6,   7,   8,   9,   10,  11,  12,  13,  14,	/*  70- 79 */
      15,  16,  17,  18,  19,  20,  21,  22,  23,  24,	/*  80- 89 */
      25,  -1,  -1,  -1,  -1,  -1,  -1,  26,  27,  28,	/*  90- 99 */
      29,  30,  31,  32,  33,  34,  35,  36,  37,  38,	/* 100-109 */
      39,  40,  41,  42,  43,  44,  45,  46,  47,  48,	/* 110-119 */
      49,  50,  51,  -1,  -1,  -1,  -1,  -1		/* 120-127 */
    };

  const char *src = base64;
  char *dst = to;

  for (;;)
    {
      unsigned char ch;
      unsigned long bits;	/* the 24 bits of the current quadruplet */

      /* First character of a quadruplet.  Only here may the input
         end.  */
      NEXT_BASE64_CHAR (ch, src);
      if (ch == '\0')
	break;
      if (ch == '=')
	return -1;		/* illegal '=' while decoding base64 */
      bits = base64_char_to_value[ch] << 18;

      /* Second character: together with the first it completes the
         first output byte.  */
      NEXT_BASE64_CHAR (ch, src);
      if (ch == '\0')
	return -1;		/* premature EOF while decoding base64 */
      if (ch == '=')
	return -1;		/* illegal `=' while decoding base64 */
      bits |= base64_char_to_value[ch] << 12;
      *dst++ = bits >> 16;

      /* Third character.  */
      NEXT_BASE64_CHAR (ch, src);
      if (ch == '\0')
	return -1;		/* premature EOF while decoding base64 */

      if (ch == '=')
	{
	  /* Padding in the third position must be followed by a
	     second '='; the quadruplet then carried one byte.  */
	  NEXT_BASE64_CHAR (ch, src);
	  if (ch == '\0')
	    return -1;		/* premature EOF while decoding base64 */
	  if (ch != '=')
	    return -1;		/* padding `=' expected but not found */
	  continue;
	}

      bits |= base64_char_to_value[ch] << 6;
      *dst++ = (bits >> 8) & 0xff;

      /* Fourth character.  Padding here means the quadruplet carried
         two bytes.  */
      NEXT_BASE64_CHAR (ch, src);
      if (ch == '\0')
	return -1;		/* premature EOF while decoding base64 */
      if (ch == '=')
	continue;

      bits |= base64_char_to_value[ch];
      *dst++ = bits & 0xff;
    }

  return dst - to;
}

#undef IS_ASCII
#undef IS_BASE64
#undef NEXT_BASE64_CHAR
2058
2059
/* Simple merge sort for use by stable_sort.  Implementation courtesy
   Zeljko Vrba with additional debugging by Nenad Barbutov.

   Sorts the elements FROM through TO (both inclusive) of the array
   BASE, whose elements are SIZE bytes each, in the order defined by
   CMPFUN.  TEMP is scratch space that must be able to hold the same
   elements at the same indices.  Elements that compare equal keep
   their relative order.  */

static void
mergesort_internal (void *base, void *temp, size_t size, size_t from, size_t to,
		    int (*cmpfun) (const void *, const void *))
{
#define ELT(array, pos) ((char *)(array) + (pos) * size)
  if (from < to)
    {
      size_t i, j, k;
      /* Written this way so that the midpoint cannot overflow.  */
      size_t mid = from + (to - from) / 2;

      /* Sort the two halves, then merge them into TEMP.  */
      mergesort_internal (base, temp, size, from, mid, cmpfun);
      mergesort_internal (base, temp, size, mid + 1, to, cmpfun);
      i = from;
      j = mid + 1;
      for (k = from; (i <= mid) && (j <= to); k++)
	/* On a tie the element from the left half goes first, which
	   is what makes the sort stable.  */
	if (cmpfun (ELT (base, i), ELT (base, j)) <= 0)
	  memcpy (ELT (temp, k), ELT (base, i++), size);
	else
	  memcpy (ELT (temp, k), ELT (base, j++), size);
      while (i <= mid)
	memcpy (ELT (temp, k++), ELT (base, i++), size);
      while (j <= to)
	memcpy (ELT (temp, k++), ELT (base, j++), size);

      /* Copy the merged run back into place.  */
      for (k = from; k <= to; k++)
	memcpy (ELT (base, k), ELT (temp, k), size);
    }
#undef ELT
}

/* Stable sort with interface exactly like standard library's qsort:
   BASE points to NMEMB elements of SIZE bytes each, ordered by
   CMPFUN.  Uses mergesort internally, allocating temporary storage
   for NMEMB elements with alloca.  Arrays with fewer than two
   elements are left untouched.  */

void
stable_sort (void *base, size_t nmemb, size_t size,
	     int (*cmpfun) (const void *, const void *))
{
  /* The element count, not the element size, decides whether there
     is anything to sort.  This also keeps NMEMB - 1 from wrapping
     around when NMEMB is 0.  */
  if (nmemb > 1 && size > 0)
    {
      void *temp = alloca (nmemb * size);
      mergesort_internal (base, temp, size, 0, nmemb - 1, cmpfun);
    }
}
Note: See TracBrowser for help on using the repository browser.