Context Navigation

utils.c

Visit:

Last change on this file was 3440, checked in by bird, 18 years ago
wget 1.10.2
File size: 58.9 KB

Line
1	/* Various utility functions.
2	Copyright (C) 2005 Free Software Foundation, Inc.
3
4	This file is part of GNU Wget.
5
6	GNU Wget is free software; you can redistribute it and/or modify
7	it under the terms of the GNU General Public License as published by
8	the Free Software Foundation; either version 2 of the License, or
9	(at your option) any later version.
10
11	GNU Wget is distributed in the hope that it will be useful,
12	but WITHOUT ANY WARRANTY; without even the implied warranty of
13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14	GNU General Public License for more details.
15
16	You should have received a copy of the GNU General Public License
17	along with Wget; if not, write to the Free Software
18	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19
20	In addition, as a special exception, the Free Software Foundation
21	gives permission to link the code of its release of Wget with the
22	OpenSSL project's "OpenSSL" library (or with modified versions of it
23	that use the same license as the "OpenSSL" library), and distribute
24	the linked executables. You must obey the GNU General Public License
25	in all respects for all of the code used other than "OpenSSL". If you
26	modify this file, you may extend this exception to your version of the
27	file, but you are not obligated to do so. If you do not wish to do
28	so, delete this exception statement from your version. */
29
30	#include <config.h>
31
32	#include <stdio.h>
33	#include <stdlib.h>
34	#ifdef HAVE_STRING_H
35	# include <string.h>
36	#else /* not HAVE_STRING_H */
37	# include <strings.h>
38	#endif /* not HAVE_STRING_H */
39	#include <sys/types.h>
40	#ifdef HAVE_UNISTD_H
41	# include <unistd.h>
42	#endif
43	#ifdef HAVE_MMAP
44	# include <sys/mman.h>
45	#endif
46	#ifdef HAVE_PWD_H
47	# include <pwd.h>
48	#endif
49	#ifdef HAVE_LIMITS_H
50	# include <limits.h>
51	#endif
52	#ifdef HAVE_UTIME_H
53	# include <utime.h>
54	#endif
55	#ifdef HAVE_SYS_UTIME_H
56	# include <sys/utime.h>
57	#endif
58	#include <errno.h>
59	#ifdef NeXT
60	# include <libc.h> /* for access() */
61	#endif
62	#include <fcntl.h>
63	#include <assert.h>
64	#ifdef WGET_USE_STDARG
65	# include <stdarg.h>
66	#else
67	# include <varargs.h>
68	#endif
69	#ifdef HAVE_LOCALE_H
70	# include <locale.h>
71	#endif
72
73	/* For TIOCGWINSZ and friends: */
74	#ifdef HAVE_SYS_IOCTL_H
75	# include <sys/ioctl.h>
76	#endif
77	#ifdef HAVE_TERMIOS_H
78	# include <termios.h>
79	#endif
80
81	/* Needed for run_with_timeout. */
82	#undef USE_SIGNAL_TIMEOUT
83	#ifdef HAVE_SIGNAL_H
84	# include <signal.h>
85	#endif
86	#ifdef HAVE_SETJMP_H
87	# include <setjmp.h>
88	#endif
89
90	#ifndef HAVE_SIGSETJMP
91	/* If sigsetjmp is a macro, configure won't pick it up. */
92	# ifdef sigsetjmp
93	# define HAVE_SIGSETJMP
94	# endif
95	#endif
96
97	#ifdef HAVE_SIGNAL
98	# ifdef HAVE_SIGSETJMP
99	# define USE_SIGNAL_TIMEOUT
100	# endif
101	# ifdef HAVE_SIGBLOCK
102	# define USE_SIGNAL_TIMEOUT
103	# endif
104	#endif
105
106	#include "wget.h"
107	#include "utils.h"
108	#include "hash.h"
109
110	#ifndef errno
111	extern int errno;
112	#endif
113
/* Utility function: like xstrdup(), but also lowercases S.

   Returns the copy obtained from xstrdup() after every character in
   it has been passed through TOLOWER.  S itself is not modified.  */

char *
xstrdup_lower (const char *s)
{
  char *copy = xstrdup (s);
  char *p;

  /* Lowercase the copy in place, one character at a time.  */
  for (p = copy; *p; p++)
    *p = TOLOWER (*p);
  return copy;
}
125
/* Copy the string delimited by two pointers -- BEG pointing at the
   first character and END pointing at the char after the last one --
   to a new location obtained from xmalloc, and 0-terminate it.  */
char *
strdupdelim (const char *beg, const char *end)
{
  char *res = (char *)xmalloc (end - beg + 1);
  memcpy (res, beg, end - beg);
  res[end - beg] = '\0';
  return res;
}
137
138	/* Parse a string containing comma-separated elements, and return a
139	vector of char pointers with the elements. Spaces following the
140	commas are ignored. */
141	char **
142	sepstring (const char *s)
143	{
144	char **res;
145	const char *p;
146	int i = 0;
147
148	if (!s \|\| !*s)
149	return NULL;
150	res = NULL;
151	p = s;
152	while (*s)
153	{
154	if (*s == ',')
155	{
156	res = (char *)xrealloc (res, (i + 2) sizeof (char *));
157	res[i] = strdupdelim (p, s);
158	res[++i] = NULL;
159	++s;
160	/* Skip the blanks following the ','. */
161	while (ISSPACE (*s))
162	++s;
163	p = s;
164	}
165	else
166	++s;
167	}
168	res = (char *)xrealloc (res, (i + 2) sizeof (char *));
169	res[i] = strdupdelim (p, s);
170	res[i + 1] = NULL;
171	return res;
172	}
173
174
175	#ifdef WGET_USE_STDARG
176	# define VA_START(args, arg1) va_start (args, arg1)
177	#else
178	# define VA_START(args, ignored) va_start (args)
179	#endif
180
/* Like sprintf, but the result is written into a buffer obtained
   from xmalloc that is grown until the formatted text fits; that
   buffer is returned.  GNU libc has a similar function named
   asprintf, which requires the pointer to the string to be passed.  */

char *
aprintf (const char *fmt, ...)
{
  /* Built on top of vsnprintf, which we provide for the systems that
     don't have it, so this should be 100% portable.  */

  int capacity = 32;
  char *buf = xmalloc (capacity);

  for (;;)
    {
      va_list ap;
      int written;

      /* See log_vprintf_internal for explanation why it's OK to rely
	 on the return value of vsnprintf.  */

      VA_START (ap, fmt);
      written = vsnprintf (buf, capacity, fmt, ap);
      va_end (ap);

      if (written < 0)
	/* No length reported: retry with twice the old size.  */
	capacity <<= 1;
      else if (written < capacity)
	/* Everything fit, terminating zero included.  */
	return buf;
      else
	/* C99 vsnprintf told us precisely what is needed.  */
	capacity = written + 1;

      buf = xrealloc (buf, capacity);
    }
}
219
/* Concatenate the NULL-terminated list of string arguments into
   space obtained from xmalloc and return the result.  The argument
   list must end with a NULL pointer.  */

char *
concat_strings (const char *str0, ...)
{
  va_list args;
  int saved_lengths[5];		/* inspired by Apache's apr_pstrcat */
  char *ret, *p;

  const char *next_str;
  int total_length = 0;
  int argcount;

  /* Calculate the length of and allocate the resulting string.  The
     lengths of the first few arguments are remembered so the copy
     pass below need not call strlen on them again.  */

  argcount = 0;
  VA_START (args, str0);
  for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
    {
      int len = strlen (next_str);
      if (argcount < countof (saved_lengths))
	saved_lengths[argcount++] = len;
      total_length += len;
    }
  va_end (args);
  p = ret = xmalloc (total_length + 1);

  /* Copy the strings into the allocated space.  */

  argcount = 0;
  VA_START (args, str0);
  for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
    {
      int len;
      if (argcount < countof (saved_lengths))
	len = saved_lengths[argcount++];
      else
	len = strlen (next_str);
      memcpy (p, next_str, len);
      p += len;
    }
  va_end (args);
  *p = '\0';

  return ret;
}
267
268
/* Return pointer to a static char[] buffer in which zero-terminated
   string-representation of TM (in form hh:mm:ss) is printed.

   If TM is NULL, the current time will be used.

   The empty string is returned if the time is (time_t) -1 or if
   localtime() cannot convert it.  Because the buffer is static, each
   call overwrites the result of the previous one.  */

char *
time_str (time_t *tm)
{
  static char output[15];
  struct tm *ptm;
  time_t secs = tm ? *tm : time (NULL);

  /* localtime() returns NULL when it cannot represent the time, so
     don't dereference its result blindly.  */
  ptm = (secs == -1) ? NULL : localtime (&secs);
  if (!ptm)
    {
      /* In case of error, return the empty string.  Maybe we should
	 just abort if this happens?  */
      *output = '\0';
      return output;
    }
  sprintf (output, "%02d:%02d:%02d", ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
  return output;
}
292
/* Return pointer to a static char[] buffer holding the
   zero-terminated representation of TM including the date, in the
   form YYYY-MM-DD hh:mm:ss.

   If TM is NULL, the current time will be used.

   The empty string is returned if the time is (time_t) -1 or if
   localtime() cannot convert it.  Because the buffer is static, each
   call overwrites the result of the previous one.  */

char *
datetime_str (time_t *tm)
{
  static char output[20];	/* "YYYY-MM-DD hh:mm:ss" + \0 */
  struct tm *ptm;
  time_t secs = tm ? *tm : time (NULL);

  /* localtime() returns NULL when it cannot represent the time, so
     don't dereference its result blindly.  */
  ptm = (secs == -1) ? NULL : localtime (&secs);
  if (!ptm)
    {
      /* In case of error, return the empty string.  Maybe we should
	 just abort if this happens?  */
      *output = '\0';
      return output;
    }
  sprintf (output, "%04d-%02d-%02d %02d:%02d:%02d",
	   ptm->tm_year + 1900, ptm->tm_mon + 1, ptm->tm_mday,
	   ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
  return output;
}
315
316
317	/* The Windows versions of the following two functions are defined in
318	mswindows.c. */
319
320	#ifndef WINDOWS
321	void
322	fork_to_background (void)
323	{
324	pid_t pid;
325	/* Whether we arrange our own version of opt.lfilename here. */
326	int logfile_changed = 0;
327
328	if (!opt.lfilename)
329	{
330	/* We must create the file immediately to avoid either a race
331	condition (which arises from using unique_name and failing to
332	use fopen_excl) or lying to the user about the log file name
333	(which arises from using unique_name, printing the name, and
334	using fopen_excl later on.) */
335	FILE *new_log_fp = unique_create (DEFAULT_LOGFILE, 0, &opt.lfilename);
336	if (new_log_fp)
337	{
338	logfile_changed = 1;
339	fclose (new_log_fp);
340	}
341	}
342	pid = fork ();
343	if (pid < 0)
344	{
345	/* parent, error */
346	perror ("fork");
347	exit (1);
348	}
349	else if (pid != 0)
350	{
351	/* parent, no error */
352	printf (_("Continuing in background, pid %d.\n"), (int)pid);
353	if (logfile_changed)
354	printf (_("Output will be written to `%s'.\n"), opt.lfilename);
355	exit (0); /* #### should we use _exit()? */
356	}
357
358	/* child: give up the privileges and keep running. */
359	setsid ();
360	freopen ("/dev/null", "r", stdin);
361	freopen ("/dev/null", "w", stdout);
362	freopen ("/dev/null", "w", stderr);
363	}
364	#endif /* not WINDOWS */
365
366
367	/* "Touch" FILE, i.e. make its mtime ("modified time") equal the time
368	specified with TM. The atime ("access time") is set to the current
369	time. */
370
371	void
372	touch (const char *file, time_t tm)
373	{
374	#ifdef HAVE_STRUCT_UTIMBUF
375	struct utimbuf times;
376	#else
377	struct {
378	time_t actime;
379	time_t modtime;
380	} times;
381	#endif
382	times.modtime = tm;
383	times.actime = time (NULL);
384	if (utime (file, &times) == -1)
385	logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
386	}
387
388	/* Checks if FILE is a symbolic link, and removes it if it is. Does
389	nothing under MS-Windows. */
390	int
391	remove_link (const char *file)
392	{
393	int err = 0;
394	struct_stat st;
395
396	if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
397	{
398	DEBUGP (("Unlinking %s (symlink).\n", file));
399	err = unlink (file);
400	if (err != 0)
401	logprintf (LOG_VERBOSE, _("Failed to unlink symlink `%s': %s\n"),
402	file, strerror (errno));
403	}
404	return err;
405	}
406
407	/* Does FILENAME exist? This is quite a lousy implementation, since
408	it supplies no error codes -- only a yes-or-no answer. Thus it
409	will return that a file does not exist if, e.g., the directory is
410	unreadable. I don't mind it too much currently, though. The
411	proper way should, of course, be to have a third, error state,
412	other than true/false, but that would introduce uncalled-for
413	additional complexity to the callers. */
414	int
415	file_exists_p (const char *filename)
416	{
417	#ifdef HAVE_ACCESS
418	return access (filename, F_OK) >= 0;
419	#else
420	struct_stat buf;
421	return stat (filename, &buf) >= 0;
422	#endif
423	}
424
425	/* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
426	Returns 0 on error. */
427	int
428	file_non_directory_p (const char *path)
429	{
430	struct_stat buf;
431	/* Use lstat() rather than stat() so that symbolic links pointing to
432	directories can be identified correctly. */
433	if (lstat (path, &buf) != 0)
434	return 0;
435	return S_ISDIR (buf.st_mode) ? 0 : 1;
436	}
437
438	/* Return the size of file named by FILENAME, or -1 if it cannot be
439	opened or seeked into. */
440	wgint
441	file_size (const char *filename)
442	{
443	#if defined(HAVE_FSEEKO) && defined(HAVE_FTELLO)
444	wgint size;
445	/* We use fseek rather than stat to determine the file size because
446	that way we can also verify that the file is readable without
447	explicitly checking for permissions. Inspired by the POST patch
448	by Arnaud Wylie. */
449	FILE *fp = fopen (filename, "rb");
450	if (!fp)
451	return -1;
452	fseeko (fp, 0, SEEK_END);
453	size = ftello (fp);
454	fclose (fp);
455	return size;
456	#else
457	struct_stat st;
458	if (stat (filename, &st) < 0)
459	return -1;
460	return st.st_size;
461	#endif
462	}
463
/* Check file names named PREFIX.1, PREFIX.2, etc., until one that
   doesn't exist is found.  Return an xstrdup'ed copy of the unused
   file name.  */

static char *
unique_name_1 (const char *prefix)
{
  int count = 1;
  int plen = strlen (prefix);
  /* Scratch space for PREFIX, the '.', and the printed counter.  */
  char *template = (char *)alloca (plen + 1 + 24);
  char *template_tail = template + plen;

  memcpy (template, prefix, plen);
  *template_tail++ = '.';

  /* Rewrite the numeric tail in place until the name is unused.  */
  do
    number_to_string (template_tail, count++);
  while (file_exists_p (template));

  return xstrdup (template);
}
485
/* Return a unique file name, based on FILE.

   If FILE doesn't exist, it is used as is.  Otherwise FILE.1 is
   tried, then FILE.2, and so on; the first FILE.<number> that doesn't
   exist is returned.

   The resulting file is not created, only checked not to exist at
   the time of the call.  Where security matters, therefore, don't
   assume the name is still unused until you have opened it with
   O_EXCL or equivalent.

   If ALLOW_PASSTHROUGH is 0, the result is always a newly allocated
   string.  Otherwise FILE itself may be returned when the file
   doesn't exist (and therefore doesn't need changing).  */

char *
unique_name (const char *file, int allow_passthrough)
{
  /* The name is taken: look for a numeric suffix that yields an
     unused file name.  */
  if (file_exists_p (file))
    return unique_name_1 (file);

  /* FILE itself is free, so hand it back without modification.  */
  if (allow_passthrough)
    return (char *)file;
  return xstrdup (file);
}
514
/* Create a file based on NAME, except without overwriting an existing
   file with that name.  Providing O_EXCL is correctly implemented,
   this function does not have the race condition associated with
   opening the file returned by unique_name.

   BINARY is passed on to fopen_excl.  Returns the opened stream, or
   NULL on failure.  If OPENED_NAME is non-NULL, *OPENED_NAME receives
   the name of the created file on success (the caller then owns that
   string) and NULL on failure.  */

FILE *
unique_create (const char *name, int binary, char **opened_name)
{
  /* unique file name, based on NAME */
  char *uname = unique_name (name, 0);
  FILE *fp;
  /* Someone may create UNAME between the existence check and the
     exclusive open; in that case simply pick another name.  */
  while ((fp = fopen_excl (uname, binary)) == NULL && errno == EEXIST)
    {
      xfree (uname);
      uname = unique_name (name, 0);
    }
  if (opened_name)
    {
      if (fp)
	*opened_name = uname;
      else
	{
	  /* Don't leave the caller's pointer untouched on failure.  */
	  *opened_name = NULL;
	  xfree (uname);
	}
    }
  else
    xfree (uname);
  return fp;
}
545
/* Open the file for writing, with the addition that the file is
   opened "exclusively".  This means that, if the file already exists,
   this function will fail and errno will be set to EEXIST.  If
   BINARY is set, the file will be opened in binary mode, equivalent
   to fopen's "wb".

   If opening the file fails for any reason, including the file having
   previously existed, this function returns NULL and sets errno
   appropriately.  */

FILE *
fopen_excl (const char *fname, int binary)
{
#ifdef O_EXCL
  int fd;
  FILE *fp;
  int flags = O_WRONLY | O_CREAT | O_EXCL;
# ifdef O_BINARY
  if (binary)
    flags |= O_BINARY;
# endif
  fd = open (fname, flags, 0666);
  if (fd < 0)
    return NULL;
  fp = fdopen (fd, binary ? "wb" : "w");
  if (!fp)
    {
      /* Don't leak the descriptor; keep fdopen's errno for the
	 caller, since close() may overwrite it.  */
      int saved_errno = errno;
      close (fd);
      errno = saved_errno;
    }
  return fp;
#else  /* not O_EXCL */
  /* Manually check whether the file exists.  This is prone to race
     conditions, but systems without O_EXCL haven't deserved
     better.  */
  if (file_exists_p (fname))
    {
      errno = EEXIST;
      return NULL;
    }
  return fopen (fname, binary ? "wb" : "w");
#endif /* not O_EXCL */
}
582
583
/* Create DIRECTORY.  If some of the pathname components of DIRECTORY
   are missing, create them first.  Returns the status of the check
   or mkdir() call made for the last path component: 0 on success.

   The behaviour of this function should be identical to the behaviour
   of `mkdir -p' on systems where mkdir supports the `-p' option.  */
int
make_directory (const char *directory)
{
  int i, ret, quit = 0;
  char *dir;

  /* Make a copy of dir, to be able to write to it.  Otherwise, the
     function is unsafe if called with a read-only char *argument.  */
  STRDUP_ALLOCA (dir, directory);

  /* If the first character of dir is '/', skip it (and thus enable
     creation of absolute-pathname directories.  */
  for (i = (*dir == '/'); 1; ++i)
    {
      /* Advance to the end of the current path component.  */
      for (; dir[i] && dir[i] != '/'; i++)
	;
      if (!dir[i])
	quit = 1;
      /* Temporarily cut the path here so DIR names only the leading
	 components.  */
      dir[i] = '\0';
      /* Check whether the directory already exists.  Allow creation of
	 of intermediate directories to fail, as the initial path components
	 are not necessarily directories!  */
      if (!file_exists_p (dir))
	ret = mkdir (dir, 0777);
      else
	ret = 0;
      if (quit)
	break;
      else
	dir[i] = '/';
    }
  return ret;
}
623
/* Merge BASE with FILE.  BASE can be a directory or a file name, FILE
   should be a file name.

   file_merge("/foo/bar", "baz")  => "/foo/baz"
   file_merge("/foo/bar/", "baz") => "/foo/bar/baz"
   file_merge("foo", "bar")       => "bar"

   In other words, it's a simpler and gentler version of uri_merge_1.
   The result is a separately allocated string.  */

char *
file_merge (const char *base, const char *file)
{
  char *result;
  /* Everything up to the last '/' in BASE is kept.  */
  const char *cut = (const char *)strrchr (base, '/');

  if (!cut)
    return xstrdup (file);

  /* Directory part + '/' + FILE + terminating zero.  */
  result = (char *)xmalloc (cut - base + 1 + strlen (file) + 1);
  memcpy (result, base, cut - base);
  result[cut - base] = '/';
  strcpy (result + (cut - base) + 1, file);

  return result;
}
649
650
651	static int in_acclist PARAMS ((const char const , const char *, int));
652
653	/* Determine whether a file is acceptable to be followed, according to
654	lists of patterns to accept/reject. */
655	int
656	acceptable (const char *s)
657	{
658	int l = strlen (s);
659
660	while (l && s[l] != '/')
661	--l;
662	if (s[l] == '/')
663	s += (l + 1);
664	if (opt.accepts)
665	{
666	if (opt.rejects)
667	return (in_acclist ((const char const )opt.accepts, s, 1)
668	&& !in_acclist ((const char const )opt.rejects, s, 1));
669	else
670	return in_acclist ((const char const )opt.accepts, s, 1);
671	}
672	else if (opt.rejects)
673	return !in_acclist ((const char const )opt.rejects, s, 1);
674	return 1;
675	}
676
/* Compare S1 and S2 frontally; S2 must begin with S1.  E.g. if S1 is
   `/something', frontcmp() will return 1 only if S2 begins with
   `/something'.  Otherwise, 0 is returned.  */
int
frontcmp (const char *s1, const char *s2)
{
  /* Advance both strings while their characters agree.  */
  for (; *s1 && *s2 && (*s1 == *s2); ++s1, ++s2);
  /* S1 is a prefix of S2 exactly when all of S1 was consumed.  */
  return !*s1;
}
686
687	/* Iterate through STRLIST, and return the first element that matches
688	S, through wildcards or front comparison (as appropriate). */
689	static char *
690	proclist (char *strlist, const char s, enum accd flags)
691	{
692	char **x;
693	for (x = strlist; *x; x++)
694	{
695	/* Remove leading '/' if ALLABS */
696	char p = x + ((flags & ALLABS) && (**x == '/'));
697	if (has_wildcards_p (p))
698	{
699	if (fnmatch (p, s, FNM_PATHNAME) == 0)
700	break;
701	}
702	else
703	{
704	if (frontcmp (p, s))
705	break;
706	}
707	}
708	return *x;
709	}
710
711	/* Returns whether DIRECTORY is acceptable for download, wrt the
712	include/exclude lists.
713
714	If FLAGS is ALLABS, the leading `/' is ignored in paths; relative
715	and absolute paths may be freely intermixed. */
716	int
717	accdir (const char *directory, enum accd flags)
718	{
719	/* Remove starting '/'. */
720	if (flags & ALLABS && *directory == '/')
721	++directory;
722	if (opt.includes)
723	{
724	if (!proclist (opt.includes, directory, flags))
725	return 0;
726	}
727	if (opt.excludes)
728	{
729	if (proclist (opt.excludes, directory, flags))
730	return 0;
731	}
732	return 1;
733	}
734
/* Return non-zero if STRING ends with TAIL.  For instance:

   match_tail ("abc", "bc", 0)  -> 1
   match_tail ("abc", "ab", 0)  -> 0
   match_tail ("abc", "abc", 0) -> 1

   If FOLD_CASE_P is non-zero, the comparison will be
   case-insensitive.  */

int
match_tail (const char *string, const char *tail, int fold_case_p)
{
  int i, j;

  /* We want this to be fast, so we code two loops, one with
     case-folding, one without.  Both walk the two strings backwards,
     starting at their terminating zeros.  */

  if (!fold_case_p)
    {
      for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
	if (string[i] != tail[j])
	  break;
    }
  else
    {
      for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--)
	if (TOLOWER (string[i]) != TOLOWER (tail[j]))
	  break;
    }

  /* If the tail was exhausted, the match was successful.  */
  if (j == -1)
    return 1;
  else
    return 0;
}
771
/* Checks whether string S matches any element of the NULL-terminated
   list ACCEPTS.  A list element is matched either with fnmatch() or
   match_tail(), according to whether the element contains wildcards
   or not.  Returns 1 on the first match, 0 if nothing matches.

   If the BACKWARD is 0, don't do backward comparison -- just compare
   them normally.  */
static int
in_acclist (const char *const *accepts, const char *s, int backward)
{
  for (; *accepts; accepts++)
    {
      if (has_wildcards_p (*accepts))
	{
	  /* fnmatch returns 0 if the pattern *does* match the
	     string.  */
	  if (fnmatch (*accepts, s, 0) == 0)
	    return 1;
	}
      else
	{
	  if (backward)
	    {
	      if (match_tail (s, *accepts, 0))
		return 1;
	    }
	  else
	    {
	      if (!strcmp (s, *accepts))
		return 1;
	    }
	}
    }
  return 0;
}
806
/* Return the location of STR's suffix (file extension), i.e. a
   pointer into STR just past the last '.', or NULL if a '/' comes
   after the last '.' or there is no '.' at all.  Examples:
     suffix ("foo.bar")       -> "bar"
     suffix ("foo.bar.baz")   -> "baz"
     suffix ("/foo/bar")      -> NULL
     suffix ("/foo.bar/baz")  -> NULL  */
char *
suffix (const char *str)
{
  int pos = strlen (str);

  /* Scan backwards from the terminating zero until a '.' or '/' is
     hit, or the beginning of the string is reached.  */
  while (pos && str[pos] != '/' && str[pos] != '.')
    pos--;

  if (str[pos] != '.')
    return NULL;
  return (char *)str + pos + 1;
}
825
/* Return non-zero if S contains globbing wildcards (`*', `?', `[' or
   `]').  */

int
has_wildcards_p (const char *s)
{
  for (; *s; s++)
    if (*s == '*' || *s == '?' || *s == '[' || *s == ']')
      return 1;
  return 0;
}
837
/* Return non-zero if FNAME ends with a typical HTML suffix.  These
   suffixes, compared case-insensitively, are presumed to be HTML:

     html
     htm
     ?html (`?' matches one character)

   #### CAVEAT.  This is not necessarily a good indication that FNAME
   refers to a file that contains HTML!  */
int
has_html_suffix_p (const char *fname)
{
  char *ext = suffix (fname);

  if (ext == NULL)
    return 0;
  if (strcasecmp (ext, "html") == 0 || strcasecmp (ext, "htm") == 0)
    return 1;
  /* "?html": any single character followed by "html".  */
  return ext[0] && strcasecmp (ext + 1, "html") == 0;
}
862
/* Read a line from FP and return the pointer to freshly allocated
   storage.  The storage space is obtained through xmalloc() and
   should be freed when it is no longer needed.

   The length of the line is not limited, except by available memory.
   The newline character at the end of line is retained.  The line is
   terminated with a zero character.

   After end-of-file is encountered without anything being read, NULL
   is returned.  NULL is also returned on error.  To distinguish
   between these two cases, use the stdio function ferror().  */

char *
read_whole_line (FILE *fp)
{
  int length = 0;		/* characters read so far */
  int bufsize = 82;
  char *line = (char *)xmalloc (bufsize);

  while (fgets (line + length, bufsize - length, fp))
    {
      length += strlen (line + length);
      if (length == 0)
	/* Possible for example when reading from a binary file where
	   a line begins with \0.  */
	continue;

      if (line[length - 1] == '\n')
	break;

      /* fgets() guarantees to read the whole line, or to use up the
         space we've given it.  We can double the buffer
         unconditionally.  */
      bufsize <<= 1;
      line = xrealloc (line, bufsize);
    }
  if (length == 0 || ferror (fp))
    {
      xfree (line);
      return NULL;
    }
  if (length + 1 < bufsize)
    /* Relieve the memory from our exponential greediness.  We say
       `length + 1' because the terminating \0 is not included in
       LENGTH.  We don't need to zero-terminate the string ourselves,
       though, because fgets() does that.  */
    line = xrealloc (line, length + 1);
  return line;
}
912
913
914	/* Read FILE into memory. A pointer to `struct file_memory' are
915	returned; use struct element `content' to access file contents, and
916	the element `length' to know the file length. `content' is not
917	zero-terminated, and you should not read or write beyond the [0,
918	length) range of characters.
919
920	After you are done with the file contents, call read_file_free to
921	release the memory.
922
923	Depending on the operating system and the type of file that is
924	being read, read_file() either mmap's the file into memory, or
925	reads the file into the core using read().
926
927	If file is named "-", fileno(stdin) is used for reading instead.
928	If you want to read from a real file named "-", use "./-" instead. */
929
930	struct file_memory *
931	read_file (const char *file)
932	{
933	int fd;
934	struct file_memory *fm;
935	long size;
936	int inhibit_close = 0;
937
938	/* Some magic in the finest tradition of Perl and its kin: if FILE
939	is "-", just use stdin. */
940	if (HYPHENP (file))
941	{
942	fd = fileno (stdin);
943	inhibit_close = 1;
944	/* Note that we don't inhibit mmap() in this case. If stdin is
945	redirected from a regular file, mmap() will still work. */
946	}
947	else
948	fd = open (file, O_RDONLY);
949	if (fd < 0)
950	return NULL;
951	fm = xnew (struct file_memory);
952
953	#ifdef HAVE_MMAP
954	{
955	struct_fstat buf;
956	if (fstat (fd, &buf) < 0)
957	goto mmap_lose;
958	fm->length = buf.st_size;
959	/* NOTE: As far as I know, the callers of this function never
960	modify the file text. Relying on this would enable us to
961	specify PROT_READ and MAP_SHARED for a marginal gain in
962	efficiency, but at some cost to generality. */
963	fm->content = mmap (NULL, fm->length, PROT_READ \| PROT_WRITE,
964	MAP_PRIVATE, fd, 0);
965	if (fm->content == (char *)MAP_FAILED)
966	goto mmap_lose;
967	if (!inhibit_close)
968	close (fd);
969
970	fm->mmap_p = 1;
971	return fm;
972	}
973
974	mmap_lose:
975	/* The most common reason why mmap() fails is that FD does not point
976	to a plain file. However, it's also possible that mmap() doesn't
977	work for a particular type of file. Therefore, whenever mmap()
978	fails, we just fall back to the regular method. */
979	#endif /* HAVE_MMAP */
980
981	fm->length = 0;
982	size = 512; /* number of bytes fm->contents can
983	hold at any given time. */
984	fm->content = xmalloc (size);
985	while (1)
986	{
987	wgint nread;
988	if (fm->length > size / 2)
989	{
990	/* #### I'm not sure whether the whole exponential-growth
991	thing makes sense with kernel read. On Linux at least,
992	read() refuses to read more than 4K from a file at a
993	single chunk anyway. But other Unixes might optimize it
994	better, and it doesn't hurt anything, so I'm leaving
995	it. */
996
997	/* Normally, we grow SIZE exponentially to make the number
998	of calls to read() and realloc() logarithmic in relation
999	to file size. However, read() can read an amount of data
1000	smaller than requested, and it would be unreasonable to
1001	double SIZE every time something was read. Therefore,
1002	we double SIZE only when the length exceeds half of the
1003	entire allocated size. */
1004	size <<= 1;
1005	fm->content = xrealloc (fm->content, size);
1006	}
1007	nread = read (fd, fm->content + fm->length, size - fm->length);
1008	if (nread > 0)
1009	/* Successful read. */
1010	fm->length += nread;
1011	else if (nread < 0)
1012	/* Error. */
1013	goto lose;
1014	else
1015	/* EOF */
1016	break;
1017	}
1018	if (!inhibit_close)
1019	close (fd);
1020	if (size > fm->length && fm->length != 0)
1021	/* Due to exponential growth of fm->content, the allocated region
1022	might be much larger than what is actually needed. */
1023	fm->content = xrealloc (fm->content, fm->length);
1024	fm->mmap_p = 0;
1025	return fm;
1026
1027	lose:
1028	if (!inhibit_close)
1029	close (fd);
1030	xfree (fm->content);
1031	xfree (fm);
1032	return NULL;
1033	}
1034
1035	/* Release the resources held by FM. Specifically, this calls
1036	munmap() or xfree() on fm->content, depending whether mmap or
1037	malloc/read were used to read in the file. It also frees the
1038	memory needed to hold the FM structure itself. */
1039
1040	void
1041	read_file_free (struct file_memory *fm)
1042	{
1043	#ifdef HAVE_MMAP
1044	if (fm->mmap_p)
1045	{
1046	munmap (fm->content, fm->length);
1047	}
1048	else
1049	#endif
1050	{
1051	xfree (fm->content);
1052	}
1053	xfree (fm);
1054	}
1055
1056
/* Free every pointer in the NULL-terminated vector VEC, then free
   VEC itself.  A NULL VEC is accepted and ignored.  */
void
free_vec (char **vec)
{
  char **cursor;

  if (!vec)
    return;

  for (cursor = vec; *cursor; cursor++)
    xfree (*cursor);
  xfree (vec);
}
1070
/* Append vector V2 to vector V1.  The function frees V2 and
   reallocates V1 (thus you may not use the contents of either
   pointer after the call).  If V1 is NULL, V2 is returned; if V2 is
   NULL, V1 is returned.  Only the V2 array is freed: its elements
   are moved into the result.  */
char **
merge_vecs (char **v1, char **v2)
{
  int i, j;

  if (!v1)
    return v2;
  if (!v2)
    return v1;
  if (!*v2)
    {
      /* To avoid j == 0 */
      xfree (v2);
      return v1;
    }
  /* Count v1.  */
  for (i = 0; v1[i]; i++);
  /* Count v2.  */
  for (j = 0; v2[j]; j++);
  /* Reallocate v1.  */
  v1 = (char **)xrealloc (v1, (i + j + 1) * sizeof (char *));
  /* Copy V2's elements together with its terminating NULL.  */
  memcpy (v1 + i, v2, (j + 1) * sizeof (char *));
  xfree (v2);
  return v1;
}
1099
1100	/* Append a freshly allocated copy of STR to VEC. If VEC is NULL, it
1101	is allocated as needed. Return the new value of the vector. */
1102
1103	char **
1104	vec_append (char *vec, const char str)
1105	{
1106	int cnt; /* count of vector elements, including
1107	the one we're about to append */
1108	if (vec != NULL)
1109	{
1110	for (cnt = 0; vec[cnt]; cnt++)
1111	;
1112	++cnt;
1113	}
1114	else
1115	cnt = 1;
1116	/* Reallocate the array to fit the new element and the NULL. */
1117	vec = xrealloc (vec, (cnt + 1) * sizeof (char *));
1118	/* Append a copy of STR to the vector. */
1119	vec[cnt - 1] = xstrdup (str);
1120	vec[cnt] = NULL;
1121	return vec;
1122	}
1123
1124
1125	/* Sometimes it's useful to create "sets" of strings, i.e. special
1126	hash tables where you want to store strings as keys and merely
1127	query for their existence. Here is a set of utility routines that
1128	makes that transparent. */
1129
/* Add string S to the string set HT.  The set stores its own copy of
   S; nothing is done if S is already present.  */
void
string_set_add (struct hash_table *ht, const char *s)
{
  /* First check whether the set element already exists.  If it does,
     do nothing so that we don't have to free() the old element and
     then strdup() a new one.  */
  if (hash_table_contains (ht, s))
    return;

  /* We use "1" as value.  It provides us a useful and clear arbitrary
     value, and it consumes no memory -- the pointers to the same
     string "1" will be shared by all the key-value pairs in all `set'
     hash tables.  */
  hash_table_put (ht, xstrdup (s), "1");
}
1145
/* Synonym for hash_table_contains: returns its result for the string
   set HT and the string S.  */

int
string_set_contains (struct hash_table *ht, const char *s)
{
  return hash_table_contains (ht, s);
}
1153
/* hash_table_map callback used by string_set_to_array.  ARG is the
   address of a `char **' cursor into the output array: KEY is stored
   at the cursor and the cursor is advanced by one slot.  Always
   returns 0.  */

static int
string_set_to_array_mapper (void *key, void *value_ignored, void *arg)
{
  char ***arrayptr = (char ***) arg;
  *(*arrayptr)++ = (char *) key;
  return 0;
}
1161
1162	/* Convert the specified string set to array. ARRAY should be large
1163	enough to hold hash_table_count(ht) char pointers. */
1164
1165	void string_set_to_array (struct hash_table ht, char *array)
1166	{
1167	hash_table_map (ht, string_set_to_array_mapper, &array);
1168	}
1169
/* hash_table_map callback used by string_set_free: releases KEY and
   ignores the value.  Always returns 0.  */

static int
string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored)
{
  xfree (key);
  return 0;
}
1176
1177	void
1178	string_set_free (struct hash_table *ht)
1179	{
1180	hash_table_map (ht, string_set_free_mapper, NULL);
1181	hash_table_destroy (ht);
1182	}
1183
/* hash_table_map callback used by free_keys_and_values: releases both
   KEY and VALUE.  Always returns 0.  */

static int
free_keys_and_values_mapper (void *key, void *value, void *arg_ignored)
{
  xfree (key);
  xfree (value);
  return 0;
}
1191
1192	/* Another utility function: call free() on all keys and values of HT. */
1193
1194	void
1195	free_keys_and_values (struct hash_table *ht)
1196	{
1197	hash_table_map (ht, free_keys_and_values_mapper, NULL);
1198	}
1199
1200
/* Store the thousands separator in *SEP and the digit grouping
   description in *GROUPING.  The values are computed on the first
   call and cached in static variables for all later calls.  */

static void
get_grouping_data (const char **sep, const char **grouping)
{
  static const char *cached_sep;
  static const char *cached_grouping;
  static int initialized;
  if (!initialized)
    {
      /* If locale.h is present and defines LC_NUMERIC, assume C89
         struct lconv with "thousands_sep" and "grouping" members.  */
#ifdef LC_NUMERIC
      /* Get the grouping info from the locale. */
      struct lconv *lconv;
      /* The string returned by setlocale may be overwritten by the
         next setlocale call, so keep a private copy of the current
         locale name before switching.  */
      const char *current = setlocale (LC_NUMERIC, NULL);
      char *oldlocale = current ? xstrdup (current) : NULL;
      /* Temporarily switch to the locale from the environment. */
      setlocale (LC_NUMERIC, "");
      lconv = localeconv ();
      cached_sep = xstrdup (lconv->thousands_sep);
      cached_grouping = xstrdup (lconv->grouping);
      /* Restore the locale to previous setting. */
      if (oldlocale)
        {
          setlocale (LC_NUMERIC, oldlocale);
          xfree (oldlocale);
        }
      /* NOTE: this `if' deliberately governs the statement after the
         #endif; without LC_NUMERIC that statement is unconditional.  */
      if (!*cached_sep)
#endif
        /* Force separator for locales that specify no separators
           ("C", "hr", and probably many more.) */
        cached_sep = ",", cached_grouping = "\x03";
      initialized = 1;
    }
  *sep = cached_sep;
  *grouping = cached_grouping;
}
1232
/* Add thousand separators to a number already in string form.  Used
   by with_thousand_seps and with_thousand_seps_sum.

   REPR is an optional leading '-' followed by digits.  The result is
   built right-to-left in a static buffer and a pointer into that
   buffer is returned, so it is overwritten by the next call.  REPR
   must be shorter than the 48-byte output buffer.  If inserting a
   separator would leave too little room for the remaining characters
   of REPR, that separator is omitted rather than overflowing.  */

char *
add_thousand_seps (const char *repr)
{
  static char outbuf[48];
  char *p = outbuf + sizeof outbuf;

  const char *in = strchr (repr, '\0');
  const char *instart = repr + (*repr == '-'); /* don't group sign */

  /* Info received from locale */
  const char *grouping, *sep;
  int seplen;

  /* State information */
  int i = 0, groupsize;
  const char *atgroup;

  /* Initialize grouping data. */
  get_grouping_data (&sep, &grouping);
  seplen = strlen (sep);
  atgroup = grouping;
  groupsize = *atgroup++;

  /* Write the number into the buffer, backwards, inserting the
     separators as necessary.  The loop condition also covers an
     empty REPR or a lone "-", for which there are no digits to
     copy.  */
  *--p = '\0';
  while (in > instart)
    {
      *--p = *--in;
      if (in == instart)
        break;
      /* Prepend SEP to every groupsize'd digit and get new groupsize.  */
      if (++i == groupsize)
        {
          /* IN - REPR is the number of characters (sign included)
             still to be written; only insert the separator if they
             will all still fit in front of it.  */
          if ((size_t) (p - outbuf) >= (size_t) seplen + (size_t) (in - repr))
            memcpy (p -= seplen, sep, seplen);
          i = 0;
          if (*atgroup)
            groupsize = *atgroup++;
        }
    }
  if (*repr == '-')
    *--p = '-';

  return p;
}
1284
1285	/* Return a printed representation of N with thousand separators.
1286	This should respect locale settings, with the exception of the "C"
1287	locale which mandates no separator, but we use one anyway.
1288
1289	Unfortunately, we cannot use %'d (in fact it would be %'j) to get
1290	the separators because it's too non-portable, and it's hard to test
1291	for this feature at configure time. Besides, it wouldn't work in
1292	the "C" locale, which many Unix users still work in. */
1293
1294	char *
1295	with_thousand_seps (wgint l)
1296	{
1297	char inbuf[24];
1298	/* Print the number into the buffer. */
1299	number_to_string (inbuf, l);
1300	return add_thousand_seps (inbuf);
1301	}
1302
1303	/* When SUM_SIZE_INT is wgint, with_thousand_seps_large is #defined to
1304	with_thousand_seps. The function below is used on non-LFS systems
1305	where SUM_SIZE_INT typedeffed to double. */
1306
1307	#ifndef with_thousand_seps_sum
1308	char *
1309	with_thousand_seps_sum (SUM_SIZE_INT l)
1310	{
1311	char inbuf[32];
1312	snprintf (inbuf, sizeof (inbuf), "%.0f", l);
1313	return add_thousand_seps (inbuf);
1314	}
1315	#endif /* not with_thousand_seps_sum */
1316
1317	/* N, a byte quantity, is converted to a human-readable abberviated
1318	form a la sizes printed by `ls -lh'. The result is written to a
1319	static buffer, a pointer to which is returned.
1320
1321	Unlike `with_thousand_seps', this approximates to the nearest unit.
1322	Quoting GNU libit: "Most people visually process strings of 3-4
1323	digits effectively, but longer strings of digits are more prone to
1324	misinterpretation. Hence, converting to an abbreviated form
1325	usually improves readability."
1326
1327	This intentionally uses kilobyte (KB), megabyte (MB), etc. in their
1328	original computer science meaning of "powers of 1024". Powers of
1329	1000 would be useless since Wget already displays sizes with
1330	thousand separators. We don't use the "*bibyte" names invented in
1331	1998, and seldom used in practice. Wikipedia's entry on kilobyte
1332	discusses this in some detail. */
1333
1334	char *
1335	human_readable (wgint n)
1336	{
1337	/* These suffixes are compatible with those of GNU `ls -lh'. */
1338	static char powers[] =
1339	{
1340	'K', /* kilobyte, 2^10 bytes */
1341	'M', /* megabyte, 2^20 bytes */
1342	'G', /* gigabyte, 2^30 bytes */
1343	'T', /* terabyte, 2^40 bytes */
1344	'P', /* petabyte, 2^50 bytes */
1345	'E', /* exabyte, 2^60 bytes */
1346	};
1347	static char buf[8];
1348	int i;
1349
1350	/* If the quantity is smaller than 1K, just print it. */
1351	if (n < 1024)
1352	{
1353	snprintf (buf, sizeof (buf), "%d", (int) n);
1354	return buf;
1355	}
1356
1357	/* Loop over powers, dividing N with 1024 in each iteration. This
1358	works unchanged for all sizes of wgint, while still avoiding
1359	non-portable `long double' arithmetic. */
1360	for (i = 0; i < countof (powers); i++)
1361	{
1362	/* At each iteration N is greater than the subsequent power.
1363	That way N/1024.0 produces a decimal number in the units of
1364	this power. */
1365	if ((n >> 10) < 1024 \|\| i == countof (powers) - 1)
1366	{
1367	double val = n / 1024.0;
1368	/* Print values smaller than 10 with one decimal digits, and
1369	others without any decimals. */
1370	snprintf (buf, sizeof (buf), "%.*f%c",
1371	val < 10 ? 1 : 0, val, powers[i]);
1372	return buf;
1373	}
1374	n >>= 10;
1375	}
1376	return NULL; /* unreached */
1377	}
1378
1379	/* Count the digits in the provided number. Used to allocate space
1380	when printing numbers. */
1381
1382	int
1383	numdigit (wgint number)
1384	{
1385	int cnt = 1;
1386	if (number < 0)
1387	++cnt; /* accomodate '-' */
1388	while ((number /= 10) != 0)
1389	++cnt;
1390	return cnt;
1391	}
1392
1393	#define PR(mask) *p++ = n / (mask) + '0'
1394
1395	/* DIGITS_<D> is used to print a D-digit number and should be called
1396	with mask==10^(D-1). It prints n/mask (the first digit), reducing
1397	n to n%mask (the remaining digits), and calling DIGITS_<D-1>.
1398	Recursively this continues until DIGITS_1 is invoked. */
1399
1400	#define DIGITS_1(mask) PR (mask)
1401	#define DIGITS_2(mask) PR (mask), n %= (mask), DIGITS_1 ((mask) / 10)
1402	#define DIGITS_3(mask) PR (mask), n %= (mask), DIGITS_2 ((mask) / 10)
1403	#define DIGITS_4(mask) PR (mask), n %= (mask), DIGITS_3 ((mask) / 10)
1404	#define DIGITS_5(mask) PR (mask), n %= (mask), DIGITS_4 ((mask) / 10)
1405	#define DIGITS_6(mask) PR (mask), n %= (mask), DIGITS_5 ((mask) / 10)
1406	#define DIGITS_7(mask) PR (mask), n %= (mask), DIGITS_6 ((mask) / 10)
1407	#define DIGITS_8(mask) PR (mask), n %= (mask), DIGITS_7 ((mask) / 10)
1408	#define DIGITS_9(mask) PR (mask), n %= (mask), DIGITS_8 ((mask) / 10)
1409	#define DIGITS_10(mask) PR (mask), n %= (mask), DIGITS_9 ((mask) / 10)
1410
1411	/* DIGITS_<11-20> are only used on machines with 64-bit wgints. */
1412
1413	#define DIGITS_11(mask) PR (mask), n %= (mask), DIGITS_10 ((mask) / 10)
1414	#define DIGITS_12(mask) PR (mask), n %= (mask), DIGITS_11 ((mask) / 10)
1415	#define DIGITS_13(mask) PR (mask), n %= (mask), DIGITS_12 ((mask) / 10)
1416	#define DIGITS_14(mask) PR (mask), n %= (mask), DIGITS_13 ((mask) / 10)
1417	#define DIGITS_15(mask) PR (mask), n %= (mask), DIGITS_14 ((mask) / 10)
1418	#define DIGITS_16(mask) PR (mask), n %= (mask), DIGITS_15 ((mask) / 10)
1419	#define DIGITS_17(mask) PR (mask), n %= (mask), DIGITS_16 ((mask) / 10)
1420	#define DIGITS_18(mask) PR (mask), n %= (mask), DIGITS_17 ((mask) / 10)
1421	#define DIGITS_19(mask) PR (mask), n %= (mask), DIGITS_18 ((mask) / 10)
1422
1423	/* SPRINTF_WGINT is used by number_to_string to handle pathological
1424	cases and to portably support strange sizes of wgint. Ideally this
1425	would just use "%j" and intmax_t, but many systems don't support
1426	it, so it's used only if nothing else works. */
1427	#if SIZEOF_LONG >= SIZEOF_WGINT
1428	# define SPRINTF_WGINT(buf, n) sprintf (buf, "%ld", (long) (n))
1429	#else
1430	# if SIZEOF_LONG_LONG >= SIZEOF_WGINT
1431	# define SPRINTF_WGINT(buf, n) sprintf (buf, "%lld", (long long) (n))
1432	# else
1433	# ifdef WINDOWS
1434	# define SPRINTF_WGINT(buf, n) sprintf (buf, "%I64d", (__int64) (n))
1435	# else
1436	# define SPRINTF_WGINT(buf, n) sprintf (buf, "%j", (intmax_t) (n))
1437	# endif
1438	# endif
1439	#endif
1440
1441	/* Shorthand for casting to wgint. */
1442	#define W wgint
1443
1444	/* Print NUMBER to BUFFER in base 10. This is equivalent to
1445	`sprintf(buffer, "%lld", (long long) number)', only typically much
1446	faster and portable to machines without long long.
1447
1448	The speedup may make a difference in programs that frequently
1449	convert numbers to strings. Some implementations of sprintf,
1450	particularly the one in GNU libc, have been known to be extremely
1451	slow when converting integers to strings.
1452
1453	Return the pointer to the location where the terminating zero was
1454	printed. (Equivalent to calling buffer+strlen(buffer) after the
1455	function is done.)
1456
1457	BUFFER should be big enough to accept as many bytes as you expect
1458	the number to take up. On machines with 64-bit longs the maximum
1459	needed size is 24 bytes. That includes the digits needed for the
1460	largest 64-bit number, the `-' sign in case it's negative, and the
1461	terminating '\0'. */
1462
1463	char *
1464	number_to_string (char *buffer, wgint number)
1465	{
1466	char *p = buffer;
1467	wgint n = number;
1468
1469	#if (SIZEOF_WGINT != 4) && (SIZEOF_WGINT != 8)
1470	/* We are running in a strange or misconfigured environment. Let
1471	sprintf cope with it. */
1472	SPRINTF_WGINT (buffer, n);
1473	p += strlen (buffer);
1474	#else /* (SIZEOF_WGINT == 4) \|\| (SIZEOF_WGINT == 8) */
1475
1476	if (n < 0)
1477	{
1478	if (n < -WGINT_MAX)
1479	{
1480	/* -n would overflow. Have sprintf deal with this. */
1481	SPRINTF_WGINT (buffer, n);
1482	p += strlen (buffer);
1483	return p;
1484	}
1485
1486	*p++ = '-';
1487	n = -n;
1488	}
1489
1490	/* Use the DIGITS_ macro appropriate for N's number of digits. That
1491	way printing any N is fully open-coded without a loop or jump.
1492	(Also see description of DIGITS_.) /
1493
1494	if (n < 10) DIGITS_1 (1);
1495	else if (n < 100) DIGITS_2 (10);
1496	else if (n < 1000) DIGITS_3 (100);
1497	else if (n < 10000) DIGITS_4 (1000);
1498	else if (n < 100000) DIGITS_5 (10000);
1499	else if (n < 1000000) DIGITS_6 (100000);
1500	else if (n < 10000000) DIGITS_7 (1000000);
1501	else if (n < 100000000) DIGITS_8 (10000000);
1502	else if (n < 1000000000) DIGITS_9 (100000000);
1503	#if SIZEOF_WGINT == 4
1504	/* wgint is 32 bits wide: no number has more than 10 digits. */
1505	else DIGITS_10 (1000000000);
1506	#else
1507	/* wgint is 64 bits wide: handle numbers with more than 9 decimal
1508	digits. Constants are constructed by compile-time multiplication
1509	to avoid dealing with different notations for 64-bit constants
1510	(nnnL, nnnLL, and nnnI64, depending on the compiler). */
1511	else if (n < 10*(W)1000000000) DIGITS_10 (1000000000);
1512	else if (n < 100(W)1000000000) DIGITS_11 (10(W)1000000000);
1513	else if (n < 1000(W)1000000000) DIGITS_12 (100(W)1000000000);
1514	else if (n < 10000(W)1000000000) DIGITS_13 (1000(W)1000000000);
1515	else if (n < 100000(W)1000000000) DIGITS_14 (10000(W)1000000000);
1516	else if (n < 1000000(W)1000000000) DIGITS_15 (100000(W)1000000000);
1517	else if (n < 10000000(W)1000000000) DIGITS_16 (1000000(W)1000000000);
1518	else if (n < 100000000(W)1000000000) DIGITS_17 (10000000(W)1000000000);
1519	else if (n < 1000000000(W)1000000000) DIGITS_18 (100000000(W)1000000000);
1520	else DIGITS_19 (1000000000*(W)1000000000);
1521	#endif
1522
1523	*p = '\0';
1524	#endif /* (SIZEOF_WGINT == 4) \|\| (SIZEOF_WGINT == 8) */
1525
1526	return p;
1527	}
1528
1529	#undef PR
1530	#undef W
1531	#undef DIGITS_1
1532	#undef DIGITS_2
1533	#undef DIGITS_3
1534	#undef DIGITS_4
1535	#undef DIGITS_5
1536	#undef DIGITS_6
1537	#undef DIGITS_7
1538	#undef DIGITS_8
1539	#undef DIGITS_9
1540	#undef DIGITS_10
1541	#undef DIGITS_11
1542	#undef DIGITS_12
1543	#undef DIGITS_13
1544	#undef DIGITS_14
1545	#undef DIGITS_15
1546	#undef DIGITS_16
1547	#undef DIGITS_17
1548	#undef DIGITS_18
1549	#undef DIGITS_19
1550
1551	#define RING_SIZE 3
1552
1553	/* Print NUMBER to a statically allocated string and return a pointer
1554	to the printed representation.
1555
1556	This function is intended to be used in conjunction with printf.
1557	It is hard to portably print wgint values:
1558	a) you cannot use printf("%ld", number) because wgint can be long
1559	long on 32-bit machines with LFS.
1560	b) you cannot use printf("%lld", number) because NUMBER could be
1561	long on 32-bit machines without LFS, or on 64-bit machines,
1562	which do not require LFS. Also, Windows doesn't support %lld.
1563	c) you cannot use printf("%j", (int_max_t) number) because not all
1564	versions of printf support "%j", the most notable being the one
1565	on Windows.
1566	d) you cannot #define WGINT_FMT to the appropriate format and use
1567	printf(WGINT_FMT, number) because that would break translations
1568	for user-visible messages, such as printf("Downloaded: %d
1569	bytes\n", number).
1570
1571	What you should use instead is printf("%s", number_to_static_string
1572	(number)).
1573
1574	CAVEAT: since the function returns pointers to static data, you
1575	must be careful to copy its result before calling it again.
1576	However, to make it more useful with printf, the function maintains
1577	an internal ring of static buffers to return. That way things like
1578	printf("%s %s", number_to_static_string (num1),
1579	number_to_static_string (num2)) work as expected. Three buffers
1580	are currently used, which means that "%s %s %s" will work, but "%s
1581	%s %s %s" won't. If you need to print more than three wgints,
1582	bump the RING_SIZE (or rethink your message.) */
1583
1584	char *
1585	number_to_static_string (wgint number)
1586	{
1587	static char ring[RING_SIZE][24];
1588	static int ringpos;
1589	char *buf = ring[ringpos];
1590	number_to_string (buf, number);
1591	ringpos = (ringpos + 1) % RING_SIZE;
1592	return buf;
1593	}
1594
1595
/* Determine the width of the terminal we're running on.  If that's
   not possible, return 0.  With TIOCGWINSZ, 0 is also returned when
   opt.lfilename is set, without querying the terminal.  */

int
determine_screen_width (void)
{
  /* If there's a way to get the terminal size using POSIX
     tcgetattr(), somebody please tell me.  */
#ifdef TIOCGWINSZ
  int fd;
  struct winsize wsz;

  if (opt.lfilename != NULL)
    return 0;

  /* Query the terminal attached to stderr.  */
  fd = fileno (stderr);
  if (ioctl (fd, TIOCGWINSZ, &wsz) < 0)
    return 0;                   /* most likely ENOTTY */

  return wsz.ws_col;
#else  /* not TIOCGWINSZ */
# ifdef WINDOWS
  CONSOLE_SCREEN_BUFFER_INFO csbi;
  if (!GetConsoleScreenBufferInfo (GetStdHandle (STD_ERROR_HANDLE), &csbi))
    return 0;
  return csbi.dwSize.X;
# else /* neither WINDOWS nor TIOCGWINSZ */
  return 0;
#endif /* neither WINDOWS nor TIOCGWINSZ */
#endif /* not TIOCGWINSZ */
}
1627
/* Return a random number between 0 and MAX-1, inclusive.

   If MAX is greater than the value of RAND_MAX+1 on the system, the
   returned value will be in the range [0, RAND_MAX].  This may be
   fixed in a future release.

   The random number generator is seeded automatically the first time
   it is called.

   This uses rand() for portability.  It has been suggested that
   random() offers better randomness, but this is not required for
   Wget, so I chose to go for simplicity and use rand
   unconditionally.

   DO NOT use this for cryptographic purposes.  It is only meant to be
   used in situations where quality of the random numbers returned
   doesn't really matter.  */

int
random_number (int max)
{
  static int seeded;
  double bounded;
  int rnd;

  if (!seeded)
    {
      srand (time (NULL));
      seeded = 1;
    }
  rnd = rand ();

  /* On systems that don't define RAND_MAX, assume it to be 2**15 - 1,
     and enforce that assumption by masking other bits.  */
#ifndef RAND_MAX
# define RAND_MAX 32767
  rnd &= RAND_MAX;
#endif

  /* This is equivalent to rand() % max, but uses the high-order bits
     for better randomness on architecture where rand() is implemented
     using a simple congruential generator.  */

  bounded = (double)max * rnd / (RAND_MAX + 1.0);
  return (int)bounded;
}
1674
/* Return a random floating point number in the [0, 1) range, built
   from three independent draws of random_number (1000), one per
   group of three decimal digits.  The precision of returned numbers
   is therefore 9 digits.

   Modify this to use erand48() where available!  */

double
random_float (void)
{
  /* We can't rely on any specific value of RAND_MAX, but I'm pretty
     sure it's greater than 1000.  */
  int rnd1 = random_number (1000);
  int rnd2 = random_number (1000);
  int rnd3 = random_number (1000);
  return rnd1 / 1000.0 + rnd2 / 1000000.0 + rnd3 / 1000000000.0;
}
1690
1691
1692	/* Implementation of run_with_timeout, a generic timeout-forcing
1693	routine for systems with Unix-like signal handling. */
1694
1695	#ifdef USE_SIGNAL_TIMEOUT
1696	# ifdef HAVE_SIGSETJMP
1697	# define SETJMP(env) sigsetjmp (env, 1)
1698
1699	static sigjmp_buf run_with_timeout_env;
1700
1701	static RETSIGTYPE
1702	abort_run_with_timeout (int sig)
1703	{
1704	assert (sig == SIGALRM);
1705	siglongjmp (run_with_timeout_env, -1);
1706	}
1707	# else /* not HAVE_SIGSETJMP */
1708	# define SETJMP(env) setjmp (env)
1709
1710	static jmp_buf run_with_timeout_env;
1711
1712	static RETSIGTYPE
1713	abort_run_with_timeout (int sig)
1714	{
1715	assert (sig == SIGALRM);
1716	/* We don't have siglongjmp to preserve the set of blocked signals;
1717	if we longjumped out of the handler at this point, SIGALRM would
1718	remain blocked. We must unblock it manually. */
1719	int mask = siggetmask ();
1720	mask &= ~sigmask (SIGALRM);
1721	sigsetmask (mask);
1722
1723	/* Now it's safe to longjump. */
1724	longjmp (run_with_timeout_env, -1);
1725	}
1726	# endif /* not HAVE_SIGSETJMP */
1727
/* Arrange for SIGALRM to be delivered in TIMEOUT seconds.  This uses
   setitimer where available, alarm otherwise.

   TIMEOUT should be non-zero.  If the timeout value is so small that
   it would be rounded to zero, it is rounded to the least legal value
   instead (1us for setitimer, 1s for alarm).  That ensures that
   SIGALRM will be delivered in all cases.  */

static void
alarm_set (double timeout)
{
#ifdef ITIMER_REAL
  /* Use the modern itimer interface. */
  struct itimerval itv;
  xzero (itv);
  /* Split TIMEOUT into whole seconds and the fractional part
     expressed in microseconds.  */
  itv.it_value.tv_sec = (long) timeout;
  itv.it_value.tv_usec = 1000000 * (timeout - (long)timeout);
  if (itv.it_value.tv_sec == 0 && itv.it_value.tv_usec == 0)
    /* Ensure that we wait for at least the minimum interval.
       Specifying zero would mean "wait forever".  */
    itv.it_value.tv_usec = 1;
  setitimer (ITIMER_REAL, &itv, NULL);
#else  /* not ITIMER_REAL */
  /* Use the old alarm() interface. */
  int secs = (int) timeout;
  if (secs == 0)
    /* Round TIMEOUTs smaller than 1 to 1, not to zero.  This is
       because alarm(0) means "never deliver the alarm", i.e. "wait
       forever", which is not what someone who specifies a 0.5s
       timeout would expect.  */
    secs = 1;
  alarm (secs);
#endif /* not ITIMER_REAL */
}
1762
/* Cancel the alarm set with alarm_set: install an all-zero interval
   timer with setitimer, or call alarm (0) where setitimer is not
   used.  */

static void
alarm_cancel (void)
{
#ifdef ITIMER_REAL
  struct itimerval disable;
  xzero (disable);
  setitimer (ITIMER_REAL, &disable, NULL);
#else  /* not ITIMER_REAL */
  alarm (0);
#endif /* not ITIMER_REAL */
}
1776
1777	/* Call FUN(ARG), but don't allow it to run for more than TIMEOUT
1778	seconds. Returns non-zero if the function was interrupted with a
1779	timeout, zero otherwise.
1780
1781	This works by setting up SIGALRM to be delivered in TIMEOUT seconds
1782	using setitimer() or alarm(). The timeout is enforced by
1783	longjumping out of the SIGALRM handler. This has several
1784	advantages compared to the traditional approach of relying on
1785	signals causing system calls to exit with EINTR:
1786
1787	* The callback function is forcibly interrupted after the
1788	timeout expires, (almost) regardless of what it was doing and
1789	whether it was in a syscall. For example, a calculation that
1790	takes a long time is interrupted as reliably as an IO
1791	operation.
1792
1793	* It works with both SYSV and BSD signals because it doesn't
1794	depend on the default setting of SA_RESTART.
1795
1796	* It doesn't require special handler setup beyond a simple call
1797	to signal(). (It does use sigsetjmp/siglongjmp, but they're
1798	optional.)
1799
1800	The only downside is that, if FUN allocates internal resources that
1801	are normally freed prior to exit from the functions, they will be
1802	lost in case of timeout. */
1803
1804	int
1805	run_with_timeout (double timeout, void (fun) (void ), void *arg)
1806	{
1807	int saved_errno;
1808
1809	if (timeout == 0)
1810	{
1811	fun (arg);
1812	return 0;
1813	}
1814
1815	signal (SIGALRM, abort_run_with_timeout);
1816	if (SETJMP (run_with_timeout_env) != 0)
1817	{
1818	/* Longjumped out of FUN with a timeout. */
1819	signal (SIGALRM, SIG_DFL);
1820	return 1;
1821	}
1822	alarm_set (timeout);
1823	fun (arg);
1824
1825	/* Preserve errno in case alarm() or signal() modifies it. */
1826	saved_errno = errno;
1827	alarm_cancel ();
1828	signal (SIGALRM, SIG_DFL);
1829	errno = saved_errno;
1830
1831	return 0;
1832	}
1833
1834	#else /* not USE_SIGNAL_TIMEOUT */
1835
1836	#ifndef WINDOWS
1837	/* A stub version of run_with_timeout that just calls FUN(ARG). Don't
1838	define it under Windows, because Windows has its own version of
1839	run_with_timeout that uses threads. */
1840
1841	int
1842	run_with_timeout (double timeout, void (fun) (void ), void *arg)
1843	{
1844	fun (arg);
1845	return 0;
1846	}
1847	#endif /* not WINDOWS */
1848	#endif /* not USE_SIGNAL_TIMEOUT */
1849
1850
1851	#ifndef WINDOWS
1852
/* Sleep the specified amount of seconds.  On machines without
   nanosleep(), this may sleep shorter if interrupted by signals.  */

void
xsleep (double seconds)
{
#ifdef HAVE_NANOSLEEP
  /* nanosleep is the preferred interface because it offers high
     accuracy and, more importantly, because it allows us to reliably
     restart receiving a signal such as SIGWINCH.  (There was an
     actual Debian bug report about --limit-rate malfunctioning while
     the terminal was being resized.)  */
  struct timespec sleep, remaining;
  sleep.tv_sec = (long) seconds;
  sleep.tv_nsec = 1000000000 * (seconds - (long) seconds);
  while (nanosleep (&sleep, &remaining) < 0 && errno == EINTR)
    /* If nanosleep has been interrupted by a signal, adjust the
       sleeping period and return to sleep.  */
    sleep = remaining;
#else  /* not HAVE_NANOSLEEP */
#ifdef HAVE_USLEEP
  /* If usleep is available, use it in preference to select.  */
  if (seconds >= 1)
    {
      /* On some systems, usleep cannot handle values larger than
         1,000,000.  If the period is larger than that, use sleep
         first, then add usleep for subsecond accuracy.  */
      sleep (seconds);
      seconds -= (long) seconds;
    }
  usleep (seconds * 1000000);
#else  /* not HAVE_USLEEP */
#ifdef HAVE_SELECT
  /* Note that, although Windows supports select, this sleeping
     strategy doesn't work there because Winsock's select doesn't
     implement timeout when it is passed NULL pointers for all fd
     sets.  (But it does work under Cygwin, which implements its own
     select.)  */
  struct timeval sleep;
  sleep.tv_sec = (long) seconds;
  sleep.tv_usec = 1000000 * (seconds - (long) seconds);
  select (0, NULL, NULL, NULL, &sleep);
  /* If select returns -1 and errno is EINTR, it means we were
     interrupted by a signal.  But without knowing how long we've
     actually slept, we can't return to sleep.  Using gettimeofday to
     track sleeps is slow and unreliable due to clock skew.  */
#else  /* not HAVE_SELECT */
  sleep (seconds);
#endif /* not HAVE_SELECT */
#endif /* not HAVE_USLEEP */
#endif /* not HAVE_NANOSLEEP */
}
1905
1906	#endif /* not WINDOWS */
1907
/* Encode the string STR of length LENGTH to base64 format and place it
   to B64STORE.  The output will be \0-terminated, and must point to a
   writable buffer of at least 1+BASE64_LENGTH(length) bytes.  It
   returns the length of the resulting base64 data, not counting the
   terminating zero.

   Only the first LENGTH bytes of STR are ever read; a trailing group
   of one or two bytes is encoded with zero fill bits and '=' padding.

   This implementation will not emit newlines after 76 characters of
   base64 data.  */

int
base64_encode (const char *str, int length, char *b64store)
{
  /* Conversion table.  */
  static const char tbl[64] = {
    'A','B','C','D','E','F','G','H',
    'I','J','K','L','M','N','O','P',
    'Q','R','S','T','U','V','W','X',
    'Y','Z','a','b','c','d','e','f',
    'g','h','i','j','k','l','m','n',
    'o','p','q','r','s','t','u','v',
    'w','x','y','z','0','1','2','3',
    '4','5','6','7','8','9','+','/'
  };
  int i;
  const unsigned char *s = (const unsigned char *) str;
  char *p = b64store;

  /* Transform each complete 3x8-bit group to 4x6 bits, as required
     by base64.  */
  for (i = 0; length - i >= 3; i += 3)
    {
      *p++ = tbl[s[0] >> 2];
      *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)];
      *p++ = tbl[((s[1] & 0xf) << 2) + (s[2] >> 6)];
      *p++ = tbl[s[2] & 0x3f];
      s += 3;
    }

  /* Encode the one or two leftover bytes, if any.  Missing input
     bytes count as zero bits and are never read from STR.  */
  if (i < length)
    {
      int have_second = (length - i == 2);
      unsigned int b0 = s[0];
      unsigned int b1 = have_second ? s[1] : 0;

      *p++ = tbl[b0 >> 2];
      *p++ = tbl[((b0 & 3) << 4) + (b1 >> 4)];
      *p++ = have_second ? tbl[(b1 & 0xf) << 2] : '=';
      *p++ = '=';
    }

  /* Zero-terminate the result.  */
  *p = '\0';

  return p - b64store;
}
1956
#define IS_ASCII(c) (((c) & 0x80) == 0)
#define IS_BASE64(c) ((IS_ASCII (c) && base64_char_to_value[c] >= 0) || (c) == '=')

/* Get next character from the string, except that non-base64
   characters are ignored, as mandated by rfc2045.  */
#define NEXT_BASE64_CHAR(c, p) do {			\
  c = *p++;						\
} while (c != '\0' && !IS_BASE64 (c))

/* Decode data from BASE64 (assumed to be encoded as base64) into
   memory pointed to by TO.  TO should be large enough to accommodate
   the decoded data, which is guaranteed to be less than
   strlen(base64).

   Since TO is assumed to contain binary data, it is not
   NUL-terminated.  The function returns the length of the data
   written to TO.  -1 is returned in case of error caused by malformed
   base64 input.  */

int
base64_decode (const char *base64, char *to)
{
  /* Table of base64 values for first 128 characters.  Note that this
     assumes ASCII (but so does Wget in other places).  */
  static short base64_char_to_value[128] =
    {
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,	/*   0-  9 */
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,	/*  10- 19 */
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,	/*  20- 29 */
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,	/*  30- 39 */
      -1,  -1,  -1,  62,  -1,  -1,  -1,  63,  52,  53,	/*  40- 49 */
      54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,	/*  50- 59 */
      -1,  -1,  -1,  -1,  -1,  0,   1,   2,   3,   4,	/*  60- 69 */
      5,   6,   7,   8,   9,   10,  11,  12,  13,  14,	/*  70- 79 */
      15,  16,  17,  18,  19,  20,  21,  22,  23,  24,	/*  80- 89 */
      25,  -1,  -1,  -1,  -1,  -1,  -1,  26,  27,  28,	/*  90- 99 */
      29,  30,  31,  32,  33,  34,  35,  36,  37,  38,	/* 100-109 */
      39,  40,  41,  42,  43,  44,  45,  46,  47,  48,	/* 110-119 */
      49,  50,  51,  -1,  -1,  -1,  -1,  -1		/* 120-127 */
    };

  const char *p = base64;
  char *q = to;

  while (1)
    {
      unsigned char c;
      unsigned long value;

      /* Process first byte of a quadruplet.  End of input is only
         legal here, on a quadruplet boundary.  */
      NEXT_BASE64_CHAR (c, p);
      if (!c)
	break;
      if (c == '=')
	return -1;		/* illegal '=' while decoding base64 */
      value = base64_char_to_value[c] << 18;

      /* Process second byte of a quadruplet.  */
      NEXT_BASE64_CHAR (c, p);
      if (!c)
	return -1;		/* premature EOF while decoding base64 */
      if (c == '=')
	return -1;		/* illegal `=' while decoding base64 */
      value |= base64_char_to_value[c] << 12;
      *q++ = value >> 16;

      /* Process third byte of a quadruplet.  */
      NEXT_BASE64_CHAR (c, p);
      if (!c)
	return -1;		/* premature EOF while decoding base64 */

      if (c == '=')
	{
	  /* "xx==": one decoded byte; a second '=' must follow.  */
	  NEXT_BASE64_CHAR (c, p);
	  if (!c)
	    return -1;		/* premature EOF while decoding base64 */
	  if (c != '=')
	    return -1;		/* padding `=' expected but not found */
	  continue;
	}

      value |= base64_char_to_value[c] << 6;
      *q++ = 0xff & (value >> 8);

      /* Process fourth byte of a quadruplet.  */
      NEXT_BASE64_CHAR (c, p);
      if (!c)
	return -1;		/* premature EOF while decoding base64 */
      if (c == '=')
	continue;		/* "xxx=": two decoded bytes */

      value |= base64_char_to_value[c];
      *q++ = 0xff & value;
    }

  return q - to;
}

#undef IS_ASCII
#undef IS_BASE64
#undef NEXT_BASE64_CHAR
2058
2059
2060	/* Simple merge sort for use by stable_sort. Implementation courtesy
2061	Zeljko Vrba with additional debugging by Nenad Barbutov. */
2062
2063	static void
2064	mergesort_internal (void base, void temp, size_t size, size_t from, size_t to,
2065	int (cmpfun) PARAMS ((const void , const void *)))
2066	{
2067	#define ELT(array, pos) ((char )(array) + (pos) size)
2068	if (from < to)
2069	{
2070	size_t i, j, k;
2071	size_t mid = (to + from) / 2;
2072	mergesort_internal (base, temp, size, from, mid, cmpfun);
2073	mergesort_internal (base, temp, size, mid + 1, to, cmpfun);
2074	i = from;
2075	j = mid + 1;
2076	for (k = from; (i <= mid) && (j <= to); k++)
2077	if (cmpfun (ELT (base, i), ELT (base, j)) <= 0)
2078	memcpy (ELT (temp, k), ELT (base, i++), size);
2079	else
2080	memcpy (ELT (temp, k), ELT (base, j++), size);
2081	while (i <= mid)
2082	memcpy (ELT (temp, k++), ELT (base, i++), size);
2083	while (j <= to)
2084	memcpy (ELT (temp, k++), ELT (base, j++), size);
2085	for (k = from; k <= to; k++)
2086	memcpy (ELT (base, k), ELT (temp, k), size);
2087	}
2088	#undef ELT
2089	}
2090
2091	/* Stable sort with interface exactly like standard library's qsort.
2092	Uses mergesort internally, allocating temporary storage with
2093	alloca. */
2094
2095	void
2096	stable_sort (void *base, size_t nmemb, size_t size,
2097	int (cmpfun) PARAMS ((const void , const void *)))
2098	{
2099	if (size > 1)
2100	{
2101	void temp = alloca (nmemb size * sizeof (void *));
2102	mergesort_internal (base, temp, size, 0, nmemb - 1, cmpfun);
2103	}
2104	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/essentials/net-misc/wget/src/utils.c

Download in other formats: