Context Navigation

retr.c

Visit:

Last change on this file was 3440, checked in by bird, 18 years ago
wget 1.10.2
File size: 27.4 KB

Line
1	/* File retrieval.
2	Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001 Free Software Foundation, Inc.
3
4	This file is part of GNU Wget.
5
6	GNU Wget is free software; you can redistribute it and/or modify
7	it under the terms of the GNU General Public License as published by
8	the Free Software Foundation; either version 2 of the License, or (at
9	your option) any later version.
10
11	GNU Wget is distributed in the hope that it will be useful,
12	but WITHOUT ANY WARRANTY; without even the implied warranty of
13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14	GNU General Public License for more details.
15
16	You should have received a copy of the GNU General Public License
17	along with Wget; if not, write to the Free Software
18	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19
20	In addition, as a special exception, the Free Software Foundation
21	gives permission to link the code of its release of Wget with the
22	OpenSSL project's "OpenSSL" library (or with modified versions of it
23	that use the same license as the "OpenSSL" library), and distribute
24	the linked executables. You must obey the GNU General Public License
25	in all respects for all of the code used other than "OpenSSL". If you
26	modify this file, you may extend this exception to your version of the
27	file, but you are not obligated to do so. If you do not wish to do
28	so, delete this exception statement from your version. */
29
30	#include <config.h>
31
32	#include <stdio.h>
33	#include <stdlib.h>
34	#include <sys/types.h>
35	#ifdef HAVE_UNISTD_H
36	# include <unistd.h>
37	#endif /* HAVE_UNISTD_H */
38	#include <errno.h>
39	#ifdef HAVE_STRING_H
40	# include <string.h>
41	#else
42	# include <strings.h>
43	#endif /* HAVE_STRING_H */
44	#include <assert.h>
45
46	#include "wget.h"
47	#include "utils.h"
48	#include "retr.h"
49	#include "progress.h"
50	#include "url.h"
51	#include "recur.h"
52	#include "ftp.h"
53	#include "host.h"
54	#include "connect.h"
55	#include "hash.h"
56	#include "convert.h"
57	#include "ptimer.h"
58
59	#ifndef errno
60	extern int errno;
61	#endif
62
63	/* Total size of downloaded files. Used to enforce quota. */
64	SUM_SIZE_INT total_downloaded_bytes;
65
66	/* If non-NULL, the stream to which output should be written. This
67	stream is initialized when `-O' is used. */
68	FILE *output_stream;
69
70	/* Whether output_document is a regular file we can manipulate,
71	i.e. not `-' or a device file. */
72	int output_stream_regular;
73
74
75	static struct {
76	wgint chunk_bytes;
77	double chunk_start;
78	double sleep_adjust;
79	} limit_data;
80
81	static void
82	limit_bandwidth_reset (void)
83	{
84	limit_data.chunk_bytes = 0;
85	limit_data.chunk_start = 0;
86	limit_data.sleep_adjust = 0;
87	}
88
89	/* Limit the bandwidth by pausing the download for an amount of time.
90	BYTES is the number of bytes received from the network, and TIMER
91	is the timer that started at the beginning of download. */
92
93	static void
94	limit_bandwidth (wgint bytes, struct ptimer *timer)
95	{
96	double delta_t = ptimer_read (timer) - limit_data.chunk_start;
97	double expected;
98
99	limit_data.chunk_bytes += bytes;
100
101	/* Calculate the amount of time we expect downloading the chunk
102	should take. If in reality it took less time, sleep to
103	compensate for the difference. */
104	expected = 1000.0 * limit_data.chunk_bytes / opt.limit_rate;
105
106	if (expected > delta_t)
107	{
108	double slp = expected - delta_t + limit_data.sleep_adjust;
109	double t0, t1;
110	if (slp < 200)
111	{
112	DEBUGP (("deferring a %.2f ms sleep (%s/%.2f).\n",
113	slp, number_to_static_string (limit_data.chunk_bytes),
114	delta_t));
115	return;
116	}
117	DEBUGP (("\nsleeping %.2f ms for %s bytes, adjust %.2f ms\n",
118	slp, number_to_static_string (limit_data.chunk_bytes),
119	limit_data.sleep_adjust));
120
121	t0 = ptimer_read (timer);
122	xsleep (slp / 1000);
123	t1 = ptimer_measure (timer);
124
125	/* Due to scheduling, we probably slept slightly longer (or
126	shorter) than desired. Calculate the difference between the
127	desired and the actual sleep, and adjust the next sleep by
128	that amount. */
129	limit_data.sleep_adjust = slp - (t1 - t0);
130	/* If sleep_adjust is very large, it's likely due to suspension
131	and not clock inaccuracy. Don't enforce those. */
132	if (limit_data.sleep_adjust > 500)
133	limit_data.sleep_adjust = 500;
134	else if (limit_data.sleep_adjust < -500)
135	limit_data.sleep_adjust = -500;
136	}
137
138	limit_data.chunk_bytes = 0;
139	limit_data.chunk_start = ptimer_read (timer);
140	}
141
/* Smaller of I and J.  Each argument may be evaluated twice, so do
   not pass expressions with side effects.  */
#ifndef MIN
# define MIN(i, j) ((i) <= (j) ? (i) : (j))
#endif
145
146	/* Write data in BUF to OUT. However, if *SKIP is non-zero, skip that
147	amount of data and decrease SKIP. Increment *TOTAL by the amount
148	of data written. */
149
150	static int
151	write_data (FILE out, const char buf, int bufsize, wgint *skip,
152	wgint *written)
153	{
154	if (!out)
155	return 1;
156	if (*skip > bufsize)
157	{
158	*skip -= bufsize;
159	return 1;
160	}
161	if (*skip)
162	{
163	buf += *skip;
164	bufsize -= *skip;
165	*skip = 0;
166	if (bufsize == 0)
167	return 1;
168	}
169
170	fwrite (buf, 1, bufsize, out);
171	*written += bufsize;
172
173	/* Immediately flush the downloaded data. This should not hinder
174	performance: fast downloads will arrive in large 16K chunks
175	(which stdio would write out immediately anyway), and slow
176	downloads wouldn't be limited by disk speed. */
177	fflush (out);
178	return !ferror (out);
179	}
180
181	/* Read the contents of file descriptor FD until it the connection
182	terminates or a read error occurs. The data is read in portions of
183	up to 16K and written to OUT as it arrives. If opt.verbose is set,
184	the progress is shown.
185
186	TOREAD is the amount of data expected to arrive, normally only used
187	by the progress gauge.
188
189	STARTPOS is the position from which the download starts, used by
190	the progress gauge. If QTYREAD is non-NULL, the value it points to
191	is incremented by the amount of data read from the network. If
192	QTYWRITTEN is non-NULL, the value it points to is incremented by
193	the amount of data written to disk. The time it took to download
194	the data (in milliseconds) is stored to ELAPSED.
195
196	The function exits and returns the amount of data read. In case of
197	error while reading data, -1 is returned. In case of error while
198	writing data, -2 is returned. */
199
200	int
201	fd_read_body (int fd, FILE *out, wgint toread, wgint startpos,
202	wgint qtyread, wgint qtywritten, double *elapsed, int flags)
203	{
204	int ret = 0;
205
206	static char dlbuf[16384];
207	int dlbufsize = sizeof (dlbuf);
208
209	struct ptimer *timer = NULL;
210	double last_successful_read_tm = 0;
211
212	/* The progress gauge, set according to the user preferences. */
213	void *progress = NULL;
214
215	/* Non-zero if the progress gauge is interactive, i.e. if it can
216	continually update the display. When true, smaller timeout
217	values are used so that the gauge can update the display when
218	data arrives slowly. */
219	int progress_interactive = 0;
220
221	int exact = flags & rb_read_exactly;
222	wgint skip = 0;
223
224	/* How much data we've read/written. */
225	wgint sum_read = 0;
226	wgint sum_written = 0;
227
228	if (flags & rb_skip_startpos)
229	skip = startpos;
230
231	if (opt.verbose)
232	{
233	/* If we're skipping STARTPOS bytes, pass 0 as the INITIAL
234	argument to progress_create because the indicator doesn't
235	(yet) know about "skipping" data. */
236	progress = progress_create (skip ? 0 : startpos, startpos + toread);
237	progress_interactive = progress_interactive_p (progress);
238	}
239
240	if (opt.limit_rate)
241	limit_bandwidth_reset ();
242
243	/* A timer is needed for tracking progress, for throttling, and for
244	tracking elapsed time. If either of these are requested, start
245	the timer. */
246	if (progress \|\| opt.limit_rate \|\| elapsed)
247	{
248	timer = ptimer_new ();
249	last_successful_read_tm = 0;
250	}
251
252	/* Use a smaller buffer for low requested bandwidths. For example,
253	with --limit-rate=2k, it doesn't make sense to slurp in 16K of
254	data and then sleep for 8s. With buffer size equal to the limit,
255	we never have to sleep for more than one second. */
256	if (opt.limit_rate && opt.limit_rate < dlbufsize)
257	dlbufsize = opt.limit_rate;
258
259	/* Read from FD while there is data to read. Normally toread==0
260	means that it is unknown how much data is to arrive. However, if
261	EXACT is set, then toread==0 means what it says: that no data
262	should be read. */
263	while (!exact \|\| (sum_read < toread))
264	{
265	int rdsize = exact ? MIN (toread - sum_read, dlbufsize) : dlbufsize;
266	double tmout = opt.read_timeout;
267	if (progress_interactive)
268	{
269	/* For interactive progress gauges, always specify a ~1s
270	timeout, so that the gauge can be updated regularly even
271	when the data arrives very slowly or stalls. */
272	tmout = 0.95;
273	if (opt.read_timeout)
274	{
275	double waittm;
276	waittm = (ptimer_read (timer) - last_successful_read_tm) / 1000;
277	if (waittm + tmout > opt.read_timeout)
278	{
279	/* Don't let total idle time exceed read timeout. */
280	tmout = opt.read_timeout - waittm;
281	if (tmout < 0)
282	{
283	/* We've already exceeded the timeout. */
284	ret = -1, errno = ETIMEDOUT;
285	break;
286	}
287	}
288	}
289	}
290	ret = fd_read (fd, dlbuf, rdsize, tmout);
291
292	if (progress_interactive && ret < 0 && errno == ETIMEDOUT)
293	ret = 0; /* interactive timeout, handled above */
294	else if (ret <= 0)
295	break; /* EOF or read error */
296
297	if (progress \|\| opt.limit_rate)
298	{
299	ptimer_measure (timer);
300	if (ret > 0)
301	last_successful_read_tm = ptimer_read (timer);
302	}
303
304	if (ret > 0)
305	{
306	sum_read += ret;
307	if (!write_data (out, dlbuf, ret, &skip, &sum_written))
308	{
309	ret = -2;
310	goto out_;
311	}
312	}
313
314	if (opt.limit_rate)
315	limit_bandwidth (ret, timer);
316
317	if (progress)
318	progress_update (progress, ret, ptimer_read (timer));
319	#ifdef WINDOWS
320	if (toread > 0 && !opt.quiet)
321	ws_percenttitle (100.0 *
322	(startpos + sum_read) / (startpos + toread));
323	#endif
324	}
325	if (ret < -1)
326	ret = -1;
327
328	out_:
329	if (progress)
330	progress_finish (progress, ptimer_read (timer));
331
332	if (elapsed)
333	*elapsed = ptimer_read (timer);
334	if (timer)
335	ptimer_destroy (timer);
336
337	if (qtyread)
338	*qtyread += sum_read;
339	if (qtywritten)
340	*qtywritten += sum_written;
341
342	return ret;
343	}
344
345
346	/* Read a hunk of data from FD, up until a terminator. The terminator
347	is whatever the TERMINATOR function determines it to be; for
348	example, it can be a line of data, or the head of an HTTP response.
349	The function returns the data read allocated with malloc.
350
351	In case of error, NULL is returned. In case of EOF and no data
352	read, NULL is returned and errno set to 0. In case of EOF with
353	data having been read, the data is returned, but it will
354	(obviously) not contain the terminator.
355
356	The idea is to be able to read a line of input, or otherwise a hunk
357	of text, such as the head of an HTTP request, without crossing the
358	boundary, so that the next call to fd_read etc. reads the data
359	after the hunk. To achieve that, this function does the following:
360
361	1. Peek at available data.
362
363	2. Determine whether the peeked data, along with the previously
364	read data, includes the terminator.
365
366	2a. If yes, read the data until the end of the terminator, and
367	exit.
368
369	2b. If no, read the peeked data and goto 1.
370
371	The function is careful to assume as little as possible about the
372	implementation of peeking. For example, every peek is followed by
373	a read. If the read returns a different amount of data, the
374	process is retried until all data arrives safely.
375
376	SIZEHINT is the buffer size sufficient to hold all the data in the
377	typical case (it is used as the initial buffer size). MAXSIZE is
378	the maximum amount of memory this function is allowed to allocate,
379	or 0 if no upper limit is to be enforced.
380
381	This function should be used as a building block for other
382	functions -- see fd_read_line as a simple example. */
383
384	char *
385	fd_read_hunk (int fd, hunk_terminator_t terminator, long sizehint, long maxsize)
386	{
387	long bufsize = sizehint;
388	char *hunk = xmalloc (bufsize);
389	int tail = 0; /* tail position in HUNK */
390
391	assert (maxsize >= bufsize);
392
393	while (1)
394	{
395	const char *end;
396	int pklen, rdlen, remain;
397
398	/* First, peek at the available data. */
399
400	pklen = fd_peek (fd, hunk + tail, bufsize - 1 - tail, -1.0);
401	if (pklen < 0)
402	{
403	xfree (hunk);
404	return NULL;
405	}
406	end = terminator (hunk, tail, pklen);
407	if (end)
408	{
409	/* The data contains the terminator: we'll drain the data up
410	to the end of the terminator. */
411	remain = end - (hunk + tail);
412	if (remain == 0)
413	{
414	/* No more data needs to be read. */
415	hunk[tail] = '\0';
416	return hunk;
417	}
418	if (bufsize - 1 < tail + remain)
419	{
420	bufsize = tail + remain + 1;
421	hunk = xrealloc (hunk, bufsize);
422	}
423	}
424	else
425	/* No terminator: simply read the data we know is (or should
426	be) available. */
427	remain = pklen;
428
429	/* Now, read the data. Note that we make no assumptions about
430	how much data we'll get. (Some TCP stacks are notorious for
431	read returning less data than the previous MSG_PEEK.) */
432
433	rdlen = fd_read (fd, hunk + tail, remain, 0.0);
434	if (rdlen < 0)
435	{
436	xfree_null (hunk);
437	return NULL;
438	}
439	tail += rdlen;
440	hunk[tail] = '\0';
441
442	if (rdlen == 0)
443	{
444	if (tail == 0)
445	{
446	/* EOF without anything having been read */
447	xfree (hunk);
448	errno = 0;
449	return NULL;
450	}
451	else
452	/* EOF seen: return the data we've read. */
453	return hunk;
454	}
455	if (end && rdlen == remain)
456	/* The terminator was seen and the remaining data drained --
457	we got what we came for. */
458	return hunk;
459
460	/* Keep looping until all the data arrives. */
461
462	if (tail == bufsize - 1)
463	{
464	/* Double the buffer size, but refuse to allocate more than
465	MAXSIZE bytes. */
466	if (maxsize && bufsize >= maxsize)
467	{
468	xfree (hunk);
469	errno = ENOMEM;
470	return NULL;
471	}
472	bufsize <<= 1;
473	if (maxsize && bufsize > maxsize)
474	bufsize = maxsize;
475	hunk = xrealloc (hunk, bufsize);
476	}
477	}
478	}
479
/* Hunk terminator for fd_read_line: a hunk ends right after the first
   newline.  HUNK is the buffer, OLDLEN the number of bytes already
   read into it and PEEKLEN the number of freshly peeked bytes that
   follow them.  Returns a pointer just past the '\n' found among the
   peeked bytes, or NULL if they contain no newline.  */

static const char *
line_terminator (const char *hunk, int oldlen, int peeklen)
{
  const char *p;

  /* A non-positive length must not reach memchr, where it would be
     converted to a huge size_t.  */
  if (peeklen <= 0)
    return NULL;

  p = memchr (hunk + oldlen, '\n', peeklen);
  if (p)
    /* p+1 because we want the line to include '\n' */
    return p + 1;
  return NULL;
}
489
490	/* The maximum size of the single line we agree to accept. This is
491	not meant to impose an arbitrary limit, but to protect the user
492	from Wget slurping up available memory upon encountering malicious
493	or buggy server output. Define it to 0 to remove the limit. */
494	#define FD_READ_LINE_MAX 4096
495
496	/* Read one line from FD and return it. The line is allocated using
497	malloc, but is never larger than FD_READ_LINE_MAX.
498
499	If an error occurs, or if no data can be read, NULL is returned.
500	In the former case errno indicates the error condition, and in the
501	latter case, errno is NULL. */
502
503	char *
504	fd_read_line (int fd)
505	{
506	return fd_read_hunk (fd, line_terminator, 128, FD_READ_LINE_MAX);
507	}
508
509
510	/* Return a printed representation of the download rate, as
511	appropriate for the speed. If PAD is non-zero, strings will be
512	padded to the width of 7 characters (xxxx.xx). */
513	char *
514	retr_rate (wgint bytes, double msecs, int pad)
515	{
516	static char res[20];
517	static const char *rate_names[] = {"B/s", "KB/s", "MB/s", "GB/s" };
518	int units = 0;
519
520	double dlrate = calc_rate (bytes, msecs, &units);
521	sprintf (res, pad ? "%7.2f %s" : "%.2f %s", dlrate, rate_names[units]);
522
523	return res;
524	}
525
526	/* Calculate the download rate and trim it as appropriate for the
527	speed. Appropriate means that if rate is greater than 1K/s,
528	kilobytes are used, and if rate is greater than 1MB/s, megabytes
529	are used.
530
531	UNITS is zero for B/s, one for KB/s, two for MB/s, and three for
532	GB/s. */
533	double
534	calc_rate (wgint bytes, double msecs, int *units)
535	{
536	double dlrate;
537
538	assert (msecs >= 0);
539	assert (bytes >= 0);
540
541	if (msecs == 0)
542	/* If elapsed time is exactly zero, it means we're under the
543	resolution of the timer. This can easily happen on systems
544	that use time() for the timer. Since the interval lies between
545	0 and the timer's resolution, assume half the resolution. */
546	msecs = ptimer_resolution () / 2.0;
547
548	dlrate = 1000.0 * bytes / msecs;
549	if (dlrate < 1024.0)
550	*units = 0;
551	else if (dlrate < 1024.0 * 1024.0)
552	*units = 1, dlrate /= 1024.0;
553	else if (dlrate < 1024.0 * 1024.0 * 1024.0)
554	units = 2, dlrate /= (1024.0 1024.0);
555	else
556	/* Maybe someone will need this, one day. */
557	units = 3, dlrate /= (1024.0 1024.0 * 1024.0);
558
559	return dlrate;
560	}
561
562
/* Maximum number of allowed redirections.  20 was chosen as a
   "reasonable" value, which is low enough to not cause havoc, yet
   high enough to guarantee that normal retrievals will not be hurt by
   the check.  */

#define MAX_REDIRECTIONS 20

/* Stash opt.post_data and opt.post_file_name in the local variables
   saved_post_data / saved_post_file_name and clear the options.
   Expects those locals and post_data_suspended to be in scope.  */
#define SUSPEND_POST_DATA do {			\
  post_data_suspended = 1;			\
  saved_post_data = opt.post_data;		\
  saved_post_file_name = opt.post_file_name;	\
  opt.post_data = NULL;				\
  opt.post_file_name = NULL;			\
} while (0)

/* Undo SUSPEND_POST_DATA; a no-op if nothing is suspended.  */
#define RESTORE_POST_DATA do {				\
  if (post_data_suspended)				\
    {							\
      opt.post_data = saved_post_data;			\
      opt.post_file_name = saved_post_file_name;	\
      post_data_suspended = 0;				\
    }							\
} while (0)
586
587	static char getproxy PARAMS ((struct url ));
588
589	/* Retrieve the given URL. Decides which loop to call -- HTTP, FTP,
590	FTP, proxy, etc. */
591
592	/* #### This function should be rewritten so it doesn't return from
593	multiple points. */
594
595	uerr_t
596	retrieve_url (const char origurl, char file, char *newloc,
597	const char refurl, int dt)
598	{
599	uerr_t result;
600	char *url;
601	int location_changed, dummy;
602	char mynewloc, proxy;
603	struct url u, proxy_url;
604	int up_error_code; /* url parse error code */
605	char *local_file;
606	int redirection_count = 0;
607
608	int post_data_suspended = 0;
609	char *saved_post_data = NULL;
610	char *saved_post_file_name = NULL;
611
612	/* If dt is NULL, use local storage. */
613	if (!dt)
614	{
615	dt = &dummy;
616	dummy = 0;
617	}
618	url = xstrdup (origurl);
619	if (newloc)
620	*newloc = NULL;
621	if (file)
622	*file = NULL;
623
624	u = url_parse (url, &up_error_code);
625	if (!u)
626	{
627	logprintf (LOG_NOTQUIET, "%s: %s.\n", url, url_error (up_error_code));
628	xfree (url);
629	return URLERROR;
630	}
631
632	if (!refurl)
633	refurl = opt.referer;
634
635	redirected:
636
637	result = NOCONERROR;
638	mynewloc = NULL;
639	local_file = NULL;
640	proxy_url = NULL;
641
642	proxy = getproxy (u);
643	if (proxy)
644	{
645	/* Parse the proxy URL. */
646	proxy_url = url_parse (proxy, &up_error_code);
647	if (!proxy_url)
648	{
649	logprintf (LOG_NOTQUIET, _("Error parsing proxy URL %s: %s.\n"),
650	proxy, url_error (up_error_code));
651	xfree (url);
652	RESTORE_POST_DATA;
653	return PROXERR;
654	}
655	if (proxy_url->scheme != SCHEME_HTTP && proxy_url->scheme != u->scheme)
656	{
657	logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"), proxy);
658	url_free (proxy_url);
659	xfree (url);
660	RESTORE_POST_DATA;
661	return PROXERR;
662	}
663	}
664
665	if (u->scheme == SCHEME_HTTP
666	#ifdef HAVE_SSL
667	\|\| u->scheme == SCHEME_HTTPS
668	#endif
669	\|\| (proxy_url && proxy_url->scheme == SCHEME_HTTP))
670	{
671	result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url);
672	}
673	else if (u->scheme == SCHEME_FTP)
674	{
675	/* If this is a redirection, temporarily turn off opt.ftp_glob
676	and opt.recursive, both being undesirable when following
677	redirects. */
678	int oldrec = opt.recursive, oldglob = opt.ftp_glob;
679	if (redirection_count)
680	opt.recursive = opt.ftp_glob = 0;
681
682	result = ftp_loop (u, dt, proxy_url);
683	opt.recursive = oldrec;
684	opt.ftp_glob = oldglob;
685
686	/* There is a possibility of having HTTP being redirected to
687	FTP. In these cases we must decide whether the text is HTML
688	according to the suffix. The HTML suffixes are `.html',
689	`.htm' and a few others, case-insensitive. */
690	if (redirection_count && local_file && u->scheme == SCHEME_FTP)
691	{
692	if (has_html_suffix_p (local_file))
693	*dt \|= TEXTHTML;
694	}
695	}
696
697	if (proxy_url)
698	{
699	url_free (proxy_url);
700	proxy_url = NULL;
701	}
702
703	location_changed = (result == NEWLOCATION);
704	if (location_changed)
705	{
706	char *construced_newloc;
707	struct url *newloc_parsed;
708
709	assert (mynewloc != NULL);
710
711	if (local_file)
712	xfree (local_file);
713
714	/* The HTTP specs only allow absolute URLs to appear in
715	redirects, but a ton of boneheaded webservers and CGIs out
716	there break the rules and use relative URLs, and popular
717	browsers are lenient about this, so wget should be too. */
718	construced_newloc = uri_merge (url, mynewloc);
719	xfree (mynewloc);
720	mynewloc = construced_newloc;
721
722	/* Now, see if this new location makes sense. */
723	newloc_parsed = url_parse (mynewloc, &up_error_code);
724	if (!newloc_parsed)
725	{
726	logprintf (LOG_NOTQUIET, "%s: %s.\n", escnonprint_uri (mynewloc),
727	url_error (up_error_code));
728	url_free (u);
729	xfree (url);
730	xfree (mynewloc);
731	RESTORE_POST_DATA;
732	return result;
733	}
734
735	/* Now mynewloc will become newloc_parsed->url, because if the
736	Location contained relative paths like .././something, we
737	don't want that propagating as url. */
738	xfree (mynewloc);
739	mynewloc = xstrdup (newloc_parsed->url);
740
741	/* Check for max. number of redirections. */
742	if (++redirection_count > MAX_REDIRECTIONS)
743	{
744	logprintf (LOG_NOTQUIET, _("%d redirections exceeded.\n"),
745	MAX_REDIRECTIONS);
746	url_free (newloc_parsed);
747	url_free (u);
748	xfree (url);
749	xfree (mynewloc);
750	RESTORE_POST_DATA;
751	return WRONGCODE;
752	}
753
754	xfree (url);
755	url = mynewloc;
756	url_free (u);
757	u = newloc_parsed;
758
759	/* If we're being redirected from POST, we don't want to POST
760	again. Many requests answer POST with a redirection to an
761	index page; that redirection is clearly a GET. We "suspend"
762	POST data for the duration of the redirections, and restore
763	it when we're done. */
764	if (!post_data_suspended)
765	SUSPEND_POST_DATA;
766
767	goto redirected;
768	}
769
770	if (local_file)
771	{
772	if (*dt & RETROKF)
773	{
774	register_download (u->url, local_file);
775	if (redirection_count && 0 != strcmp (origurl, u->url))
776	register_redirection (origurl, u->url);
777	if (*dt & TEXTHTML)
778	register_html (u->url, local_file);
779	}
780	}
781
782	if (file)
783	*file = local_file ? local_file : NULL;
784	else
785	xfree_null (local_file);
786
787	url_free (u);
788
789	if (redirection_count)
790	{
791	if (newloc)
792	*newloc = url;
793	else
794	xfree (url);
795	}
796	else
797	{
798	if (newloc)
799	*newloc = NULL;
800	xfree (url);
801	}
802
803	RESTORE_POST_DATA;
804
805	return result;
806	}
807
808	/* Find the URLs in the file and call retrieve_url() for each of
809	them. If HTML is non-zero, treat the file as HTML, and construct
810	the URLs accordingly.
811
812	If opt.recursive is set, call retrieve_tree() for each file. */
813
814	uerr_t
815	retrieve_from_file (const char file, int html, int count)
816	{
817	uerr_t status;
818	struct urlpos url_list, cur_url;
819
820	url_list = (html ? get_urls_html (file, NULL, NULL)
821	: get_urls_file (file));
822	status = RETROK; /* Suppose everything is OK. */
823	count = 0; / Reset the URL count. */
824
825	for (cur_url = url_list; cur_url; cur_url = cur_url->next, ++*count)
826	{
827	char filename = NULL, new_file = NULL;
828	int dt;
829
830	if (cur_url->ignore_when_downloading)
831	continue;
832
833	if (opt.quota && total_downloaded_bytes > opt.quota)
834	{
835	status = QUOTEXC;
836	break;
837	}
838	if ((opt.recursive \|\| opt.page_requisites)
839	&& cur_url->url->scheme != SCHEME_FTP)
840	status = retrieve_tree (cur_url->url->url);
841	else
842	status = retrieve_url (cur_url->url->url, &filename, &new_file, NULL, &dt);
843
844	if (filename && opt.delete_after && file_exists_p (filename))
845	{
846	DEBUGP (("\
847	Removing file due to --delete-after in retrieve_from_file():\n"));
848	logprintf (LOG_VERBOSE, _("Removing %s.\n"), filename);
849	if (unlink (filename))
850	logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
851	dt &= ~RETROKF;
852	}
853
854	xfree_null (new_file);
855	xfree_null (filename);
856	}
857
858	/* Free the linked list of URL-s. */
859	free_urlpos (url_list);
860
861	return status;
862	}
863
864	/* Print `giving up', or `retrying', depending on the impending
865	action. N1 and N2 are the attempt number and the attempt limit. */
866	void
867	printwhat (int n1, int n2)
868	{
869	logputs (LOG_VERBOSE, (n1 == n2) ? _("Giving up.\n\n") : _("Retrying.\n\n"));
870	}
871
872	/* If opt.wait or opt.waitretry are specified, and if certain
873	conditions are met, sleep the appropriate number of seconds. See
874	the documentation of --wait and --waitretry for more information.
875
876	COUNT is the count of current retrieval, beginning with 1. */
877
878	void
879	sleep_between_retrievals (int count)
880	{
881	static int first_retrieval = 1;
882
883	if (first_retrieval)
884	{
885	/* Don't sleep before the very first retrieval. */
886	first_retrieval = 0;
887	return;
888	}
889
890	if (opt.waitretry && count > 1)
891	{
892	/* If opt.waitretry is specified and this is a retry, wait for
893	COUNT-1 number of seconds, or for opt.waitretry seconds. */
894	if (count <= opt.waitretry)
895	xsleep (count - 1.0);
896	else
897	xsleep (opt.waitretry);
898	}
899	else if (opt.wait)
900	{
901	if (!opt.random_wait \|\| count > 1)
902	/* If random-wait is not specified, or if we are sleeping
903	between retries of the same download, sleep the fixed
904	interval. */
905	xsleep (opt.wait);
906	else
907	{
908	/* Sleep a random amount of time averaging in opt.wait
909	seconds. The sleeping amount ranges from 0 to
910	opt.wait2, inclusive. /
911	double waitsecs = 2 * opt.wait * random_float ();
912	DEBUGP (("sleep_between_retrievals: avg=%f,sleep=%f\n",
913	opt.wait, waitsecs));
914	xsleep (waitsecs);
915	}
916	}
917	}
918
919	/* Free the linked list of urlpos. */
920	void
921	free_urlpos (struct urlpos *l)
922	{
923	while (l)
924	{
925	struct urlpos *next = l->next;
926	if (l->url)
927	url_free (l->url);
928	xfree_null (l->local_name);
929	xfree (l);
930	l = next;
931	}
932	}
933
934	/* Rotate FNAME opt.backups times */
935	void
936	rotate_backups(const char *fname)
937	{
938	int maxlen = strlen (fname) + 1 + numdigit (opt.backups) + 1;
939	char from = (char )alloca (maxlen);
940	char to = (char )alloca (maxlen);
941	struct_stat sb;
942	int i;
943
944	if (stat (fname, &sb) == 0)
945	if (S_ISREG (sb.st_mode) == 0)
946	return;
947
948	for (i = opt.backups; i > 1; i--)
949	{
950	sprintf (from, "%s.%d", fname, i - 1);
951	sprintf (to, "%s.%d", fname, i);
952	rename (from, to);
953	}
954
955	sprintf (to, "%s.%d", fname, 1);
956	rename(fname, to);
957	}
958
959	static int no_proxy_match PARAMS ((const char , const char *));
960
961	/* Return the URL of the proxy appropriate for url U. */
962
963	static char *
964	getproxy (struct url *u)
965	{
966	char *proxy = NULL;
967	char *rewritten_url;
968	static char rewritten_storage[1024];
969
970	if (!opt.use_proxy)
971	return NULL;
972	if (!no_proxy_match (u->host, (const char **)opt.no_proxy))
973	return NULL;
974
975	switch (u->scheme)
976	{
977	case SCHEME_HTTP:
978	proxy = opt.http_proxy ? opt.http_proxy : getenv ("http_proxy");
979	break;
980	#ifdef HAVE_SSL
981	case SCHEME_HTTPS:
982	proxy = opt.https_proxy ? opt.https_proxy : getenv ("https_proxy");
983	break;
984	#endif
985	case SCHEME_FTP:
986	proxy = opt.ftp_proxy ? opt.ftp_proxy : getenv ("ftp_proxy");
987	break;
988	case SCHEME_INVALID:
989	break;
990	}
991	if (!proxy \|\| !*proxy)
992	return NULL;
993
994	/* Handle shorthands. `rewritten_storage' is a kludge to allow
995	getproxy() to return static storage. */
996	rewritten_url = rewrite_shorthand_url (proxy);
997	if (rewritten_url)
998	{
999	strncpy (rewritten_storage, rewritten_url, sizeof (rewritten_storage));
1000	rewritten_storage[sizeof (rewritten_storage) - 1] = '\0';
1001	proxy = rewritten_storage;
1002	}
1003
1004	return proxy;
1005	}
1006
/* Should a host be accessed through proxy, concerning no_proxy?
   Returns non-zero if HOST may go through the proxy: either NO_PROXY
   is NULL, or sufmatch() reports no match of HOST against it.  */
static int
no_proxy_match (const char *host, const char **no_proxy)
{
  if (!no_proxy)
    return 1;
  else
    return !sufmatch (no_proxy, host);
}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/essentials/net-misc/wget/src/retr.c

Download in other formats: