Line | |
---|
1 | #!/usr/bin/perl
|
---|
2 |
|
---|
3 | use strict;
|
---|
4 | use warnings 'all';
|
---|
5 |
|
---|
6 | use LWP::Simple qw /$ua getstore/;
|
---|
7 |
|
---|
8 | my %urls;
|
---|
9 |
|
---|
10 | my @dummy = qw(
|
---|
11 | http://something.here
|
---|
12 | http://www.pvhp.com
|
---|
13 | );
|
---|
14 | my %dummy;
|
---|
15 |
|
---|
16 | @dummy{@dummy} = ();
|
---|
17 |
|
---|
18 | foreach my $file (<*/*.pod */*/*.pod */*/*/*.pod README README.* INSTALL>) {
|
---|
19 | open my $fh => $file or die "Failed to open $file: $!\n";
|
---|
20 | while (<$fh>) {
|
---|
21 | if (m{(?:http|ftp)://(?:(?!\w<)[-\w~?@=.])+} && !exists $dummy{$&}) {
|
---|
22 | my $url = $&;
|
---|
23 | $url =~ s/\.$//;
|
---|
24 | $urls {$url} ||= { };
|
---|
25 | $urls {$url} {$file} = 1;
|
---|
26 | }
|
---|
27 | }
|
---|
28 | close $fh;
|
---|
29 | }
|
---|
30 |
|
---|
31 | sub fisher_yates_shuffle {
|
---|
32 | my $deck = shift; # $deck is a reference to an array
|
---|
33 | my $i = @$deck;
|
---|
34 | while (--$i) {
|
---|
35 | my $j = int rand ($i+1);
|
---|
36 | @$deck[$i,$j] = @$deck[$j,$i];
|
---|
37 | }
|
---|
38 | }
|
---|
39 |
|
---|
40 | my @urls = keys %urls;
|
---|
41 |
|
---|
42 | fisher_yates_shuffle(\@urls);
|
---|
43 |
|
---|
44 | sub todo {
|
---|
45 | warn "(", scalar @urls, " URLs)\n";
|
---|
46 | }
|
---|
47 |
|
---|
48 | my $MAXPROC = 40;
|
---|
49 | my $MAXURL = 10;
|
---|
50 | my $MAXFORK = $MAXPROC < $MAXURL ? 1 : $MAXPROC / $MAXURL;
|
---|
51 |
|
---|
52 | select(STDERR); $| = 1;
|
---|
53 | select(STDOUT); $| = 1;
|
---|
54 |
|
---|
55 | while (@urls) {
|
---|
56 | my @list;
|
---|
57 | my $pid;
|
---|
58 | my $i;
|
---|
59 |
|
---|
60 | todo();
|
---|
61 |
|
---|
62 | for ($i = 0; $i < $MAXFORK; $i++) {
|
---|
63 | $list[$i] = [ splice @urls, 0, $MAXURL ];
|
---|
64 | $pid = fork;
|
---|
65 | die "Failed to fork: $!\n" unless defined $pid;
|
---|
66 | last unless $pid; # Child.
|
---|
67 | }
|
---|
68 |
|
---|
69 | if ($pid) {
|
---|
70 | # Parent.
|
---|
71 | warn "(waiting)\n";
|
---|
72 | 1 until -1 == wait; # Reap.
|
---|
73 | } else {
|
---|
74 | # Child.
|
---|
75 | foreach my $url (@{$list[$i]}) {
|
---|
76 | my $code = getstore $url, "/dev/null";
|
---|
77 | next if $code == 200;
|
---|
78 | my $f = join ", " => keys %{$urls {$url}};
|
---|
79 | printf "%03d %s: %s\n" => $code, $url, $f;
|
---|
80 | }
|
---|
81 |
|
---|
82 | exit;
|
---|
83 | }
|
---|
84 | }
|
---|
85 |
|
---|
86 | __END__
|
---|
Note:
See
TracBrowser
for help on using the repository browser.