download_archive_dot_org.pl
1	#!/usr/bin/perl
2
3 use strict;
4 use warnings;
5
6 # Try and find .mp3 links in archive.org pages
7
# Usage: download_archive_dot_org.pl <archive.org URL>
#
# Fetches the page with curl and scans it line by line for
#   <meta itemprop="name" content="...">            (track titles)
#   <link itemprop="associatedMedia" href="....mp3"> (audio links)
# The Nth title found is paired with the Nth .mp3 link in %link_map.
# Each title is prefixed with a running counter ("1_", "2_", ...) so that
# identical titles still produce distinct hash keys.
if ( !defined $ARGV[0] ) {
    print STDERR "Need to pass archive.org URL\n";
    exit 1;
}

my $page_url = shift(@ARGV);

# List-form pipe open: the URL comes straight from the command line, so it
# must never be handed to a shell. "--" stops curl from treating a URL that
# starts with "-" as an option.
open( my $curl_fh, '-|', 'curl', '-s', '--', $page_url )
    or die "Cannot run curl: $!\n";
my $page_content = do { local $/; <$curl_fh> };
close($curl_fh)
    or die "curl failed for $page_url (exit status " . ( $? >> 8 ) . ")\n";
$page_content = '' unless defined $page_content;

my @names;
my @links;

my $counter = 1;
foreach my $line ( split( "\n", $page_content ) ) {

    # [^"]* instead of .* so the capture stops at the attribute's closing
    # quote even when more attributes follow on the same line.
    if ( $line =~ m/<meta itemprop="name" content="([^"]*)"/ ) {
        push( @names, $counter . "_" . $1 );
        $counter++;
    }
    elsif ( $line =~ m/<link itemprop="associatedMedia" href="([^"]*\.mp3)"/ ) {
        push( @links, $1 );
    }
}

# Titles and links are matched purely by position. When the counts differ,
# only pair as many as both lists provide so no title maps to an undefined
# link, and tell the user the pairing is suspect.
if ( @names != @links ) {
    warn sprintf( "Found %d names but %d .mp3 links; pairing the first %d\n",
        scalar @names, scalar @links,
        ( @names < @links ? scalar @names : scalar @links ) );
}
my $pair_count = @names < @links ? scalar @names : scalar @links;

my %link_map;
@link_map{ @names[ 0 .. $pair_count - 1 ] } = @links[ 0 .. $pair_count - 1 ];
32
# make_filename($name)
#
# Returns a copy of $name with every character that is awkward in a file
# name removed: whitespace, '-', '>', "'", '/', '!', '&' and ':'.
# All other characters are kept, in their original order.
#
# The hyphen is placed last in the character class so Perl reads it as a
# literal character rather than as a range operator.
sub make_filename {
    my ($name) = @_;

    # Copy first, then strip in place, so the caller's value is untouched.
    ( my $filename = $name ) =~ s{[\s>'/!&:-]}{}g;

    return $filename;
}
47
# Download every collected track with wget, saving each one under a
# sanitised version of its "<counter>_<title>" key plus ".mp3".
#
# Keys are visited in order of their numeric counter prefix so downloads
# happen in page order instead of Perl's arbitrary hash order. Keys without
# such a prefix sort first.
my @ordered_keys =
    map  { $_->[1] }
    sort { $a->[0] <=> $b->[0] }
    map  { [ ( m/\A(\d+)_/ ? $1 : 0 ), $_ ] } keys %link_map;

foreach my $key (@ordered_keys) {
    my $url = $link_map{$key};

    # A title with no matching link has nothing to download.
    next unless defined $url;

    my $filename = make_filename($key) . ".mp3";
    print "Downloading $key\n";

    # List-form system(): the URL was scraped from a remote page, so it must
    # not be interpolated into a shell command line. "--" keeps wget from
    # reading a URL that starts with "-" as an option.
    my $status = system( 'wget', '-q', '-O', $filename, '--', $url );
    if ( $status != 0 ) {

        # Keep going: one failed track should not abort the remaining ones.
        warn "Download failed for $key ($url)\n";
    }
}