#!/usr/local/bin/perl -w
use strict;
use WWW::Mechanize;
# ---------------------------------------------------------------------------
# Command line: scrape.pl albumurl username password [debuglevel]
#
#   albumurl    any string containing "id=<digits>"; only the numeric album
#               id is extracted from it.
#   username    value entered into the login form's "username" field
#   password    value entered into the login form's "password" field
#   debuglevel  optional; any true value makes the script save each visited
#               page to "<stage>-<name>.html", and a value of 2 or more also
#               prints the links found on selected pages.
# ---------------------------------------------------------------------------
my $debug = 0;
$| = 1;    # unbuffered STDOUT so progress lines appear as they are printed

my ($albumurl, $username, $password, $debugarg) = @ARGV;
$debug = $debugarg || $debug;

# The password is the last mandatory argument, so checking it covers the
# first two as well.  Test definedness and length rather than truth so that a
# password of "0" is not mistaken for a missing argument.
die "Usage: scrape.pl albumurl username password [debuglevel]\n"
    unless defined $password && length $password;

# Stop here when no album id can be found; otherwise every later request
# would be built from an undefined value.
my ($albumid) = $albumurl =~ /id=(\d+)/;
die "Cannot find an album id (id=<number>) in [$albumurl].\n"
    unless defined $albumid;

my $indexurl =
    'http://www.imagestation.com/album/pictures.html?id=' . $albumid;

# Counts navigation steps; used as the numeric prefix of debug dump files.
my $stage = 0;

print "\n";
print "Imagestation Scraper by Maverick Woo\n";
print "== Version 2005-03-15 ==\n";
print "== Debug level $debug ==\n" if $debug;
print "\n";
# ---------------------------------------------------------------------------
# Navigation: album front page -> login form -> redirect page -> album cover
# page -> album index page.  Every step is checked explicitly so the script
# stops with a readable message whether or not WWW::Mechanize's autocheck
# option is active.
# ---------------------------------------------------------------------------
my $mech = WWW::Mechanize->new();
$mech->agent_alias('Windows Mozilla');

# Save the current page as "<stage>-<label>.html" when debugging is enabled.
my $dump_page = sub {
    my ($label) = @_;
    return unless $debug;
    my $dumpfile = $stage . '-' . $label . '.html';
    open my $fh, '>', $dumpfile
        or die "Cannot write debug dump [$dumpfile]: $!\n";
    print {$fh} $mech->content();
    close $fh or die "Cannot close debug dump [$dumpfile]: $!\n";
    return;
};

# Print the text of every link on the current page at debug level 2 or more.
my $list_links = sub {
    return unless $debug >= 2;
    print "Available links:\n";
    for my $link ($mech->links()) {
        my $text = $link->text();    # undef for links without text
        print "-> ", (defined $text ? $text : ''), "\n";
    }
    return;
};

# Stage 1: album front page.
$stage++;
print "Connecting to album $albumid...\n";
$mech->get($indexurl);
die "Cannot fetch [$indexurl] (HTTP status " . $mech->status() . ").\n"
    unless $mech->success();
print "@ Front page\n";
$dump_page->('entry');
$list_links->();

# Stage 2: login form.
$stage++;
$mech->follow_link(text => "click here")
    or die "Cannot find the \"click here\" link on the album front page.\n";
print "@ Log in\n";
$dump_page->('login');
$mech->form_name('mainForm')
    or die "Cannot find the form named mainForm on the login page.\n";
$mech->set_fields(
    'username' => $username,
    'password' => $password
);

# Stage 3: page shown after submitting the login form.
$stage++;
$mech->submit();
die "Login request failed (HTTP status " . $mech->status() . ").\n"
    unless $mech->success();
print "@ Redirect\n";
$dump_page->('redirect');
$list_links->();

# Stage 4: album cover page.
$stage++;
$mech->follow_link(text => "here")
    or die "Cannot find the \"here\" link after logging in (login rejected?).\n";
my $pagetitle = $mech->title();
my ($title) = defined $pagetitle ? $pagetitle =~ /^Album: (.+)/ : ();
unless (defined $title) {
    # Keep going with a placeholder so the download directory still gets a
    # usable name instead of being built from an undefined value.
    warn "Page title does not start with \"Album: \"; using [Untitled].\n";
    $title = 'Untitled';
}
print "@ Album [$title]\n";
$dump_page->('coverpage');

# The title becomes part of a directory name: drop "?", and map "/" to "-"
# and "\" to "_".
$title =~ tr{?}{}d;
$title =~ tr{/\\}{-_};

# Stage 5: album index page listing the thumbnails.
$stage++;
$mech->follow_link(text => "Index")
    or die "Cannot find the \"Index\" link on the album page.\n";
print "@ Index";    # the line is completed once the pictures are counted
$dump_page->('index');
# ---------------------------------------------------------------------------
# Download: collect the thumbnail URLs from the index page, derive the
# matching ".orig.jpg" URL for each, and save the pictures as numbered files
# in "./<title> <albumid>/".  Files that already exist are skipped, so the
# script can be rerun after partial failures.
# ---------------------------------------------------------------------------

# One picture per index-page line that mentions "sraid" and contains a
# "http....thumb.jpg" URL; only the part between "http" and ".thumb.jpg" is
# kept.  Lines without such a URL are dropped before counting so the
# reported total matches what is actually attempted.
# NOTE(review): the capture is greedy, so it assumes one thumbnail URL per
# line with no earlier "http" on that line -- confirm against a real page.
my @prefixes;
for my $line (grep { /sraid/ } split "\n", $mech->content()) {
    my ($prefix) = $line =~ /http(.+)\.thumb\.jpg/;
    push @prefixes, $prefix if $prefix;
}
my $numpics = scalar @prefixes;
print ", with $numpics pic(s)\n";

my $pathname = './' . $title . ' ' . $albumid;
unless (-d $pathname) {
    mkdir $pathname
        or die "Cannot create directory [$pathname]: $!\n";
}

my $counter = 0;
my $success = 0;
my $failure = 0;
my $skipped = 0;
print "@ Downloading\n";
for my $prefix (@prefixes) {
    $counter++;
    my $filename = $pathname . sprintf('/%08d.jpg', $counter);
    print " => $filename";
    if (-e $filename) {
        $skipped++;
        print " [skipping]";
    }
    else {
        sleep(1);    # pause between requests
        my $origurl = 'http' . $prefix . '.orig.jpg';

        # With autocheck enabled get() throws on an HTTP error; trap that so
        # a single bad picture is counted as a failure instead of aborting
        # the whole run.
        my $fetched = eval { $mech->get($origurl); $mech->success() };
        if ($fetched) {
            open my $out, '>', $filename
                or die "Cannot open file [$filename]: $!\n";
            binmode $out;
            my $ok = print {$out} $mech->content();
            $ok = close($out) && $ok;
            unless ($ok) {
                # Remove the partial file so a rerun does not skip it as
                # already downloaded.
                my $err = $!;
                unlink $filename;
                die "Cannot write file [$filename]: $err\n";
            }
            $success++;
            print " OK";
        }
        else {
            $failure++;
            print " Failed";
        }
    }
    print "\n";
}

print "Finished downloading $success file(s)";
print ", skipped $skipped file(s)" if ($skipped);
print ".\n";
print "\n$failure download(s) failed. Rerun.\n" if ($failure);