#!/usr/bin/perl
#
# Updated by JWS to work with the new database scheme (to get the correct directory name)
# September 23rd 2007. www.summet.com/blog
# -Added support for multiple runs (if albumart.jpg already exists, we
# skip that album/artist.)
# -Added search only by album title if album+artist search fails. Helps "Various Artist" albums.
#
# - Fixes by Mike Wison wizREMOVEMEgnome@gmailDOT.com to make it work
#   with Amazon updates May 4th 2012.
# Note: Replace MYTHUSER and MYTHPASSWORD with the appropriate values from your .mythtv/mysql.txt file.
# And rename this file to .pl (execute with "perl amazon_album_art_scraper.pl")
#
#
# Known gotchas: 
# -If you have an album where each track has a different artist (soundtracks, etc), the script
#  will search for each album/artist combination. Sometimes this helps, most of the time it just
#  wastes bandwidth and time.
# -If each of your albums is NOT in its own directory, the albumart.jpg will keep getting overwritten.
#  Sorry, if you use some funky directory organizing system, re-work the script to work for you. Otherwise, 
#  just do what 90% of the other music listeners do and accept the (sane, reasonable) defaults offered
#  by your music ripping software of choice.
# -Obviously, the matching of album art to keywords is only as good as the CDDB database and Amazon
#  searches, so sometimes you will get a bogus image. Hey, at least it's not from gotse...
#
#
#Amazon Album Art Scraper. Originally by Thierry  at
#http://scraping.icebo.org/index.php/2007/08/07/amazon-album-art-scraper/
# Aug 7th 2007.
#
#
use File::Basename;
use DBI;
use LWP::UserAgent;
use Sys::Hostname;

# ---------------------------------------------------------------------------
# Setup: print the banner, connect to the mythconverg database, prepare the
# statements used by the rest of the script, create the HTTP client and look
# up the MythMusic base directory for this host.
#
# Leaves these lexicals in scope for the code below:
#   $dbh        database handle
#   $sth1       album/artist listing      $sth2  album_id -> directory_id
#   $sth3       directory_id -> path      $ua    LWP::UserAgent instance
#   $directory  MythMusic base directory, always ending in "/"
# ---------------------------------------------------------------------------
print "Amazon AlbumArt Scraper\n";
print "=======================\n";

# The MusicLocation setting is looked up per hostname (see the query below).
my $hostname = hostname;
print "[+] Hostname Found: $hostname\n";

# MYTHUSER / MYTHPASSWORD are placeholders the user replaces by hand (see the
# header of this file), so they are deliberately kept as literal strings.
my $dbh = DBI->connect('dbi:mysql:database=mythconverg;host=localhost;', 'MYTHUSER', 'MYTHPASSWORD')
    or die "[-] Cannot connect to DB\n";

# The main loop sleeps between requests and can run for a long time, so let
# the driver reconnect transparently if the server drops the connection.
$dbh->{'mysql_auto_reconnect'} = 1;

my $sth = $dbh->prepare("select data from settings where value = 'MusicLocation' and hostname = ?")
    or die "[-] Cannot prepare SQL statement 1\n";

my $sth1 = $dbh->prepare("select distinct music_songs.album_id,album_name, artist_name from music_songs, music_albums, music_artists where music_songs.artist_id = music_artists.artist_id and music_songs.album_id = music_albums.album_id order by rand()")
    or die "[-] Cannot prepare SQL statement 2\n";

my $sth2 = $dbh->prepare("select directory_id from music_songs where album_id = ? limit 1")
    or die "[-] Cannot prepare SQL statement 3\n";

# An empty agent string is passed on purpose (kept from the original script).
my $ua = LWP::UserAgent->new( agent => "" ) or die "[-] Cannot Create UserAgent\n";

my $sth3 = $dbh->prepare("select path from music_directories where directory_id = ? limit 1")
    or die "[-] Cannot prepare SQL statement 4\n";

$sth->execute($hostname) or die "[-] Error Fetching MythMusic Directory\n";

# List-context fetch: take the first (only) column of the first row.
my ($directory) = $sth->fetchrow_array;
$sth->finish;

if ($directory) {
    print "[+] MythMusic Directory found at $directory\n";
}
else {
    print "[-] MythMusic Directory not found\n";
    $dbh->disconnect;
    # This is a failure, so report it through the exit status as well.
    exit 1;
}

# The album path is appended directly to $directory further down
# ("$directory$dirname/albumart.jpg"), so make sure the separator is present
# even when the stored setting has no trailing slash.
$directory .= '/' unless $directory =~ m{/\z};

# ---------------------------------------------------------------------------
# Main loop: for every distinct (album, artist) pair, search Amazon for a
# cover image and save it as albumart.jpg in the album's directory.
#
# Uses $sth1, $sth2, $sth3, $ua and $directory set up earlier in the script.
# Albums that already have an albumart.jpg are skipped, so the script can be
# re-run safely.
# ---------------------------------------------------------------------------

# Percent-encode one search term for use inside a query string. Spaces become
# "+", as the original search URL expected; every other character outside the
# unreserved set is %XX-escaped so that "&", "#", "+" or "%" in a name cannot
# break or alter the query.
my $encode_term = sub {
    my ($term) = @_;
    $term = '' unless defined $term;
    # Escape bytes, not code points: wide characters must be UTF-8 encoded
    # first or sprintf would emit more than two hex digits.
    utf8::encode($term) if utf8::is_utf8($term);
    $term =~ s/([^A-Za-z0-9\-_.~ ])/sprintf('%%%02X', ord($1))/ge;
    $term =~ s/ /+/g;
    return $term;
};

# Build the Amazon music search URL for an already-encoded keyword string.
my $search_url = sub {
    my ($keywords) = @_;
    return "http://www.amazon.com/s/?initialSearch=1&url=search-alias%3Dpopular&field-keywords=$keywords&Go.x=0&Go.y=0&Go=Go";
};

my $rows = $sth1->execute or die "[-] Error Fetching Album List\n";
# execute returns the string "0E0" for zero rows; numify it for display.
print "[+] " . ($rows + 0) . " albums found\n";

ALBUM:
while (my ($id, $album, $artist) = $sth1->fetchrow_array) {

    # Resolve the album to a directory path via one of its songs.
    $sth2->execute($id);
    my ($dirID) = $sth2->fetchrow_array;

    my $dirname;
    if (defined $dirID) {
        $sth3->execute($dirID);
        ($dirname) = $sth3->fetchrow_array;
    }
    # With no matching directory row the path falls back to the base music
    # directory, which is what the original script did implicitly.
    # NOTE(review): confirm that this is the desired target for such albums.
    $dirname = '' unless defined $dirname;

    my $filename = "$directory$dirname/albumart.jpg";

    # Check if we already have album art for this album.
    if (-e $filename) {
        print "[+] You already have album-art for $album\n";
        next ALBUM;
    }

    print "[+] Fetching on $album by $artist ...\t";
    my $resp = $ua->get($search_url->($encode_term->($artist) . '+' . $encode_term->($album)));
    unless ($resp->is_success) {
        print "[-] Error Searching Amazon For Album and Artist. Next...\n";
        next ALBUM;
    }
    my $content = $resp->content;

    # If the more specific search didn't find anything, try just the album
    # name. This helps for albums listed as being by "Multiple Artists" or
    # "Various Artists", such as soundtracks.
    if ($content =~ /did not match any/) {
        print "Not Found.\n    Fetching on $album ...\t";
        $resp = $ua->get($search_url->($encode_term->($album)));
        unless ($resp->is_success) {
            print "[-] Error Searching Amazon for Album. Next...\n";
            next ALBUM;
        }
        $content = $resp->content;
        if ($content =~ /did not match any/) {
            print "Not Found. Next...\n";
            sleep 1;
            next ALBUM;
        }
    }

    # Report the result count shown on the page. The variable is scoped to
    # this iteration so a page without a recognisable count can never show
    # the previous album's number; "?" is printed in that case. As before,
    # the last occurrence on the page wins.
    my $results = '?';
    while ($content =~ /class\="resultCount\">Showing (\d+) Result/igm) {
        $results = $1;
    }
    print "$results Results\n";

    # Take the first product thumbnail in the result list.
    my $imageurl;
    if ($content =~ /<img src\=\"(.+?)\" class\=(.+?)Details\"/i) {
        $imageurl = $1;
        # "Larger" product images replace AA115 with SS500...
        $imageurl =~ s/AA115/SS500/;
    }

    if ($imageurl) {
        print "[+] Cover Image Found at $imageurl\n";
    }
    else {
        print "[-] No Cover Image Found\n";
        next ALBUM;
    }

    # Skip the "no image available" placeholder images...
    if ($imageurl =~ /no-img-lg/) {
        print "     Skipping this \"No Image Avaliable\" image. Next...\n";
        sleep 2;
        next ALBUM;
    }

    # Download the image. A failed or empty download must not be saved:
    # the bogus albumart.jpg would make every later run skip this album.
    my $image_resp = $ua->get($imageurl);
    my $jpeg = $image_resp->is_success ? $image_resp->content : undef;
    unless (defined $jpeg && length $jpeg) {
        print "[-] Error Downloading Cover Image. Next...\n";
        next ALBUM;
    }

    print "[+] Saving to $filename\n";
    open(my $out, '>', $filename) or die "[-] Cannot open file: $!\n";
    binmode $out;    # image data is binary; avoid any newline translation
    print {$out} $jpeg;
    # Buffered write errors (disk full, etc.) only surface at close.
    close($out) or die "[-] Cannot write file: $!\n";

    # If Amazon was nice enough to make album art available to us, let's be
    # nice to Amazon and pause for 10 seconds until we search for the next
    # one. Removing this pause risks overwhelming their servers or getting
    # the scraping noticed (and the page layout changed), which would break
    # this script.
    sleep 10;
}

# Cleanup: release every statement handle used by the album loop, then close
# the database connection. ($sth was already finished right after the
# MusicLocation lookup.)
print "Finished...cleaning up...\n";

$sth1->finish;
$sth2->finish;
$sth3->finish;    # was a second $sth2->finish; $sth3 was never released
$dbh->disconnect;