#!/usr/bin/perl
#
# Updated by JWS to work with the new database scheme (to get the correct directory name)
# September 23rd 2007. www.summet.com/blog
#  -Added support for multiple runs (if albumart.jpg already exists, we
#   skip that album/artist.)
#  -Added search only by album title if album+artist search fails. Helps "Various Artist" albums.
#
#  - Fixes by Mike Wison wizREMOVEMEgnome@gmailDOT.com to make it work
#    with Amazon updates, May 4th 2012.
# Note: Replace MYTHUSER and MYTHPASSWORD with the appropriate values from your .mythtv/mysql.txt file.
#       And rename this file to .pl (execute with "perl amazon_album_art_scraper.pl")
#
#
# Known gotchas:
#  -If you have an album where each track has a different artist (soundtracks, etc.), the script
#   will search for each album/artist combination. Sometimes this helps; most of the time it just
#   wastes bandwidth and time.
#  -If each of your albums is NOT in its own directory, the albumart.jpg will keep getting overwritten.
#   Sorry, if you use some funky directory organizing system, re-work the script to work for you. Otherwise,
#   just do what 90% of the other music listeners do and accept the (sane, reasonable) defaults offered
#   by your music ripping software of choice.
#  -Obviously, the matching of album art to keywords is only as good as the CDDB database and Amazon
#   searches, so sometimes you will get a bogus image. Hey, at least it's not from gotse...
#
#
# Amazon Album Art Scraper. Originally by Thierry at
# http://scraping.icebo.org/index.php/2007/08/07/amazon-album-art-scraper/
# Aug 7th 2007.
use strict;
use warnings;

use File::Basename;
use DBI;
use LWP::UserAgent;
use Sys::Hostname;

# Walks every album/artist pair in the MythMusic database, searches Amazon for
# it and saves the first cover thumbnail found (upscaled to the SS500 variant)
# as albumart.jpg inside the album's directory.  Albums that already have an
# albumart.jpg are skipped, so the script can be re-run safely.

# Pattern that pulls the cover thumbnail URL out of a search-results page.
# NOTE(review): the pattern originally used here was lost (the source text is
# truncated in the middle of the regex).  This replacement is built only from
# the two markers the rest of the script relies on: the "AA115" size tag that
# gets rewritten to "SS500", and the "no-img-lg" placeholder name.  Verify it
# against real Amazon markup before trusting the results.
my $COVER_IMAGE_RE = qr{<img\b[^>]*?\bsrc="([^"]*(?:AA115|no-img-lg)[^"]*)"}i;

print "Amazon AlbumArt Scraper\n";
print "=======================\n";

my $hostname = hostname;
print "[+] Hostname Found: $hostname\n";

my $dbh = DBI->connect('dbi:mysql:database=mythconverg;host=localhost;', 'MYTHUSER', 'MYTHPASSWORD')
    or die "[-] Cannot connect to DB\n";
$dbh->{'mysql_auto_reconnect'} = 1;

my $sth = $dbh->prepare("select data from settings where value = 'MusicLocation' and hostname = ?")
    or die "[-] Cannot prepare SQL statement 1\n";
my $sth1 = $dbh->prepare("select distinct music_songs.album_id,album_name, artist_name from music_songs, music_albums, music_artists where music_songs.artist_id = music_artists.artist_id and music_songs.album_id = music_albums.album_id order by rand()")
    or die "[-] Cannot prepare SQL statement 2\n";
my $sth2 = $dbh->prepare("select directory_id from music_songs where album_id = ? limit 1")
    or die "[-] Cannot prepare SQL statement 3\n";
my $sth3 = $dbh->prepare("select path from music_directories where directory_id = ? limit 1")
    or die "[-] Cannot prepare SQL statement 4\n";

my $ua = LWP::UserAgent->new( agent => "" )
    or die "[-] Cannot Create UserAgent\n";

# Root of the music collection as configured for this host.
$sth->execute($hostname) or die "[-] Error Fetching MythMusic Directory\n";
my ($directory) = $sth->fetchrow_array;
$sth->finish;

if ($directory) {
    print "[+] MythMusic Directory found at $directory\n";
}
else {
    print "[-] MythMusic Directory not found\n";
    exit 1;
}

my $rows = $sth1->execute or die "[-] Error Fetching Album List\n";
print "[+] $rows albums found\n";

ALBUM:
while (my ($id, $album, $artist) = $sth1->fetchrow_array) {

    # Resolve the album to the directory of one of its songs.
    # selectrow_array executes, fetches one row and finishes the handle.
    my ($dirID)   = $dbh->selectrow_array($sth2, undef, $id);
    my ($dirname) = defined $dirID
                  ? $dbh->selectrow_array($sth3, undef, $dirID)
                  : ();

    # Without a directory the image would be written next to the music root.
    unless (defined $dirname) {
        print "[-] No directory found for $album. Next...\n";
        next ALBUM;
    }

    # Check if we already have album art for this album.
    my $filename = "$directory$dirname/albumart.jpg";
    if (-e $filename) {
        print "[+] You already have album-art for $album\n";
        next ALBUM;
    }

    print "[+] Fetching on $album by $artist ...\t";

    my $resp = $ua->get(amazon_search_url(form_encode("$artist $album")));
    unless ($resp->is_success) {
        print "[-] Error Searching Amazon For Album and Artist. Next...\n";
        next ALBUM;
    }
    my $content = $resp->content;

    # If the more specific search didn't find anything, try just the album
    # name.  This helps for albums listed as being by "Multiple Artists" or
    # "Various Artists", such as soundtracks.
    if ($content =~ /did not match any/) {
        print "Not Found.\n    Fetching on $album ...\t";

        $resp = $ua->get(amazon_search_url(form_encode($album)));
        unless ($resp->is_success) {
            print "[-] Error Searching Amazon for Album. Next...\n";
            next ALBUM;
        }
        $content = $resp->content;

        if ($content =~ /did not match any/) {
            print "Not Found. Next...\n";
            sleep 1;
            next ALBUM;
        }
    }

    # Report the last result count shown on the page; scoped per album so a
    # value from a previous iteration is never printed by mistake.
    my @counts  = $content =~ /class\="resultCount\">Showing (\d+) Result/ig;
    my $results = @counts ? $counts[-1] : 'Unknown';
    print "$results Results\n";

    # The thumbnail is taken straight from the search-results page.
    my $imageurl;
    if ($content =~ $COVER_IMAGE_RE) {
        $imageurl = $1;

        # "Larger" product images replace AA115 with SS500...
        $imageurl =~ s/AA115/SS500/;
    }

    if ($imageurl) {
        print "[+] Cover Image Found at $imageurl\n";
    }
    else {
        print "[-] No Cover Image Found\n";
        next ALBUM;
    }

    # Skip the "no image available" images...
    if ($imageurl =~ /no-img-lg/) {
        print "    Skipping this \"No Image Avaliable\" image. Next...\n";
        sleep 2;
        next ALBUM;
    }

    # Never save an error page as albumart.jpg: it would be treated as valid
    # art on every later run and the album would be skipped forever.
    $resp = $ua->get($imageurl);
    unless ($resp->is_success) {
        print "[-] Error Downloading Cover Image. Next...\n";
        next ALBUM;
    }
    my $jpeg = $resp->content;

    print "[+] Saving to $filename\n";
    open(my $fh, '>', $filename) or die "[-] Cannot open file: $!\n";
    binmode $fh;    # image data must not go through newline translation
    print {$fh} $jpeg;
    close($fh) or die "[-] Cannot write file: $!\n";

    # If Amazon was nice enough to make album art available to us, let's be
    # nice to Amazon and pause for 10 seconds before searching for the next
    # one.  Removing this pause risks overwhelming their servers, or at least
    # getting the scraping noticed and the page layout changed.
    sleep 10;
}

print "Finished...cleaning up...\n";

$sth1->finish;
$sth2->finish;
$sth3->finish;
$dbh->disconnect;

# Build the Amazon music search URL for already-encoded keywords.
sub amazon_search_url {
    my ($keywords) = @_;
    return 'http://www.amazon.com/s/?initialSearch=1&url=search-alias%3Dpopular'
         . '&field-keywords=' . $keywords
         . '&Go.x=0&Go.y=0&Go=Go';
}

# Encode free text for use as a query-string value: spaces become "+" and
# every byte outside the URL-unreserved set becomes %XX, so characters such
# as "&", "#" or "+" in a name cannot cut the keyword parameter short.
sub form_encode {
    my ($text) = @_;
    utf8::encode($text) if utf8::is_utf8($text);    # work on bytes
    $text =~ s/([^A-Za-z0-9\-_.~ ])/sprintf('%%%02X', ord $1)/ge;
    $text =~ tr/ /+/;
    return $text;
}