#!/usr/bin/perl
#
# Perl script to fetch all videos in a playlist.
# Copyright © 2008-2013 Kai Wasserbäch
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
#
# This is the rewrite of the original shell script in Perl, because it should
# be easier to cope with YT's HTML changes this way, since we have stuff like
# Web::Scraper at our disposal.
#

use strict;
use warnings;
use utf8;
binmode STDOUT, ":encoding(utf-8)";
binmode STDERR, ":encoding(utf-8)";

use Getopt::Long::Descriptive;
use Text::ParseWords;
use Web::Scraper;
use URI;

# True when File::Which is NOT available and we have to fall back to the
# system's `which` executable. A block eval with require/import is used so
# that no code string has to be compiled at run time.
my $bSysWhich = eval { require File::Which; File::Which->import(); 1; } ? 0 : 1;

# boiler plate
print <<"WELCOME";
YTPlaylistFetcher Copyright © 2008-2013 Kai Wasserbäch
Version: 0.3.1

This program comes with ABSOLUTELY NO WARRANTY.
This is free software, and you are welcome to redistribute it
under certain conditions (GPLv3); see <http://www.gnu.org/licenses/>
for details.

WELCOME

# check if we have a system with which
if($bSysWhich && ! -x '/usr/bin/which') {
	die("Please install File::Which or switch to a system with the which "
		."executable!");
}

# functions

# Print an error message followed by the usage text on STDERR and terminate
# with exit status 1.
#   $oUsg   - usage object returned by describe_options()
#   $sError - message to show after the "ERROR: " prefix
sub errorExit {
	my ($oUsg, $sError) = @_;

	printf STDERR "ERROR: %s\n\n%s", $sError, $oUsg->text;
	exit 1;
}

# Look for a downloader executable. CCLive is preferred; CLive is only
# considered when CCLive cannot be found.
# Returns the path of the first executable found, or nothing (undef in
# scalar context) if neither is available.
sub locateBinary {
	foreach my $sName (qw(cclive clive)) {
		my $sBin = undef;
		if($bSysWhich) {
			$sBin = `which $sName`;
			# backticks yield undef if the command could not be run at all
			$sBin = '' unless(defined($sBin));
			chomp($sBin);
		}
		else {
			$sBin = which($sName);
		}

		return $sBin if($sBin && -x $sBin);
	}

	return;
}

# Extract the video ID from an absolute YouTube watch URL.
# The ID must be the value of a real "v" query parameter, i.e. "v=" has to
# follow the "?" directly or a parameter separator. This avoids picking up
# some other parameter whose name merely ends in "v".
#   $sVidUrl - URL (string or URI object) as found on the playlist page
# Returns the ID, or nothing if the URL is not a recognisable watch URL.
sub extractVideoId {
	my ($sVidUrl) = @_;

	if($sVidUrl =~ m/^https?:\/\/www\.youtube\.com\/watch\?(?:.*?[&;])?v=([a-zA-Z0-9_\-]+)/) {
		return $1;
	}

	return;
}

# Run the downloader without involving a shell, so that neither the scraped
# URL nor the pass-on options can be interpreted as shell syntax.
# The downloader's STDOUT is read and thrown away; its STDERR is untouched.
#   $sBin  - path to the CLive/CCLive executable
#   $aArgs - array reference with the arguments (options, then the URL)
# Returns 1 if the downloader ran and exited with status 0, otherwise 0.
sub runDownloader {
	my ($sBin, $aArgs) = @_;

	my $hOut = undef;
	unless(open($hOut, '-|', $sBin, @{$aArgs})) {
		printf STDERR "WARNING: Couldn't run »%s«: %s\n", $sBin, $!;
		return 0;
	}
	# Drain the pipe so the child never blocks on a full pipe buffer.
	while(defined(my $sLine = <$hOut>)) { }
	close($hOut);

	return $? == 0 ? 1 : 0;
}

# main program flow
my ($aOpts, $oUsage) = describe_options(
	'%c %o <playlist URL>',
	['yt-dump-urls|d', 'Print the extracted video URLs on STDOUT.'],
	['yt-dump-to-file|f=s', 'Dump URLs to file.'],
	['yt-pass-on|o=s', 'Option string to pass on to CLive/CCLive. '
		. 'If your option string contains spaces, you need to quote the string.'],
	['help|h', 'Show this usage information.']
);

if($aOpts->help) {
	print $oUsage->text;
	exit;
}

# exit early on various error conditions
errorExit($oUsage, 'No URL given!') if(@ARGV != 1);

my $sPlaylistUrl = $ARGV[0];
unless($sPlaylistUrl =~ m/^https?:\/\/www\.youtube\.com\//) {
	errorExit($oUsage,
		'No URL to YouTube given! This works only with YT playlists');
}

# A downloader is only needed when the URLs are not merely dumped.
my $sBinary = undef;
unless($aOpts->yt_dump_to_file || $aOpts->yt_dump_urls) {
	$sBinary = locateBinary();
	unless($sBinary) {
		errorExit($oUsage, "Neither CCLive nor CLive were found!");
	}
}

my $sDumpFile = $aOpts->yt_dump_to_file;
if($sDumpFile && -e $sDumpFile) {
	errorExit($oUsage, sprintf("»%s« exists already!", $sDumpFile));
}

# Split the pass-on option string into single arguments the way a shell
# would treat quotes and backslashes. No shell is involved, therefore
# variables, globs and "~" are NOT expanded.
my @aPassOn = ();
if($aOpts->yt_pass_on) {
	@aPassOn = shellwords($aOpts->yt_pass_on);
	# shellwords() returns an empty list for e.g. unbalanced quotes
	unless(@aPassOn) {
		errorExit($oUsage, "Couldn't parse the option string to pass on!");
	}
}

# Extract the ID of the playlist from the passed URL and construct our standard
# URL.
# This is done for two reasons:
#  1. We want to fetch the playlist details site with known layout.
#  2. We want to clean out any additional parameters for privacy reasons.
#
# Recognized playlist URL formats:
#  1. Classic format: http://www.youtube.com/view_play_list?p=[PLAYLISTID]
#  2. Embedded format: http://www.youtube.com/p/[PLAYLISTID]
#  3. User page format:
#     http://www.youtube.com/user/[USERNAME]#([gp]/c)|(grid/user)/[PLAYLISTID]
#  4. New parameter style: an URL containing list=PL[PLAYLISTID]
#  5. Classic replacement: youtube.com/playlist?p=PL[PLAYLISTID]
#
# If none of the formats matches we stop here; passing the unmodified URL on
# as "ID" would neither work nor strip the additional parameters.
my $sPlaylistId = undef;
if($sPlaylistUrl =~ m/^.*(?:p=(?:PL)?|\/p\/|#(?:[gp]|grid)\/(?:c|user)\/|list=PL)([0-9a-zA-Z_\-]{16,33}).*$/) {
	$sPlaylistId = 'PL'.$1;
}
else {
	errorExit($oUsage, "Couldn't find a playlist ID in the given URL!");
}

my $oURI = URI->new('https://www.youtube.com/playlist?list='.$sPlaylistId)
	|| errorExit($oUsage, "Couldn't create URL object!");

my $oVideos = scraper {
	process '//li[contains(@class,"playlist-video-item")]'
		. '//a[contains(@class, "yt-uix-tile-link")]',
		"videos[]" => '@href';
};

# scrape() may throw (e.g. when the page can't be fetched); catch that so the
# user gets our error message and the usage text instead of a bare exception.
my $aRes = eval { $oVideos->scrape($oURI) };
my $sScrapeError = $@;
unless($aRes && $aRes->{videos} && @{$aRes->{videos}}) {
	my $sMsg = "No video URLs found! This might be a network issue or "
		."YouTube changed its page\n structure.";
	if($sScrapeError) {
		chomp($sScrapeError);
		$sMsg .= "\n Reason: ".$sScrapeError;
	}
	errorExit($oUsage, $sMsg);
}

# setup a variable for our file handle.
my $hDumpFile = undef;
if($sDumpFile) {
	open($hDumpFile, '>:encoding(UTF-8)', $sDumpFile)
		|| errorExit($oUsage, sprintf("Can't open »%s« for writing!",
			$sDumpFile));
}

foreach my $sVidUrl (@{$aRes->{videos}}) {
	my $sVidId = extractVideoId($sVidUrl);
	unless(defined($sVidId)) {
		printf STDERR "WARNING: Skipping unrecognised video link »%s«\n",
			$sVidUrl;
		next;
	}

	# Rebuild the URL from the ID only, dropping every other parameter.
	my $sCleanUrl = sprintf('https://www.youtube.com/watch?v=%s', $sVidId);

	# Dumping to STDOUT and dumping to a file may be combined; downloading
	# only happens when no dump option was given.
	if($aOpts->yt_dump_urls) {
		print $sCleanUrl, "\n";
	}
	if($hDumpFile) {
		print {$hDumpFile} $sCleanUrl, "\n";
	}
	unless($aOpts->yt_dump_urls || $hDumpFile) {
		unless(runDownloader($sBinary, [@aPassOn, $sCleanUrl])) {
			printf STDERR "WARNING: Download of »%s« failed.\n", $sCleanUrl;
		}
	}
}

if($hDumpFile) {
	# Buffered write errors only surface on close.
	close($hDumpFile)
		|| die(sprintf("Couldn't finish writing »%s«: %s\n", $sDumpFile, $!));
}