Incomplete list of tv programmes

Claves do Amaral claves.doamaral at gmail.com
Sun Nov 3 18:23:41 EST 2013


On 3 November 2013 21:24, Claves do Amaral <claves.doamaral at gmail.com> wrote:
> It would have come in handy. In fact the solution I found is really
> "dirty", parsing the HTML page and extracting the list of programmes

If you think this utility could be of any interest, I have pasted the code below.

Thanks,
Claves

Usage: get_iplayer_search.sh "david attenborough" /media/USBHDD/media/Video/PVR/Factual/

pi at raspberrypi ~/skripte/iplayer $ cat get_iplayer_search.sh
#!/bin/bash
#
# get_iplayer_search.sh SEARCH OUTPUT_DIR
#
# Runs iplayer_search.pl (looked up in the current directory) to collect the
# programme IDs matching SEARCH, then calls get-iplayer once per ID, saving
# into OUTPUT_DIR.
#
#   SEARCH      search text passed to iplayer_search.pl (required)
#   OUTPUT_DIR  folder passed to get-iplayer --output (required)

search_string=${1:?'bad string'}
output=${2:?'bad folder'}
html_file=temp_html_file
# Stop right away if no scratch directory could be created; everything
# below depends on it.
temp_dir=$(mktemp -d) || exit 1
temp_file="$temp_dir/$html_file"

# Delete the scratch file and the directory that holds it.
remove_temp() {
    echo "Removing temporary file $temp_file"
    rm -f -- "$temp_file"
    rmdir -v -- "$temp_dir"
}

touch -- "$temp_file"
echo "File $temp_file created"

# The helper writes one programme ID per line into $temp_file. If it fails,
# the list may be incomplete, so do not start any download.
if ! perl -w iplayer_search.pl "$search_string" "$temp_dir" "$html_file"; then
    echo "iplayer_search.pl failed; not downloading anything" >&2
    remove_temp
    exit 1
fi

pids=()
# -r keeps backslashes literal; blank lines are skipped so they never
# become empty IDs.
while IFS= read -r line; do
    if [ -n "$line" ]; then
        pids+=("$line")
    fi
done < "$temp_file"

remove_temp

echo "Found ${#pids[@]} search results"

# Ask for confirmation only when there really are more than 5 results, so
# the prompt text matches the condition.
if [ "${#pids[@]}" -gt 5 ]; then
    read -r -n1 -p "Found more than 5 results. Continue? [y,n]" doit
    case $doit in
        n|N) echo; echo Aborting; exit ;;
        *) echo; echo Going on ;;
    esac
fi

for pid in "${pids[@]}"; do
    # Quoted so an output folder containing spaces stays a single argument.
    get-iplayer --modes=best --subtitles --output "$output" --pid "$pid"
done

exit


pi at raspberrypi ~/skripte/iplayer $ cat iplayer_search.pl
#!/usr/bin/perl -w

# iplayer_search.pl SEARCH DIR FILE
#
# Fetches the BBC iPlayer search results for SEARCH, follows the "Next page"
# links, and writes the programme ID found in every /iplayer/episode/<id>/
# link to DIR/FILE, one ID per line.  Each ID is written only once.
#
# Prints a message on STDOUT when the first page has no result count.
# Dies if the output file cannot be opened or closed; a failed page fetch
# is reported by HTML::TreeBuilder->new_from_url.

use strict;
use warnings;
use HTML::TreeBuilder 5 -weak;
use Data::Dumper;
use URI::Escape;

die "Usage: $0 [SEARCH] [DIR] [FILE]\n" if @ARGV < 3;

my ($search_string, $out_dir, $out_name) = @ARGV;
my $out_path = "$out_dir/$out_name";

open my $out_fh, '>', $out_path
    or die "Cannot open $out_path for writing: $!\n";

my $bbc_url            = 'http://www.bbc.co.uk';
my $iplayer_search_url = $bbc_url . '/iplayer/search?q=';
my $search_url         = $iplayer_search_url . uri_escape($search_string);
my $first_search       = 1;

my %seen_pid;       # IDs already written, to drop repeated links
my %visited_url;    # pages already fetched, to stop on a pagination cycle

PAGE:
while (1) {
    # A "next" link that leads back to a page already processed would
    # otherwise keep this loop running forever.
    last PAGE if $visited_url{$search_url}++;

    my $tree = HTML::TreeBuilder->new_from_url($search_url);
    $tree->elementify();

    # The result-count paragraph is used as the marker of a results page.
    my $results = $tree->look_down(_tag => 'p', class => 'result-count');
    if (!defined $results || $results eq '') {
        print "No results found for \"$search_string\"\n" if $first_search;
        last PAGE;
    }
    $first_search = 0;

    my @episodes = $tree->look_down(
        _tag => 'a',
        href => qr{/iplayer/episode/[a-zA-Z0-9_]+/},
    );
    for my $episode (@episodes) {
        next unless $episode->attr('href') =~ m{/iplayer/episode/([a-zA-Z0-9_]+)/};
        my $pid = $1;
        # Several anchors may point at the same episode; emit it once.
        next if $seen_pid{$pid}++;
        print {$out_fh} "$pid\n";
    }

    my $next_page_elem = $tree->look_down(_tag => 'a', title => qr/Next page/);
    last PAGE if !defined $next_page_elem;

    # Without a usable href there is nowhere to go next.
    my $next_page = $next_page_elem->attr('href');
    last PAGE if !defined $next_page;
    $next_page =~ s/^\s+|\s+$//g;
    last PAGE if $next_page eq '';

    $search_url = $bbc_url . $next_page;
}

# Buffered write errors only surface at close time.
close $out_fh or die "Cannot close $out_path: $!\n";

exit 0;



More information about the get_iplayer mailing list