p5_tumblr_v1.pl
(plain text)
#!/Users/pukku/perl5/perlbrew/perls/current/bin/perl
use Modern::Perl '2012';
use experimental 'switch'; # because the perl maintainers are annoying
use Carp;
use Data::Dump qw/pp/;
use Getopt::Long;
use LWP::Simple; # for the initial get of data
use LWP::UserAgent; # for retrieving the images
use JSON;
use Encode;
use Web::Query;
use Data::Section -setup;
use MIME::Base64 qw/encode_base64/;
use Template::Simple;
# ---------------------------------------------------------------------------
# Main script body.
#
# Options:
#   --url=URL      address of the single tumblr post to archive
#   --photo        for photo posts, save only the image file (no HTML wrapper)
#   --basedir=DIR  directory under which one sub-directory per tumblelog is kept
#
# Flow: fetch the post's JSON, build either a raw-photo output or a rendered
# HTML output, write it to "<basedir><tumblelog name>/<file>", then fetch any
# extra downloads (videos) listed for the post.
# ---------------------------------------------------------------------------
my $basedir    = '/Users/pukku/Pictures/tumblr/';    # keep photos in subdirectories of here
my $url        = '';
my $only_photo = 0;

GetOptions( "url=s" => \$url, "photo" => \$only_photo, "basedir=s" => \$basedir )
    or die("Could not parse options.\n");

# Output directories are built by plain concatenation ($basedir . name), so a
# --basedir given without a trailing slash would silently produce a sibling
# directory such as "/tmp/archiveblogname".  Normalise it once, here.
if ( length($basedir) and ( $basedir !~ m{/\z} ) ) {
    $basedir .= '/';
}

if ( $url eq '' ) {
    die("No URL given; use --url.\n");
}

my $tumblr_data = get_tumblr_data($url);
my $tumblelog   = $tumblr_data->{'tumblelog'};

# exactly one post is expected; zero is as wrong as several
my $post_count = scalar( @{ $tumblr_data->{'posts'} // [] } );
if ( $post_count != 1 ) {
    die("Expected exactly one post, but retrieved $post_count.\n");
}

my $post = $tumblr_data->{'posts'}[ 0 ];
my $type = $post->{"type"} // '';

# create a general holder
my $post_data = get_common_data( $tumblelog, $post );

my $output;

# special case for only downloading the photo
# TODO: add this for video?
if ( ( $type eq 'photo' ) and ( $only_photo or download_only_photo($post) ) ) {

    # NOTE: hostname verification is switched off here, as it is for every
    # other image download in this script.
    my $ua   = LWP::UserAgent->new( ssl_opts => { verify_hostname => 0 } );
    my $resp = $ua->get( $post->{'photo-url-1280'} );
    if ( !$resp->is_success ) {
        die( "Could not download just the photo: " . $resp->status_line . "\n" );
    }

    $output = {
        'dir'  => $basedir . $tumblelog->{'name'},
        'file' => $post->{'id'} . '--' . $resp->filename(),
        'data' => $resp->content(),
    };
}
else {
    # call handlers unique to each data type
    my %handler_for = (
        'photo'   => \&add_photo_data,
        'regular' => \&add_regular_data,
        'answer'  => \&add_answer_data,
        'video'   => \&add_video_data,
        'quote'   => \&add_quote_data,
    );

    my $handler = $handler_for{$type}
        or die("Unknown post type: $type\n");
    $handler->($post_data);

    $output = create_output_data($post_data);
}

# create the output file
{
    my $out_dir  = $output->{'dir'};
    my $out_path = $out_dir . '/' . $output->{'file'};

    if ( !-d $out_dir ) {
        mkdir($out_dir)
            or die("Could not create output directory $out_dir: $!\n");
    }

    open( my $out, '>', $out_path )
        or die("Could not create output file $out_path: $!\n");

    # 'data' is either raw image bytes or HTML that has already been encoded
    # to UTF-8, so the handle must not translate anything.
    binmode($out);

    print {$out} $output->{'data'}
        or die("Could not write output file $out_path: $!\n");

    # buffered write errors only surface at close
    close($out)
        or die("Could not finish writing output file $out_path: $!\n");

    # fetch any additional files (videos) that belong next to the output file
    foreach my $dl ( @{ $output->{'download'} // [] } ) {
        my $res = getstore( $dl->{'url'}, $out_dir . '/' . $dl->{'name'} );
        if ( !is_success($res) ) {
            warn("video download failed: $res\n");
        }
    }
}
#################
# retrieve the data from the server
#
# Requests the "format=json" rendition of $url, checks that the reply is the
# JavaScript assignment "var tumblr_api_read = {...};", strips that wrapper
# and returns the decoded JSON structure.
#
# Croaks with "no url" when $url is undefined or empty, "no data" when nothing
# could be fetched, and "not a tumblr" when the reply lacks the expected
# prefix.  decode_json() raises its own error on malformed JSON.
sub get_tumblr_data {
    my ($url) = @_;

    if ( !defined($url) or ( $url eq '' ) ) {
        croak("no url\n");
    }

    # extend an existing query string instead of starting a second one
    $url .= ( ( $url =~ m/[?]/ ) ? '&' : '?' ) . 'format=json';

    my $data = get($url);
    if ( !defined($data) or ( $data eq '' ) ) {
        croak("no data\n");
    }

    if ( $data !~ m/\Avar tumblr_api_read/ ) {
        croak("not a tumblr\n");
    }

    # unwrap the JavaScript assignment; tolerate whitespace after the ';'
    $data =~ s/\Avar tumblr_api_read = //;
    $data =~ s/;\s*\z//;

    return decode_json($data);
}
# pull out the data common to all types
#
# Takes the tumblelog hash ($t) and the post hash ($p) and returns a new hash
# reference holding both originals (under '_tumblelog' / '_post') plus the
# fields every post type shares.  'tags' is { tgs => [ { tag => ... }, ... ] }
# when the post has tags and the empty string otherwise.
sub get_common_data {
    my ( $t, $p ) = @_;

    my %common = (
        '_tumblelog'   => $t,
        '_post'        => $p,
        'tumblr_key'   => $t->{'name'},
        'tumblr_title' => $t->{'title'},
        'post_id'      => $p->{'id'},
        'post_slug'    => $p->{'slug'} || $p->{'type'},    # fall back to the type when there is no slug
        'post_url'     => $p->{'url-with-slug'},
        'post_date'    => $p->{'date-gmt'},
    );

    # get any tags, one { tag => ... } row per tag
    my @tag_rows = map { +{ 'tag' => $_ } } @{ $p->{'tags'} // [] };
    $common{'tags'} = @tag_rows ? { 'tgs' => \@tag_rows } : '';

    return \%common;
}
# handle the photo type
#
# Fills in $post->{'post_body'} (the photo caption, or '') and
# $post->{'photos'}: one { photo_caption, photo_url, photo_data } entry per
# photo, where photo_data is the image as an inline "data:" URI.  When an
# image cannot be downloaded, photo_data is left empty and the error text is
# prepended to that photo's caption.
sub add_photo_data {
    my ($post) = @_;
    my $p = $post->{'_post'};

    $post->{'post_body'} = $p->{'photo-caption'} || '';
    $post->{'photos'}    = [];

    # if there is only one photo, tumblr puts the photo data at the top level.
    # if there is more than one, it is in a sub-array. this is inconsistent,
    # so we fix things so that the data is consistent.
    # (a missing 'photos' key is treated the same as an empty list)
    my @photoset = @{ $p->{'photos'} // [] };
    if ( !@photoset ) {
        push @{ $post->{'photos'} }, {
            'photo_caption' => '',
            'photo_url'     => $p->{'photo-url-1280'},
        };
    }
    else {
        foreach my $ph (@photoset) {
            push @{ $post->{'photos'} }, {
                'photo_caption' => $ph->{'caption'} // '',
                'photo_url'     => $ph->{'photo-url-1280'},
            };
        }
    }

    # go through and download each photo
    foreach my $i ( @{ $post->{'photos'} } ) {
        my $data = download_encoded_image( $i->{'photo_url'} );

        # download_encoded_image returns either a "data:..." URI or "error: ..."
        if ( substr( $data, 0, 4 ) eq 'data' ) {
            $i->{'photo_data'} = $data;
        }
        else {
            $i->{'photo_data'}    = '';
            $i->{'photo_caption'} = '<p class="error">' . $data . '</p>' . $i->{'photo_caption'};
        }
    }

    return;
}
# handle the regular (text) type
#
# Sets $post->{'post_body'} to the post's 'regular-body', preceded by the
# 'regular-title' wrapped in an <h2> when a non-empty title is present.
sub add_regular_data {
    my ($post) = @_;
    my $raw = $post->{'_post'};

    my $title = $raw->{'regular-title'};
    my $body  = $raw->{'regular-body'};

    if ( defined($title) and ( $title ne '' ) ) {
        $body = '<h2>' . $title . '</h2>' . "\n\n" . $body;
    }

    $post->{'post_body'} = $body;
}
# handle the answer type
#
# Sets $post->{'post_body'} to two <div> blocks: the question
# (class "answer-q") followed by the answer (class "answer-a"), each on its
# own lines.
sub add_answer_data {
    my ($post) = @_;
    my $raw = $post->{'_post'};

    my @lines = (
        '<div class="answer-q">',
        $raw->{'question'},
        '</div>',
        '<div class="answer-a">',
        $raw->{'answer'},
        '</div>',
    );

    $post->{'post_body'} = join( "\n", @lines );
}
# handle the video type
#
# Sets $post->{'post_body'} to the video caption (or '') and pushes one entry
# onto $post->{'videos'} containing the original player markup ('source') and
# a local <video> tag ('controller').  When a downloadable file can be found
# in the player markup, a { url, name } entry is pushed onto
# $post->{'_videodownload'}; otherwise two warnings are emitted.
sub add_video_data {
    my ($post) = @_;
    my $raw    = $post->{'_post'};
    my $player = $raw->{'video-player'};

    $post->{'post_body'} = $raw->{'video-caption'} || '';
    $post->{'videos'}    = [];

    my $ext = '';
    my $video_url;    # stays undefined unless a usable file URL is found

    # look for a <video> element carrying JSON options in data-crt-options
    my $video_el = wq( '<div>' . $player . '</div>' )->find('video[data-crt-options]');
    if ( $video_el->size() ) {
        my $opts = decode_json( $video_el->attr('data-crt-options') );
        if ( $opts->{'hdUrl'} ) {

            # the options' hdUrl wins when present
            $video_url = $opts->{'hdUrl'};
        }
        else {
            # otherwise accept a <source> that is an mp4 "video_file" URL
            my $source_el = $video_el->find('source');
            if ( $source_el->size() ) {
                my $src  = $source_el->attr('src');
                my $mime = $source_el->attr('type');
                if ( ( $mime eq 'video/mp4' ) and ( $src =~ m/video_file/ ) ) {
                    $video_url = $src;
                }
            }
        }
    }

    if ( defined($video_url) ) {
        $ext = '.mp4';
        push @{ $post->{'_videodownload'} }, { 'url' => $video_url, 'name' => $raw->{'id'} . $ext };
    }

    if ( !$post->{'_videodownload'} ) {
        warn("video type not supported.\n");
        warn( pp($player) );
    }

    my %video = (
        'source'     => $raw->{'video-source'} . "\n" . $player,
        'controller' => '<video src="' . $raw->{'id'} . $ext . '" width="" height="" controls preload allowfullscreen></video>',
    );
    push @{ $post->{'videos'} }, \%video;
}
# handle the quote type
#
# Sets $post->{'post_body'} to two <div> blocks: the quoted text
# (class "quote-text") followed by its source (class "quote-source"), each on
# its own lines.
sub add_quote_data {
    my ($post) = @_;
    my $raw = $post->{'_post'};

    my @lines = (
        '<div class="quote-text">',
        $raw->{'quote-text'},
        '</div>',
        '<div class="quote-source">',
        $raw->{'quote-source'},
        '</div>',
    );

    $post->{'post_body'} = join( "\n", @lines );
}
# wrap up the logic to create the output data
#
# Renders the 'wrapper' data section with the post data and returns a hash
# reference with:
#   dir      - $basedir followed by the tumblelog name
#   file     - "<post id>--<post slug>.html"
#   data     - the rendered page, encoded as UTF-8
#   download - (only when the post has '_videodownload' entries) a copy of
#              that list
sub create_output_data {
    my ($post) = @_;

    my $wrapper  = ${ main->section_data('wrapper') };
    my $html_ref = Template::Simple->new()->render( \$wrapper, $post );

    my %result = (
        'dir'  => $basedir . $post->{'tumblr_key'},
        'file' => $post->{'post_id'} . '--' . $post->{'post_slug'} . '.html',
        'data' => encode( 'utf-8', ${$html_ref} ),
    );

    if ( $post->{'_videodownload'} ) {
        $result{'download'} = [ @{ $post->{'_videodownload'} } ];
    }

    return \%result;
}
# download an image file and turn it into base64 encoded inline data
# on success the returned string is a "data:<content type>;base64,<payload>"
# URI, so its first characters are 'data'
# on failure the returned string is 'error: ' followed by the HTTP status
# line, so its first characters are 'error'
sub download_encoded_image {
    my ($url) = @_;

    # ideally we would verify the hostname
    my $agent    = LWP::UserAgent->new( ssl_opts => { verify_hostname => 0 } );
    my $response = $agent->get($url);

    if ( !$response->is_success ) {
        return 'error: ' . $response->status_line();
    }

    my $mime    = $response->header('Content-Type');
    my $payload = encode_base64( $response->content(), '' );    # '' = no line breaks
    return 'data:' . $mime . ';base64,' . $payload;
}
# test to see if there is anything besides the photo to be downloaded
# if not, we can avoid the overhead of base64ing the data
#
# Takes the raw post hash ($p) and returns 1 when the post has no caption,
# no 'photos' sub-array entries (i.e. it is a single photo) and no tags;
# returns 0 otherwise.  Missing 'photos' / 'tags' keys count as empty.
sub download_only_photo {
    my ($p) = @_;

    # there is a caption
    return 0 if defined( $p->{'photo-caption'} ) and ( $p->{'photo-caption'} ne '' );

    # there is more than one photo
    return 0 if scalar( @{ $p->{'photos'} // [] } );

    # there are tags (checked on the post that was passed in, not on any
    # variable from the enclosing file scope)
    return 0 if scalar( @{ $p->{'tags'} // [] } );

    return 1;
}
__DATA__
__[ wrapper ]__
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<title>[% tumblr_title %] — [% post_id %] — [% post_slug %]</title>
<!-- #bbinclude "../tumblr.css" -->
<style type="text/css">
body {
margin: 0;
padding: 0;
}
article {
margin: 0;
padding: 0;
padding: 0.5rem;
background: #EEEEEE;
border: 0.25rem solid #DDDDDD;
}
article header {
margin: 0;
margin-bottom: 1rem;
padding: 0;
border-bottom: thin solid #BBBBBB;
}
article header h1 {
margin: 0;
padding: 0;
margin-top: 1rem;
margin-bottom: 0.25rem;
font-size: 1.2rem;
}
article header p.date {
margin: 0;
padding: 0;
margin-top: 0.5rem;
margin-bottom: 0.5rem;
font-size: 0.8rem;
}
article header p.tags {
margin: 0;
padding: 0;
margin-top: 0.5rem;
margin-bottom: 0.5rem;
font-size: 0.8rem;
font-style: italic;
}
article header p.tags span.tag {
padding-right: 1rem;
}
article figure img {
max-width: 100%;
}
article blockquote {
margin-left: 1rem;
border-left: thin dashed #CCCCCC;
padding-left: 0.5rem;
margin-right: 0;
}
video {
max-width: 100%;
}
</style>
<!-- end bbinclude -->
<style type="text/css">
</style>
</head>
<body>
<article>
<header>
<h1><a href="[% post_url %]">[% tumblr_title %] — [% post_id %] — [% post_slug %]</a></h1>
<p class="date">[% post_date %]</p>
[% START tags %]<p class="tags">[% START tgs %]<span class="tag">#[% tag %]</span>[% END tgs %]</p>[% END tags %]
</header>
[% START photos %]
<figure>
<a href="[% photo_url %]"><img src="[% photo_data %]" /></a>
<figcaption>[% photo_caption %]</figcaption>
</figure>
[% END photos %]
[% START videos %]
<!-- [% source %] -->
[% controller %]
[% END videos %]
[% post_body %]
</article>
</body>
</html>