p5_tumblr_v1.pl

(plain text)


#!/Users/pukku/perl5/perlbrew/perls/current/bin/perl
use Modern::Perl '2012';
use experimental 'switch';    # because the perl maintainers are annoying
use Carp;
use Data::Dump qw/pp/;

use Getopt::Long;

use LWP::Simple;              # for the initial get of data
use LWP::UserAgent;           # for retrieving the images

use JSON;
use Encode;
use Web::Query;

use Data::Section -setup;
use MIME::Base64 qw/encode_base64/;
use Template::Simple;

# Runtime configuration, overridable from the command line:
#   --url URL       address of the post to archive (required)
#   --photo         for photo posts, save only the image file, never an HTML page
#   --basedir DIR   root directory; output goes into a per-tumblelog subdirectory
my $basedir    = '/Users/pukku/Pictures/tumblr/';    # keep photos in subdirectories of here
my $url        = '';
my $only_photo = 0;
GetOptions( "url=s" => \$url, "photo" => \$only_photo, "basedir=s" => \$basedir )
  or die("Could not parse options.\n");

# fail early with a usage hint instead of requesting the bare string '?format=json'
die("Usage: $0 --url <post url> [--photo] [--basedir <dir>]\n") if $url eq '';

# output directories are built by plain concatenation ($basedir . name), so a
# user-supplied base directory needs its trailing slash added. an empty value
# is left alone so that it keeps meaning "relative to the current directory".
$basedir .= '/' if ( $basedir ne '' ) and ( $basedir !~ m{/\z} );

# fetch the post description; everything below assumes that exactly one post
# came back from the server
my $tumblr_data = get_tumblr_data($url);

my $tumblelog = $tumblr_data->{'tumblelog'};
my $posts     = $tumblr_data->{'posts'} // [];    # a response without 'posts' counts as zero posts
if ( scalar( @{$posts} ) != 1 ) {

    # the count can be zero as well as more than one, so report the real number
    die( "Expected exactly one post, but retrieved " . scalar( @{$posts} ) . ".\n" );
}
my $post = $posts->[ 0 ];
my $type = $post->{"type"};

# create a general holder
my $post_data = get_common_data( $tumblelog, $post );
my $output;    # needed to be predefined in case we GOTO

# shortcut for photo posts: when --photo was given, or download_only_photo()
# says so, store the bare image file and skip the HTML wrapper entirely
# @TODO add this for video?
if ( ( $type eq 'photo' ) and ( $only_photo or download_only_photo($post) ) ) {
    my $agent    = LWP::UserAgent->new( ssl_opts => { verify_hostname => 0 } );
    my $response = $agent->get( $post->{'photo-url-1280'} );

    if ( !$response->is_success ) {
        die( "Could not download just the photo: " . $response->status_line . "\n" );
    }

    # same dir/file/data layout that create_output_data() produces
    my $target_dir  = $basedir . $tumblelog->{'name'};
    my $target_file = $post->{'id'} . '--' . $response->filename();
    $output = {
        'dir'  => $target_dir,
        'file' => $target_file,
        'data' => $response->content(),
    };

    # jump over the per-type handlers straight to the code that writes the file
    goto CREATE_OUTPUT;
}

# call the handler unique to each post type.
# a dispatch table is used instead of the experimental given/when construct;
# every handler receives $post_data and fills in its type-specific fields.
my %handler_for = (
    'photo'   => \&add_photo_data,
    'regular' => \&add_regular_data,
    'answer'  => \&add_answer_data,
    'video'   => \&add_video_data,
    'quote'   => \&add_quote_data,
);

my $handler = $handler_for{$type}
  or die("Unknown post type: $type\n");
$handler->($post_data);

$output = create_output_data($post_data);

# create the output file, then fetch anything listed under 'download'.
# $output holds 'dir', 'file' and 'data', plus an optional 'download' list of
# { url, name } entries that are stored next to the main file.
CREATE_OUTPUT:
{
    my $dir  = $output->{'dir'};
    my $path = $dir . '/' . $output->{'file'};

    # an already existing directory is fine; failing to create a missing one is not
    if ( !-d $dir ) {
        mkdir($dir) or die("Could not create directory $dir: $!\n");
    }

    open( my $out, '>', $path )
      or die("Could not create output file $path: $!\n");

    # the data is bytes already (raw image content or utf-8 encoded html),
    # so no output layer may translate it
    binmode($out);
    print {$out} $output->{'data'}
      or die("Could not write output file $path: $!\n");

    # errors from buffered writes only show up when the handle is closed
    close($out)
      or die("Could not finish writing output file $path: $!\n");

    # a failed extra download is only warned about; the main file is already saved
    foreach my $dl ( @{ $output->{'download'} // [] } ) {
        my $res = getstore( $dl->{'url'}, $dir . '/' . $dl->{'name'} );
        if ( !is_success($res) ) {
            warn("video download failed: $res\n");
        }
    }
}

#################

# fetch the description of a post from the server and decode it
#   $url - address of the post; '?format=json' is appended before the request
# returns the decoded data structure.
# croaks with "no data" when nothing comes back, and with "not a tumblr" when
# the response does not start with 'var tumblr_api_read'.
sub get_tumblr_data {
    my ($url) = @_;

    my $body = get( $url . '?format=json' );

    croak("no data\n") if !defined($body) || ( $body eq '' );
    croak("not a tumblr\n") if $body !~ m/^var tumblr_api_read/;

    # peel off the javascript assignment wrapped around the JSON document
    $body =~ s/^var tumblr_api_read = //;
    $body =~ s/;$//;

    return decode_json($body);
}

# collect the fields that every post type shares
#   $t - the 'tumblelog' hash of the server response
#   $p - the post hash of the server response
# returns a new hash ref with the common template fields; the two source
# hashes are kept under '_tumblelog' and '_post' for the per-type handlers.
sub get_common_data {
    my ( $t, $p ) = @_;

    # one { tag => ... } row per tag, as consumed by the template's 'tgs' loop
    my @tag_rows = map { +{ 'tag' => $_ } } @{ $p->{'tags'} // [] };

    return {
        '_tumblelog' => $t,
        '_post'      => $p,

        'tumblr_key'   => $t->{'name'},
        'tumblr_title' => $t->{'title'},

        'post_id'   => $p->{'id'},
        'post_slug' => $p->{'slug'} || $p->{'type'},    # the type stands in for a missing slug
        'post_url'  => $p->{'url-with-slug'},
        'post_date' => $p->{'date-gmt'},

        # an untagged post gets an empty string here instead of a hash
        'tags' => ( @tag_rows ? { 'tgs' => \@tag_rows } : '' ),
    };
}

# handle the photo type
# sets 'post_body' to the post caption and 'photos' to a list of hashes with
# 'photo_url', 'photo_caption' and 'photo_data' (the image as inline data).
#   $post - the hash built by get_common_data; it is modified in place
sub add_photo_data {
    my ($post) = @_;
    my $p = $post->{'_post'};

    $post->{'post_body'} = $p->{'photo-caption'} || '';
    $post->{'photos'} = [];

    # if there is only one photo, tumblr puts the photo data at the top level.
    # if there is more than one, it is in a sub-array. this is inconsistent,
    # so we fix things so that the data is consistent.
    my @photoset = @{ $p->{'photos'} // [] };    # a missing key is treated like an empty set
    if ( !@photoset ) {
        push @{ $post->{'photos'} }, {
            'photo_caption' => '',
            'photo_url'     => $p->{'photo-url-1280'},
        };
    }
    else {
        foreach my $ph (@photoset) {
            push @{ $post->{'photos'} }, {
                'photo_caption' => $ph->{'caption'} // '',    # same default as the single-photo case
                'photo_url'     => $ph->{'photo-url-1280'},
            };
        }
    }

    # go through and download each photo. a failed download does not abort the
    # post: the error text is shown at the start of that photo's caption instead.
    foreach my $i ( @{ $post->{'photos'} } ) {
        my $data = download_encoded_image( $i->{'photo_url'} );
        if ( substr( $data, 0, 4 ) eq 'data' ) {
            $i->{'photo_data'} = $data;
        }
        else {
            $i->{'photo_data'}    = '';
            $i->{'photo_caption'} = '<p class="error">' . $data . '</p>' . $i->{'photo_caption'};
        }
    }
}

# handle the regular type: 'post_body' becomes the post's body, preceded by
# the title as an <h2> heading when the title is present and not empty
#   $post - the hash built by get_common_data; it is modified in place
sub add_regular_data {
    my ($post) = @_;
    my $src = $post->{'_post'};

    my $title     = $src->{'regular-title'};
    my $has_title = defined($title) && ( $title ne '' );

    $post->{'post_body'} =
      $has_title
      ? '<h2>' . $title . '</h2>' . "\n\n" . $src->{'regular-body'}
      : $src->{'regular-body'};
}

# handle the answer type: 'post_body' becomes the question and the answer,
# each wrapped in its own <div>
#   $post - the hash built by get_common_data; it is modified in place
sub add_answer_data {
    my ($post) = @_;
    my $src = $post->{'_post'};

    # every piece goes on its own line
    $post->{'post_body'} = join(
        "\n",
        '<div class="answer-q">', $src->{'question'}, '</div>',
        '<div class="answer-a">', $src->{'answer'},   '</div>',
    );
}

# handle the video type
# sets 'post_body' to the caption and adds one entry to 'videos' holding the
# original source/player markup and a local <video> tag. when an mp4 address
# can be found in the player markup, it is queued under '_videodownload'.
#   $post - the hash built by get_common_data; it is modified in place
sub add_video_data {
    my ($post) = @_;
    my $p = $post->{'_post'};

    $post->{'post_body'} = $p->{'video-caption'} || '';
    $post->{'videos'}    = [];

    # look for a <video> tag that carries a data-crt-options attribute
    my $player = wq( '<div>' . $p->{'video-player'} . '</div>' );
    my $video  = $player->find('video[data-crt-options]');

    my $download_url;
    if ( $video->size() ) {
        my $options = decode_json( $video->attr('data-crt-options') );
        if ( $options->{'hdUrl'} ) {
            $download_url = $options->{'hdUrl'};
        }
        else {
            # no hdUrl in the options: try an mp4 <source> child instead
            my $source = $video->find('source');
            if ( $source->size() ) {
                my $src  = $source->attr('src');
                my $mime = $source->attr('type');
                if ( ( $mime eq 'video/mp4' ) and ( $src =~ m/video_file/ ) ) {
                    $download_url = $src;
                }
            }
        }
    }

    # the file is stored under the post id; the suffix stays empty when
    # nothing downloadable was found
    my $suffix = '';
    if ( defined $download_url ) {
        $suffix = '.mp4';
        push @{ $post->{'_videodownload'} }, { 'url' => $download_url, 'name' => $p->{'id'} . $suffix };
    }

    if ( !$post->{'_videodownload'} ) {
        warn("video type not supported.\n");
        warn( pp( $p->{'video-player'} ) );
    }

    my $local_tag = '<video src="' . $p->{'id'} . $suffix . '" width="" height="" controls preload allowfullscreen></video>';
    push @{ $post->{'videos'} }, {
        'source'     => $p->{'video-source'} . "\n" . $p->{'video-player'},
        'controller' => $local_tag,
    };
}

# handle the quote type: 'post_body' becomes the quoted text and its source,
# each wrapped in its own <div>
#   $post - the hash built by get_common_data; it is modified in place
sub add_quote_data {
    my ($post) = @_;
    my $src = $post->{'_post'};

    # every piece goes on its own line
    $post->{'post_body'} = join(
        "\n",
        '<div class="quote-text">',   $src->{'quote-text'},   '</div>',
        '<div class="quote-source">', $src->{'quote-source'}, '</div>',
    );
}

# turn the collected post data into the description of what to write
#   $post - the hash built by get_common_data and a per-type handler
# returns a hash ref with 'dir', 'file' and 'data' (the 'wrapper' template
# rendered with $post and encoded as utf-8), plus 'download' when the post
# carries a '_videodownload' list.
sub create_output_data {
    my ($post) = @_;

    my $engine   = Template::Simple->new();
    my $wrapper  = ${ main->section_data('wrapper') };
    my $html_ref = $engine->render( \$wrapper, $post );

    my $directory = $basedir . $post->{'tumblr_key'};
    my $filename  = $post->{'post_id'} . '--' . $post->{'post_slug'} . '.html';

    my $output = {
        'dir'  => $directory,
        'file' => $filename,
        'data' => encode( 'utf-8', ${$html_ref} ),
    };

    # hand the queued video downloads on to the code that writes the files
    if ( $post->{'_videodownload'} ) {
        $output->{'download'} = [ @{ $post->{'_videodownload'} } ];
    }

    return $output;
}

# fetch an image and return it as base64 encoded inline data
#   $url - address of the image
# on success the returned string starts with 'data' (data:<type>;base64,...);
# on failure it starts with 'error' and carries the response's status line.
sub download_encoded_image {
    my ($url) = @_;

    my $agent    = LWP::UserAgent->new( ssl_opts => { verify_hostname => 0 } );    # ideally we would verify
    my $response = $agent->get($url);

    return 'error: ' . $response->status_line() if !$response->is_success;

    my $mime    = $response->header('Content-Type');
    my $payload = encode_base64( $response->content(), '' );    # '' = no line breaks in the output
    return 'data:' . $mime . ';base64,' . $payload;
}

# test to see if there is anything besides the photo to be downloaded
# if not, we can avoid the overhead of base64ing the data
#   $p - the post hash of the server response
# returns 1 when the post has no caption, no photo set and no tags, else 0.
# only the $p argument is inspected; missing keys count as "nothing there".
sub download_only_photo {
    my ($p) = @_;

    my $caption = $p->{'photo-caption'};
    return 0 if defined($caption) && ( $caption ne '' );    # there is a caption
    return 0 if @{ $p->{'photos'} // [] };                  # there is more than one photo
    return 0 if @{ $p->{'tags'} // [] };                    # there are tags

    return 1;
}

__DATA__

__[ wrapper ]__
<!DOCTYPE html>
<html lang="en">
<head>
	<meta charset="utf-8" />
	<title>[% tumblr_title %] — [% post_id %] — [% post_slug %]</title>
<!-- #bbinclude "../tumblr.css" -->
	<style type="text/css">
		body {
			margin: 0;
			padding: 0;
		}
		article {
			margin: 0;
			padding: 0;
			padding: 0.5rem;
			background: #EEEEEE;
			border: 0.25rem solid #DDDDDD;
		}
		article header {
			margin: 0;
			margin-bottom: 1rem;
			padding: 0;
			border-bottom: thin solid #BBBBBB;
		}
		article header h1 {
			margin: 0;
			padding: 0;
			margin-top: 1rem;
			margin-bottom: 0.25rem;
			font-size: 1.2rem;
		}
		article header p.date {
			margin: 0;
			padding: 0;
			margin-top: 0.5rem;
			margin-bottom: 0.5rem;
			font-size: 0.8rem;
		}
		article header p.tags {
			margin: 0;
			padding: 0;
			margin-top: 0.5rem;
			margin-bottom: 0.5rem;
			font-size: 0.8rem;
			font-style: italic;
		}
		article header p.tags span.tag {
			padding-right: 1rem;
		}
		article figure img {
			max-width: 100%;
		}
		article blockquote {
			margin-left: 1rem;
			border-left: thin dashed #CCCCCC;
			padding-left: 0.5rem;
			margin-right: 0;
		}
		video {
			max-width: 100%;
		}
	</style>
<!-- end bbinclude -->
	<style type="text/css">
	</style>
</head>
<body>
<article>
<header>
	<h1><a href="[% post_url %]">[% tumblr_title %] — [% post_id %] — [% post_slug %]</a></h1>
	<p class="date">[% post_date %]</p>
	[% START tags %]<p class="tags">[% START tgs %]<span class="tag">#[% tag %]</span>[% END tgs %]</p>[% END tags %]
</header>

[% START photos %]
<figure>
	<a href="[% photo_url %]"><img src="[% photo_data %]" /></a>
	<figcaption>[% photo_caption %]</figcaption>
</figure>
[% END photos %]
[% START videos %]
<!-- [% source %] -->
[% controller %]
[% END videos %]

[% post_body %]

</article>
</body>
</html>