find_double_bookmarked.pl


Menlo | FiraCode
#!/usr/bin/env perl
use Modern::Perl '2020';
use open qw(:std :utf8);
use Path::Tiny;
use Mojo::DOM58;
use Data::Dump 'pp';

# Locate the most recent bookmark export in bookmarks/ and parse it.
# "Most recent" means the *.html file whose path sorts last as a string;
# this assumes the file names sort chronologically (e.g. they embed a
# date stamp) -- TODO confirm against how the exports are named.
my $bf;
{
    my @paths = path('bookmarks/')->children(qr/\.html$/);

    # Stop with a clear message instead of handing undef to Path::Tiny below.
    die "no .html bookmark files found in bookmarks/\n" unless @paths;

    @paths = sort @paths;
    $bf    = $paths[ -1 ];
}

# $bf is already a Path::Tiny object, so it can be slurped directly.
# slurp_utf8 returns decoded characters, which is what the DOM parser is given.
my $html = $bf->slurp_utf8();
my $dom  = Mojo::DOM58->new($html);

# Index of every bookmark found in the document, keyed by the URL that the
# bookmark's heading link points to. Each value is a hashref with:
#   i => the bookmark id rewritten into URL form,
#   t => the title (inner content of the heading link),
#   u => arrayref of the series URLs listed for that bookmark.
my $data = {};

$dom->find('li.bookmark')->each( sub {
    my $li = shift;

    # bookmark ids look like 'bookmark_XXXXX'; the URLs look like 'bookmarks/XXXXX'
    # (an <li> without an id yields an empty string rather than undef)
    my $bid = $li->attr('id') // '';
    $bid =~ s[_][s/];

    # grab the url and title from the first link in the heading
    my $liurl;
    my $title;
    if ( my $link = $li->at('h4 a:first-of-type') ) {
        $liurl = $link->attr('href');
        $title = $link->content();
    }

    # if there is no URL, then there is something wrong with the bookmark;
    # test definedness first so a missing link/href does not trigger an
    # "uninitialized value" warning on top of the diagnostic below
    if ( !defined $liurl || $liurl eq '' ) {
        warn "missing something: " . $li->to_string();
        return;
    }

    # extract any series that this work belongs to
    my $surls = [];
    $li->find('ul.series li')->each( sub {
        my $sli   = shift;
        my $slink = $sli->at('a:first-of-type') or return;
        my $href  = $slink->attr('href');

        # a link without an href cannot match any bookmark key, so skip it
        push @$surls, $href if defined $href;
    } );

    # the link URL is the primary key
    $data->{$liurl} = {
        'i' => $bid,      # id
        't' => $title,    # title
        'u' => $surls,    # series urls
    };
} );

# Walk the bookmarks in sorted URL order. For every series a bookmark lists,
# check whether that series URL is itself a key in $data (i.e. the series is
# bookmarked too) and, if so, print both entries' titles and ids so the
# duplicate can be cleaned up.
foreach my $url ( sort keys %$data ) {
    my $series_urls = $data->{$url}->{'u'};

    # an empty series list simply produces no iterations
    foreach my $s (@$series_urls) {
        next unless defined( $data->{$s} );
        say "double-bookmark: $url [$data->{$url}->{'t'}; $data->{$url}->{'i'}] and series $s [$data->{$s}->{'t'}; $data->{$s}->{'i'}]";
    }
}