#!/usr/bin/env perl
use Modern::Perl '2020';
use open qw(:std :utf8);
use Path::Tiny;
use Mojo::DOM58;
use Data::Dump 'pp';

# Scan the newest exported bookmark page in bookmarks/ and report every
# bookmarked work that belongs to a series which is itself bookmarked.
#
# Input:  bookmarks/*.html (the file whose name sorts last is used).
# Output: one "double-bookmark: ..." line per (work, series) pair on STDOUT;
#         malformed bookmark entries are reported on STDERR and skipped.

# get the most recent bookmark file
# NOTE(review): "most recent" relies on the file names sorting
# chronologically as plain strings -- confirm the naming scheme.
my $bf;
{
    my @paths = path('bookmarks/')->children(qr/\.html$/);
    die "no bookmark files (*.html) found in bookmarks/\n" unless @paths;
    @paths = sort @paths;
    $bf    = $paths[-1];
}

my $html = $bf->slurp_utf8();
my $dom  = Mojo::DOM58->new($html);

# link URL => { i => bookmark id, t => title, u => [ series URLs ] }
my $data = {};

$dom->find('li.bookmark')->each(
    sub {
        my ($li) = @_;

        # bookmark ids look like 'bookmark_XXXXX'; the URLs look like 'bookmarks/XXXXX'
        # (default to '' so an <li> without an id does not trigger an
        # "uninitialized value" warning in the substitution)
        my $bid = $li->attr('id') // '';
        $bid =~ s[_][s/];

        # grab the url and title
        my $liurl;
        my $title;
        if ( my $link = $li->at('h4 a:first-of-type') ) {
            $liurl = $link->attr('href');
            $title = $link->content();
        }

        # if there is no URL, then there is something wrong with the bookmark
        # ($liurl is undef when no anchor was found or it has no href)
        if ( !defined $liurl || $liurl eq '' ) {
            warn "missing something: " . $li->to_string();
            return;
        }

        # extract any series that this work belongs to
        my $surls = [];
        $li->find('ul.series li')->each(
            sub {
                my ($sli) = @_;
                my $series_link = $sli->at('a:first-of-type') or return;

                # an anchor without href cannot match any bookmark key
                my $href = $series_link->attr('href') // return;
                push @$surls, $href;
            }
        );

        # the link URL is the primary key
        $data->{$liurl} = {
            'i' => $bid,      # id
            't' => $title,    # title
            'u' => $surls,    # series urls
        };
    }
);

# for each bookmarked work, go through every series it is a part of
# and if that series is also bookmarked, provide the necessary information to fix it
foreach my $url ( sort keys %$data ) {
    my $work = $data->{$url};

    foreach my $s ( $work->{'u'}->@* ) {
        my $series = $data->{$s};
        next unless defined $series;

        say "double-bookmark: $url [$work->{'t'}; $work->{'i'}] and series $s [$series->{'t'}; $series->{'i'}]";
    }
}