manage_duplicates.pl


Menlo | FiraCode
#!/usr/bin/env perl
use Modern::Perl '2020';
use open qw(:std :utf8);
use POSIX qw (log10 floor);
use Path::Tiny;
use IO::Prompt::Simple;
use Cwd;
## use IO::Pager;
## use App::ccdiff;

# Move into the works directory. Every path used below is relative to it,
# so stop right away if the directory cannot be entered instead of going on
# to operate on whatever directory the script happened to be started from.
chdir('./works') or die "Cannot chdir to ./works: $!\n";
say "CWD: " . getcwd();

# Let the operator confirm the working directory before any file is touched.
# The 'yn' option asks a yes/no question; anything other than a result of 1
# is treated as a refusal.
my $okchdir = prompt( "ok?", { 'yn' => 1 } );
if ( $okchdir != 1 ) {
    say "Chdir failed, stopping.";
    exit(0);
}

# Bookkeeping shared by the rest of the script:
#   $duplicates  - work id => { file name => number of times seen }
#   $actions     - [ work id, description ] for every change that was made
#   $uncompleted - [ work id or file name, reason ] for everything left undone
my $duplicates  = {};
my $actions     = [];
my $uncompleted = [];

# Get a list of the paths in the works directory with a hyphen, and
# match them by the work id (the run of digits before the first hyphen).
foreach my $hyphenated ( path('.')->children(qr/-/) ) {
    my $fname = $hyphenated->basename();
    my ($wid) = ( $fname =~ m/^(\d+)-/ );

    # A hyphenated name that does not start with digits has no work id.
    # Grouping such names under an undefined key would pair up unrelated
    # files as "duplicates", so report them and leave them alone.
    if ( !defined $wid ) {
        push @$uncompleted, [ $fname, "hyphenated name without a numeric work id" ];
        next;
    }

    $duplicates->{$wid}->{$fname}++;
}

# Some duplicated works also have an unsuffixed "<id>.html" next to the two
# hyphenated copies. This happens when a work is reached through several
# bookmarks and has changed: the download script moves the old file aside on
# the first encounter, then on the next encounter writes the work again as
# though it were new. Those works are set aside for manual handling and are
# removed from the set that will be reviewed.
my @tripled = grep { path( $_ . '.html' )->exists() } keys %$duplicates;
for my $tripled_wid (@tripled) {
    push @$uncompleted, [ $tripled_wid, "triple file" ];
    delete $duplicates->{$tripled_wid};
}

# Get a count of the number of works and build the "curr/total" progress
# format. Both numbers are padded to the width of the total. The width is
# taken from the length of the decimal string rather than from log10(), which
# yields -Inf (and a nonsense format string) when there are no works at all.
my $total     = scalar( keys %$duplicates );
my $curr      = 1;
my $magnitude = length($total);
my $fmtstr    = "\%${magnitude}d/\%${magnitude}d";

# Show diff results and ask for next steps.
# For every work that has exactly two hyphenated copies: open both in the diff
# tool, then ask what to do with the older one. Unless the work is skipped,
# the newer copy is renamed to the plain "<id>.html".
#$ENV{LESS} = '-R -X';    # have less (the pager) allow ANSI color escapes, and not reset the screen each time
foreach my $wid ( sort keys %$duplicates ) {
    if ( scalar( keys $duplicates->{$wid}->%* ) != 2 ) {
        push @$uncompleted, [ $wid, "duplicates count != 2" ];
        $curr++;
        next;
    }

    # The two names sort as strings; the first is treated as the old copy.
    my ( $old, $new ) = sort keys $duplicates->{$wid}->%*;

    #my $diff_cmd = 'ccdiff --new=bold_green --old=bold_red --utf-8 --unified ' . $old . ' ' . $new;
    # try to use BBEdit as the diff tool. The manual page says it should be $new then $old, but that puts $new on the left...
    # The command is kept as a list and run without a shell, so file names
    # containing spaces or shell metacharacters reach the tool unchanged.
    my @diff_cmd = ( 'bbdiff', '--wait', '--resume', '--', $old, $new );
    print join( ' ', @diff_cmd ) . " .. ";
    my $diff_res;
    if ( open( my $diff_out, '-|', @diff_cmd ) ) {
        $diff_res = do { local $/; <$diff_out> };    # slurp whatever the tool prints
        close($diff_out);
    }
    else {
        # Best effort: the operator can still decide without the diff.
        warn("could not run $diff_cmd[0]: $!\n");
    }
    say "done";
    ## don't use the pager if using bbedit as diff tool
    ##{
    ##    my $obj = IO::Pager->new();
    ##    $obj->binmode(':utf8');
    ##    $obj->print($diff_res);
    ##}
    my $count  = sprintf( $fmtstr, $curr, $total );
    my $action = prompt( "$count: [s]kip, [d]elete old, [k]eep old, [q]uit", { 'anyone' => [ qw(s d k q) ], 'default' => 'k', 'color' => [ qw(bold blue) ] } );
    my $rename = 0;
    if ( $action eq 's' ) {    # skip this file, must be dealt with manually
        push @$uncompleted, [ $wid, 'skipped' ];
    }
    elsif ( $action eq 'd' ) {    # delete the "old" file, usually because the changes are so minor
        path($old)->remove();
        push @$actions, [ $wid, "deleted $old" ];
        $rename = 1;
    }
    elsif ( $action eq 'k' ) {    # move the "old" file into the "old" folder and rename
        # Make sure the destination folder exists; otherwise the move dies
        # and the session ends without printing its summary.
        path('old')->mkpath();
        path($old)->move( 'old/' . $old );
        push @$actions, [ $wid, "moved $old to 'old' directory" ];
        $rename = 1;
    }
    elsif ( $action eq 'q' ) {    # quit: stop reviewing, the summary is still printed
        last;
    }

    if ($rename) {
        my $base = $wid . '.html';

        # Never overwrite an existing unsuffixed file; report it instead.
        if ( path($base)->exists() ) {
            warn("$wid: $base exists\n");
            push @$uncompleted, [ $wid, "could not rename $new because unaffixed file exists" ];
        }
        else {
            path($new)->move($base);
            push @$actions, [ $wid, "moved $new to $base" ];
        }
    }

    $curr++;
}

# Final report: first every change that was made, then everything that still
# needs manual attention. Each entry is printed on its own tab-indented line
# with its fields joined by ": ".
say "Actions taken:";
say "\t" . join( ': ', @$_ ) for @$actions;
print "\n";
say "Remaining issues:";
say "\t" . join( ': ', @$_ ) for @$uncompleted;

say "\ndone.";