./TravEpi/SimpleRulesBase/Logger.pm


package MGH_Biostat::TravEpi::SimpleRulesBase::Logger;
use Modern::Perl '2012';
use experimental 'switch';
use Socket;
use POSIX 'strftime';
use Plack::Request;

sub new {
    my $class = shift;
    my ($print_obj) = @_;

    my $self = bless {}, $class;

    $self->{'print_obj'} = $print_obj;

    return $self;
}

our $filters = {
    'proxies' => [ '155.52.221.6', '170.223.178.73' ],
    'drop_uas' => [
        qr/UptimeRobot/i,
    ],
    'bot_uas' => [
        qr/googlebot/i,       qr/bingbot/i, qr/baiduspider/i,     qr/ahrefsbot/i,
        qr/Ezooms.*moz\.com/, qr/MJ12bot/,  qr/Xenu Link Sleuth/, qr/yandex/i,
        qr/gsa-crawler/i,     qr/Applebot/i,
    ],
    'skip_names' => [
        qr/^buxtehude\.mgh\.harvard\.edu/i, qr/^w0062671\.mgh\.harvard\.edu/i, qr/^w0108948\.mgh\.harvard\.edu/i, qr/^w0084017\.mgh\.harvard\.edu/i,    # ricky, regina, sarah, ed
        qr/googlebot/, qr/crawl\.baidu/, qr/static\.reverse\.softlayer\.com/, qr/search\.msn\.com/,                                                     # some search engines, needed to filter data pre jan 28 2014
    ],
};

our $ip_cache = {};

sub log {
    my $self = shift;
    my ( $env, $hash, $lang, $action, $extra_data ) = @_;
    $extra_data //= '';

    my $req     = Plack::Request->new($env);
    my $cookies = $req->cookies;
    return if ( defined( $cookies->{'nologging'} ) and ( $cookies->{'nologging'} eq 'stop-logging' ) );

    my $time = strftime( '%Y-%m-%d %H:%M:%S', localtime() );

    my $ips = [ grep { $_; } ( split( /\s*,\s*/, ( $env->{'HTTP_X_FORWARDED_FOR'} // '' ) ), $env->{'REMOTE_ADDR'} ) ];
    foreach (@$ips) {
        if ( $_ eq '127.0.0.1' ) { $_ = undef; }    # localhost
        elsif ( $_ ~~ $filters->{'proxies'} ) { $_ = undef; }    # proxies
        else {
            if ( $ip_cache->{$_} ) {
                $_ = $ip_cache->{$_};
            }
            else {
                my $temp = $_;
                $_ = scalar( gethostbyaddr( inet_aton($_), AF_INET ) ) // $_;
                $ip_cache->{$temp} = $_;
            }
        }
    }
    my $ip = join( ' + ' => grep { $_; } @$ips ) || ( 'un-resolvable: ' . ( $env->{'HTTP_X_FORWARDED_FOR'} // '' ) . ' + ' . $env->{'REMOTE_ADDR'} );
    my $ua = $env->{'HTTP_USER_AGENT'} // '';

    my $prefix = 'keep';

    if ( ( $prefix eq 'keep' ) and ( $ua ~~ $filters->{'drop_uas'} ) ) {
        $prefix = 'drop';
    }

    if ( ( $prefix eq 'keep' ) and ( $ua ~~ $filters->{'bot_uas'} ) ) {
        $prefix = 'bot';
    }

    if ( ( $prefix eq 'keep' ) and ( $ip ~~ $filters->{'skip_names'} ) ) {
        $prefix = 'skip';
    }

    my $line = $prefix . ': ' . join( ' ; ' => ( $time, $hash, $ip, $lang, $action, $ua, $extra_data ) ) . "\n";

    if ( $prefix ne 'drop' ) {
        $self->{'print_obj'}->print($line);
    }
}

1;