package MGH_Biostat::Locale::PO; use Modern::Perl '2012'; use experimental 'switch'; use Carp; use constant CTXSEP => chr(4); # this is basically a simple rewrite of Locale::PO # I've made a few small changes: # - strings are no longer quoted until they are dumped to file # - I've integrated the fix for the obsolete problem from https://github.com/cosimo/perl5-locale-po/pull/4 # - Importing into a hash now handles issues of context correctly # - Everything is utf8 -- you no longer pass a flag to force it into that encoding # - plural forms are an array, not a hash # - flags are implemented with a hash, not an array # - I've added a way to generate a hash key sub new { my $class = shift; my %options = @_; my %opts; # remove leading hyphens, if passed foreach my $optkey ( keys %options ) { my $newkey = ( $optkey =~ s/^-//r ); $opts{$newkey} = $options{$optkey}; } my $self = {}; bless $self, $class; $self->{'_flags'} = {}; my @optcalls = qw(msgid fuzzy_msgid msgid_plural fuzzy_msgid_plural msgstr msgstr_n msgctxt fuzzy_msgctxt comment fuzzy automatic reference loaded_line_number); foreach my $call (@optcalls) { $self->$call( $opts{$call} ) if ( exists( $opts{$call} ) ); } my @formats = qw(c php perl); foreach my $format (@formats) { $self->_tri_value_flag( $format . '-format', 1 ) if defined( $opts{ $format . '_format' } ); $self->_tri_value_flag( $format . '-format', 0 ) if defined( $opts{ 'no_' . $format . '_format' } ); } return $self; } # install simple accessor subroutines my @string_sub_names = qw(msgid fuzzy_msgid msgid_plural fuzzy_msgid_plural msgctxt fuzzy_msgctxt msgstr comment automatic reference obsolete loaded_line_number); foreach my $name (@string_sub_names) { no strict 'refs'; *{$name} = sub { my $self = shift; $self->_accessor( $name, @_ ); }; } sub _accessor { my $self = shift; my $key = shift; @_ ? $self->{$key} = shift : $self->{$key}; } # this differs from the original, as I want this to be an array sub msgstr_n { my $self = shift; if (@_) { my $aref = shift; croak 'Argument to msgstr_n must be an arrayref: [ "string 0", "string 1", ... ].' unless ref($aref) eq 'ARRAY'; $self->{'msgstr_n'} = $aref; } return $self->{'msgstr_n'}; } # deal with flags sub add_flag { my $self = shift; my ($flag_name) = @_; $self->{'_flags'}{$flag_name} = undef; } sub remove_flag { my $self = shift; my ($flag_name) = @_; delete( $self->{'_flags'}{$flag_name} ); } sub has_flag { my $self = shift; my ($flag_name) = @_; return exists( $self->{'_flags'}{$flag_name} ); } sub _tri_value_flag { my $self = shift; my $flag_name = shift; if (@_) { #set or clear flag my $value = shift; if ( !defined($value) or ( $value eq '' ) ) { $self->remove_flag($flag_name); $self->remove_flag( 'no-' . $flag_name ); return undef; } elsif ($value) { $self->add_flag($flag_name); $self->remove_flag( 'no-' . $flag_name ); return 1; } else { $self->remove_flag($flag_name); $self->add_flag( 'no-' . $flag_name ); return 0; } } else { # check the flags return 1 if $self->has_flag($flag_name); return 0 if $self->has_flag( 'no-' . $flag_name ); return undef; } } sub fuzzy { my $self = shift; if (@_) { my $value = shift; $value ? $self->add_flag('fuzzy') : $self->remove_flag('fuzzy'); } return $self->has_flag('fuzzy'); } # install flag accessor subroutines my @flag_sub_names = qw(c_format php_format perl_format); foreach my $name (@flag_sub_names) { no strict 'refs'; my $flag = ( $name =~ s/_/-/gr ); *{$name} = sub { my $self = shift; return $self->_tri_value_flag( $flag, @_ ); }; } # deal with quoting # copied directly from Locale::PO sub quote { my $self = shift; my $string = shift; return undef unless defined $string; $string =~ s/\\(?!t)/\\\\/g; # \t is a tab $string =~ s/"/\\"/g; $string =~ s/\n/\\n/g; return "\"$string\""; } sub dequote { my $self = shift; my $string = shift; return undef unless defined $string; $string =~ s/^"(.*)"/$1/; $string =~ s/\\"/"/g; $string =~ s/(?<!(\\))\\n/\n/g; # newline $string =~ s/(?<!(\\))\\{2}n/\\n/g; # inline newline $string =~ s/(?<!(\\))\\{3}n/\\\n/g; # \ followed by newline $string =~ s/\\{4}n/\\\\n/g; # \ followed by inline newline $string =~ s/\\\\(?!n)/\\/g; # all slashes not related to a newline return $string; } # generate a hash key automatically sub hashkey { my $self = shift; return ( $self->msgctxt ? $self->msgctxt . CTXSEP : '' ) . $self->msgid; } # handle dumping # I directly access the keys here, because I need to test for existence, not definedness sub dump { my $self = shift; my $dump = ''; my %prefixes = ( 'comment' => '# ', 'automatic' => '#. ', 'reference' => '#: ' ); foreach my $comment_type (qw(comment automatic reference)) { if ( defined( $self->{$comment_type} ) ) { $dump .= $self->_dump_multi_comment( $self->{$comment_type}, $prefixes{$comment_type} ); } } if ( %{ $self->{'_flags'} } ) { $dump .= '#, ' . join( ', ' => @{ [ sort keys %{ $self->{'_flags'} } ] } ) . "\n"; } my $fuzzy_prefix = $self->obsolete ? '#~| ' : '#| '; foreach my $fuzzy_type (qw(fuzzy_msgctxt fuzzy_msgid fuzzy_msgid_plural)) { if ( exists( $self->{$fuzzy_type} ) ) { my $real_type = ( $fuzzy_type =~ s/^fuzzy_//r ); $dump .= $fuzzy_prefix . $real_type . ' ' . $self->_normalize_str( $self->{$fuzzy_type}, $fuzzy_prefix ); } } my $obsolete_prefix = $self->obsolete ? '#~ ' : ''; foreach my $type (qw(msgctxt msgid msgid_plural msgstr)) { if ( exists( $self->{$type} ) ) { $dump .= $obsolete_prefix . $type . ' ' . $self->_normalize_str( $self->{$type}, $obsolete_prefix ); } } if ( exists( $self->{'msgstr_n'} ) ) { for ( my $i = 0 ; $i < scalar( @{ $self->{'msgstr_n'} } ) ; $i++ ) { $dump .= $obsolete_prefix . 'msgstr[' . $i . '] ' . $self->_normalize_str( $self->{'msgstr_n'}[ $i ], $obsolete_prefix ); } } $dump .= "\n"; return $dump; } sub _dump_multi_comment { my $self = shift; my $comment = shift; my $leader = shift; my $chopped = $leader; chop($chopped); my $result = $leader . $comment; $result =~ s/\n/\n$leader/g; $result =~ s/^$leader$/$chopped/gm; $result .= "\n"; return $result; } sub _normalize_str { my $self = shift; my $string = shift; my $prefix = shift // ""; # Multiline: this isn't quite perfect, but fast and easy if ( defined $string && $string =~ /\n/ ) { my @lines = split( /\n/, $string, -1 ); my $lastline = pop @lines; # special treatment for this one my $output; $output = qq{""\n} if ( $#lines != 0 ); foreach (@lines) { $output .= $prefix . $self->quote("$_\n") . "\n"; } $output .= $prefix . $self->quote($lastline) . "\n" if $lastline ne ""; return $output; } # Single line else { return ( $self->quote($string) || "" ) . "\n"; } } # saving files sub save_file_fromarray { my $self = shift; $self->_save_file( 0, @_ ); } sub save_file_fromhash { my $self = shift; $self->_save_file( 1, @_ ); } sub _save_file { my $self = shift; my ( $hash, $file, $entries ) = @_; open( my $out, '>:encoding(utf-8)', $file ) or croak("Could not open $file for writing: $!"); if ($hash) { foreach ( sort keys %$entries ) { print $out $entries->{$_}->dump(); } } else { foreach (@$entries) { print $out $_->dump(); } } close($out); } # reading files sub load_file_asarray { my $self = shift; $self->_load_file( 0, @_ ); } sub load_file_ashash { my $self = shift; $self->_load_file( 1, @_ ); } sub _load_file { my $self = shift; my $hash = shift; my $file = shift; my $class = ref $self || $self; my ( @entries, %entries ); my $line_number = 0; my $po; my %buffer; my $last_buffer; my $plural_forms; open( my $in, '<:encoding(utf-8)', $file ) or croak("Could not open $file for reading: $!"); while (<$in>) { chomp; $line_number++; s/[\r\n]*$//; if (/^$/) { # empty line is end of an entry if ( defined($po) ) { foreach my $key (qw(fuzzy_msgctxt fuzzy_msgid fuzzy_msgid_plural msgctxt msgid msgid_plural msgstr msgstr_n)) { no strict 'refs'; $po->$key( $buffer{$key} ) if exists( $buffer{$key} ); } if ( $po->msgstr_n and defined($plural_forms) ) { $po->plural_form_text($plural_forms); $po->translate_plural_form(); } if ($hash) { if ( $po->_hash_key_ok( $po->hashkey, \%entries ) ) { $entries{ $po->hashkey } = $po; } } else { push @entries, $po; } if ( $po->msgid eq '' ) { # this is a PO header, maybe we can extract some plural forms my $temp = $po->msgstr; if ( $temp =~ m/Plural-Forms: nplurals=\d+; plural=(.+?);\n/ ) { $plural_forms = $1; } } undef $po; undef $last_buffer; %buffer = (); } } elsif ( /^#\s+(.*)/ or /^#()$/ ) { # translator comments $po = $class->new( loaded_line_number => $line_number ) unless defined($po); if ( defined( $po->comment ) ) { $po->comment( $po->comment . "\n$1" ); } else { $po->comment($1); } } elsif (/^#\.\s*(.*)/) { # Automatic comments $po = $class->new( loaded_line_number => $line_number ) unless defined($po); if ( defined( $po->automatic ) ) { $po->automatic( $po->automatic . "\n$1" ); } else { $po->automatic($1); } } elsif (/^#:\s+(.*)/) { # reference $po = $class->new( loaded_line_number => $line_number ) unless defined($po); if ( defined( $po->reference ) ) { $po->reference( $po->reference . "\n$1" ); } else { $po->reference($1); } } elsif (/^#,\s+(.*)/) { # flags my @flags = split /\s*[,]\s*/, $1; $po = $class->new( loaded_line_number => $line_number ) unless defined($po); foreach my $flag (@flags) { $po->add_flag($flag); } } elsif (/^#(~)?\|\s+msgctxt\s+(.*)/) { $po = $class->new( loaded_line_number => $line_number ) unless defined($po); $buffer{fuzzy_msgctxt} = $self->dequote($2); $last_buffer = \$buffer{fuzzy_msgctxt}; $po->obsolete(1) if $1; } elsif (/^#(~)?\|\s+msgid\s+(.*)/) { $po = $class->new( loaded_line_number => $line_number ) unless defined($po); $buffer{fuzzy_msgid} = $self->dequote($2); $last_buffer = \$buffer{fuzzy_msgid}; $po->obsolete(1) if $1; } elsif (/^#(~)?\|\s+msgid_plural\s+(.*)/) { $po = $class->new( loaded_line_number => $line_number ) unless defined($po); $buffer{fuzzy_msgid_plural} = $self->dequote($2); $last_buffer = \$buffer{fuzzy_msgid_plural}; $po->obsolete(1) if $1; } elsif (/^(#~\s+)?msgctxt\s+(.*)/) { $po = $class->new( loaded_line_number => $line_number ) unless defined($po); $buffer{msgctxt} = $self->dequote($2); $last_buffer = \$buffer{msgctxt}; $po->obsolete(1) if $1; } elsif (/^(#~\s+)?msgid\s+(.*)/) { $po = $class->new( loaded_line_number => $line_number ) unless defined($po); $buffer{msgid} = $self->dequote($2); $last_buffer = \$buffer{msgid}; $po->obsolete(1) if $1; } elsif (/^(#~\s+)?msgid_plural\s+(.*)/) { $po = $class->new( loaded_line_number => $line_number ) unless defined($po); $buffer{msgid_plural} = $self->dequote($2); $last_buffer = \$buffer{msgid_plural}; $po->obsolete(1) if $1; } elsif (/^(?:#~\s+)?msgstr\s+(.*)/) { # translated string $buffer{msgstr} = $self->dequote($1); $last_buffer = \$buffer{msgstr}; } elsif (/^(?:#~\s+)?msgstr\[(\d+)\]\s+(.*)/) { # translated string $buffer{msgstr_n}[ $1 ] = $self->dequote($2); $last_buffer = \$buffer{msgstr_n}[ $1 ]; } elsif (/^(?:#(?:~|~\||\|)\s+)?(".*)/) { # continued string. Accounts for: # normal : "string" # obsolete : #~ "string" # fuzzy : #| "string" # fuzzy+obsolete : #~| "string" $$last_buffer .= $self->dequote($1); } else { warn "Strange line at $file line $line_number: [$_]\n"; } } if ( defined($po) ) { foreach my $key (qw(fuzzy_msgctxt fuzzy_msgid fuzzy_msgid_plural msgctxt msgid msgid_plural msgstr msgstr_n)) { no strict 'refs'; $po->$key( $buffer{$key} ) if defined( $buffer{$key} ); } my $msg_key = ( $po->msgctxt ? $po->msgctxt . CTXSEP : '' ) . $po->msgid; if ($hash) { if ( $po->_hash_key_ok( $msg_key, \%entries ) ) { $entries{$msg_key} = $po; } } else { push @entries, $po; } undef $po; undef $last_buffer; %buffer = (); } close($in); return ( $hash ? \%entries : \@entries ); } sub _hash_key_ok { my $self = shift; my ( $key, $entries ) = @_; if ( $entries->{$key} ) { # don't overwrite non-obsolete entries with obsolete ones return 0 if ( ( $self->obsolete ) && ( not $entries->{$key}->obsolete ) ); # don't overwrite translated entries with untranslated ones return 0 if ( ( $self->msgstr !~ /\w/ ) && ( $entries->{$key}->msgstr =~ /\w/ ) ); } return 1; } # additional stuff for plural forms sub plural_form_text { my $self = shift; $self->_accessor( 'plural_form_text', @_ ); } sub has_plural_form { my $self = shift; return defined( $self->{'plural_form_text'} ); } sub translate_plural_form { my $self = shift; return unless $self->has_plural_form; my $pft = $self->plural_form_text; $pft =~ s/n/ \$n /g; $self->{'plural_form_sub'} = eval "sub { my \$n = shift; return $pft; }"; } sub msgstr_n_apply { my $self = shift; my ($n) = @_; my $msgstr_n = $self->msgstr_n; return $n unless $msgstr_n; # only work if we have some strings available my $rule_sub = $self->{'plural_form_sub'}; return $n unless defined($rule_sub); my $choice = $rule_sub->($n); my $format_string = $msgstr_n->[ $choice ] // '%s'; return sprintf( $format_string, $n ); } 1;