#!/usr/bin/perl -w
#
# try to find some mistakes created by manual translation
#
# with supplied arguments you can find untraslated messages in linuxconf
# dic files and/or generate comments free prepared dictionary files with
# translation tags with empty contens if missing from present dictionary
# files and/or remove lines with empty translation contens and find
# too long messages
#
# Author: Peter Ivanyi ivanyi@internet.sk
#
#
#
$progname = "chkdic.pl";
$version = "0.12";
$docreateempty = 0;
$doshowuntrans = 0;
$doremoveempty = 0;
$extnew = "new";
$maxlength = 76;  # maximum length of message without splitting to more lines
$usage = "Usage: $progname [options...]

Options:
 -c|--createempty       create skeleton files \*.$extnew for translation with
                        empty translation tags if missing and removed comments
                        about new/updated message
 -h|--help              print this help message and exit
 -r|--removeempty       remove lines with empty translation contens and remove
                        comments about new/updated message
 -u|--untranslated      print also all untranslated messages
 -V|--version           print program version and exit

$progname operates on all files with dic extension in current directory.
Options --createempty and --removeempty create new files with added .$extnew
suffix (backup .$extnew files if any before).
";

while( defined($_ = $ARGV[0]) ) {
    shift;
    # if(/^--$/) {last;}
    if (/^(-c|--createempty)$/) { $docreateempty = 1;}
    elsif (/^(-h|--help)$/) { &usage, exit 0;}
    elsif (/^(-r|--removeempty)$/) { $doremoveempty = 1;}
    elsif (/^(-u|--untranslated)$/) { $doshowuntrans = 1;}
    elsif (/^(-V|--version)$/) { print $version, "\n"; exit 0;}
    else { &usage; exit 1;}
}
if ( $docreateempty == 1 && $doremoveempty == 1 ) {
    die
 "You can't specify options --createempty and --removeempty in the same time";
}

@files = glob("*.dic");
if ( scalar @files == 0 ) {
    die "No file with .dic extension found in current directory.";
}

foreach(@files) {
    $filein = $_;
    $fileout = $filein . "." . $extnew;
    $previousline = "";
    $srcline = "";
    $transline = "";
    $srclineno = 0;
    $translineno = 0;
    $lineno = 0;
    $modified = 0;
    $cntuntranslines = 0;
    $cntsrc = 0; $cntold = 0; $cnttrans = 0; $cntcont = 0;
    $iscomment = 0; $ismisc = 0;
    open(FILEIN, "<$filein") || die("open $filein: ",$!);
    open(FILEOUT, ">$fileout") || die("open $fileout: ",$!)
      if $docreateempty == 1 or $doremoveempty == 1 ;

        while(<FILEIN>) {
        chop;
        $lineno++;
        if ( $lineno == 1 && ! /^\@version/ ) {
            print STDERR $filein ,
              ": expected \@version in line 1, skipping file...", "\n";
            last;
        } elsif ( /^!/ ) {
            # we will operate on this later (together with eof)
        } elsif ( /^\s*:E/ ) {
            $cntsrc++; $cntcont = 0;
            $iscomment = 0; $ismisc = 0;
            if ( $cntsrc == 1 ) {
                $srcline = $_;
                $srclineno = $lineno;
            }
        } elsif ( /^\s*:T/ ) {
            $cnttrans++; $cntcont = 0;
            $iscomment = 0; $ismisc = 0;
            if ( $cnttrans == 1 ) {
                $transline = $_;
                $translineno = $lineno;
            }
        } elsif ( /^\s*\+/ ) {
            $cntcont++;
            $iscomment = 0; $ismisc = 0;
        } elsif ( /^\s*:Z/ ) {
            $cntold++; $cntcont = 0;
            $iscomment = 0; $ismisc = 0;
            &printold;
        } elsif ( /^\s*# \*\*\* (new|updated) message/ ) {
            $iscomment++; $ismisc = 0;
        } else {
            # translator's comments, mistakes, unknown tags ...
            $ismisc = 1;
        }
        if ( /^!/ || eof ) {
            # fix for ex. broken boot messages
            if ( $cntsrc == $cnttrans && $cntsrc == 1) {
                if ( $srcline =~ /\\n$/ && $transline !~ /\\n$/ ) {
                    print STDERR $filein, ":($translineno)(missing \\n)",
                      $transline, "\n";
                }
            }
            if ($cntsrc > 0 && $cnttrans == 0) {
                $cntuntranslines = $cntcont + 1;
            }
            &printuntranslated if $doshowuntrans == 1;
            &writeempty if ( $docreateempty == 1 && ! eof FILEIN );
            $cntsrc = 0; $cntold = 0; $cnttrans = 0; $cntcont = 0;
            $iscomment = 0; $ismisc = 0;
        }

        &checkgeneral;

        if ( $docreateempty == 1 ) {
            if ( $iscomment == 0 ) {
                print FILEOUT $_, "\n";
                &writeempty if eof FILEIN;
            } else {
                $modified = 1;
            }
        } elsif ( $doremoveempty == 1 ) {
            if ( /^\s*\+\s*$/ || /^\s*:T\s*$/ || $iscomment != 0 ) {
                $modified = 1;
            } else {
                print FILEOUT $_, "\n";
            }
        }
        $previousline = $_;
  }

  if ( $docreateempty == 1 or $doremoveempty == 1 ) {
      close FILEOUT;
      # we don't need the same (unmodified) files twice
      unlink $fileout if $modified == 0;
  }
  close FILEIN;
}

sub checkgeneral {
    if ( /^\s*\+\s*$/ || /^\s*:T\s*$/ ) {
        print STDERR $filein, ":($lineno)(empty)", $_,"\n";
    }
    if ( mylength($_) > $maxlength ) {
        print STDERR $filein, ":($lineno)(too long:", mylength($_),")", $_, "\n";
    }
    if ( $cntsrc == 2 && $cntcont == 0 ) {
        print STDERR $filein, ":($lineno)(multiple :E)", $_, "\n";
    }
    elsif ( $cntold == 2 && $cntcont == 0 ) {
        print STDERR $filein, ":($lineno)(multiple :Z)", $previousline, "\n";
    }
    elsif ( $cnttrans == 2 && $cntcont == 0 ) {
        print STDERR $filein, ":($lineno)(multiple :T)", $previousline, "\n";
    }
    elsif ( $ismisc == 1 && $_ !~ /(\s*#)|(^\s*$)/ && $lineno != 1 ) {
        print STDERR $filein, ":($lineno)(unknown)", $_, "\n";
    }
# maybe mistake by typing \t instaed of \n
#  if ( /\\t\s*$/ ) {
#      print STDERR $filein, ":($lineno)(typo \\t)", $_, "\n";
#  }
# correct regularly produced typos with unusual keyboard layout
#    if ( /òn/ || /òt/ ) {
#        print STDERR $filein, ":($lineno)(typo òn/òt)", $_, "\n";
#    }
            
    # previous line should end with `\n' string if current is continuing line
    if ( $cntcont >= 1 ) {
        if ( $previousline !~ /\\n$/ and $ismisc == 0 and $iscomment == 0 ) {
            if ( $previousline !~ /\\n\s+$/ ) {
                print STDERR $filein, ":(",$lineno - 1,")(missing \\n)",
                  $previousline, "\n";
            } else {
                print STDERR $filein, ":(",$lineno - 1,")(space after \\n)",
                  $previousline, "\n";
            }
        }
    }
}

sub printold {
    if ( $cntold == 1 ) {
        print STDERR $filein,":($lineno)(old msg)", $_, "\n";
    }
}

sub printuntranslated {
    if ( $cntsrc > 0 && $cnttrans == 0 ) {
        # ignore empty src
        if ( $srcline !~ /^\s*:E\s*$/ ) {
            print STDERR $filein, ":($srclineno)(untransl)", $srcline, "\n";
        }
    }
}

sub writeempty {
    if ( $cntuntranslines > 0 ) {
        print FILEOUT "    :T \n";
        foreach ( 1 .. $cntuntranslines - 1 ) {
          print FILEOUT "      +\n";
        }
        $modified = 1;
        $cntuntranslines = 0;
    }
}

# in case of any occurence %s and similar string is length only good/bad
# estimation
sub mylength {
    $max = 0;
    $currline = $_;
    $currline =~ s/^\s*#.*//;
    $currline =~ s/^\s*(:T|:E|:Z|\+)\s*//;
    $currline =~ s/\s*$//;
    $currline =~ s/(\\([^n]){1})/$2/g;
    $currline =~ s/%s/0123456789/g;
    @lines = split(/\\n/, $currline);
    foreach(@lines) {
       $max = length($_) if length($_) > $max;
    }
    return $max;
}

sub usage {
    print STDERR $usage;
}
