#!/usr/bin/perl -w use utf8; # (c) 2006 Felipe Sánchez Martínez # (c) 2006 Universitat d'Alacant # # This software calculates the word error rate (WER) and the # position-independent word error rate (PER) between the translation # performed by the apertium MT system an a reference translation # obtained by post-editing the system ouput. # # The edit_distance procedure used in this script is based on # the Levenshtein distance implementation by Jorge Mas Trullenque # that can be found in http://www.merriampark.com/ldperl2.htm # # This software is licensed under the GPL license version 3, or at # your option any later version # use strict; use warnings; # Getting command line arguments: use Getopt::Long; # Documentation: use Pod::Usage; # I/O Handler use IO::Handle; use locale; use POSIX qw(locale_h); setlocale(LC_ALL,""); my($test, $ref, $help, $version); my($nunknown, $ntest, $nref, $distance_nounk, $per_nounk); my($test_corpus, $ref_corpus); my(@words_test, @words_ref); # Command line arguments GetOptions( 'test|t=s' => \$test, 'ref|r=s' => \$ref, 'help|h' => \$help, 'version|v' => \$version, ) || pod2usage(2); if ($version) { print "apertium-eval-translator-line 1.2.0\n"; exit 0; } pod2usage(2) if $help; pod2usage(2) unless ($test); pod2usage(2) unless ($ref); open(TEST, "<$test") or die "Error: Cannot open test file \'$test\': $!\n"; open(REF, "<$ref") or die "Error: Cannot open reference file \'$ref\': $!\n"; #print "Test file: '$test'\n"; #print "Reference file '$ref'\n\n"; while() { &preprocess; $test_corpus=$_; $nunknown+=s/[*](\w+)/$1/g; @words_test = split /\s+/; $ntest+=@words_test; $_=; &preprocess; $ref_corpus=$_; @words_ref = split /\s+/; $nref+=@words_ref; $distance_nounk+=&edit_distance; $per_nounk+=&position_independent_correct_words; } close(TEST); close(REF); print "Statistics about input files\n"; print "-------------------------------------------------------\n"; print "Number of words in reference: $nref\n"; print "Number of words in test: $ntest\n"; print "Number of unknown words (marked with a star) in test: $nunknown\n"; print "Percentage of unknown words: ", sprintf("%.2f",($nunknown/$ntest)*100), " %\n"; print "\n"; print "Results when removing unknown-word marks (stars)\n"; print "-------------------------------------------------------\n"; print "Edit distance: $distance_nounk\n"; print "Word error rate (WER): ", sprintf("%.2f",($distance_nounk/$nref)*100), " %\n"; print "Number of position-independent correct words: ", $per_nounk, "\n"; print "Position-independent word error rate (PER): ", sprintf("%.2f",(1 - (($per_nounk - max(0, $ntest - $nref)) / $nref))*100), " %\n"; print "\n"; sub position_independent_correct_words { my (%hash_test, %hash_ref); foreach (@words_test) { $hash_test{$_}++; } foreach (@words_ref) { $hash_ref{$_}++; } my $correct=0; foreach (keys %hash_test) { if(defined($hash_ref{$_})) { $correct += min($hash_test{$_}, $hash_ref{$_}); } } return $correct; } sub edit_distance { my @W=(0..@words_ref); my ($i, $j, $cur, $next); for $i (0..$#words_test) { $cur=$i+1; for $j (0..$#words_ref){ my $cost=($words_test[$i] ne $words_ref[$j]); $next=min($W[$j+1]+1, $cur+1, $cost+$W[$j]); $W[$j]=$cur; $cur=$next; } $W[@words_ref]=$next; } return $next; } sub min { my @list = @_; my $min = $list[0]; foreach my $i (@list) { $min = $i if ($i < $min); } return $min; } sub max { my @list = @_; my $max = $list[0]; foreach my $i (@list) { $max = $i if ($i > $max); } return $max; } sub preprocess { chomp; s/^\s+//g; s/\s+$//g; } __END__ =head1 NAME =head1 SYNOPSIS apertium-eval-translator-line -test testfile -ref reffile Options: -test|-t Specify the file with the translation to evaluate -ref|-r Specify the file with the reference translation -help|-h Show this help message -version|-v Show version information and exit Note: Reference translation MUST have no unknown-word marks, even if they are free rides. This software calculates (at document level) the word error rate (WER) and the postion-independent word error rate (PER) between a translation performed by the Apertium MT system and a reference translation obtained by post-editing the system ouput. It is assumed that unknow words are marked with a star (*), as Apertium does; nevertheless, it can be easily adapted to evaluate other MT systems that do not mark unknown words with a star. (c) 2006 Felipe Sánchez-Martínez (c) 2006 Universitat d'Alacant This software is licensed under the GNU GENERAL PUBLIC LICENSE version 2, or at your option any latter version. See http://www.gnu.org/copyleft/gpl.html for a complete version of the license.