Sha256: c104496362b53f27d63d9d8129d7cfb7408aa43c0caf13f384d61a9735d77c6c

Contents?: true

Size: 987 Bytes

Versions: 7

Compression:

Stored size: 987 Bytes

Contents

#!/bin/bash
#
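#	score - compute a character-based error score for two text files:
#	        prints (inserted + deleted characters) / (size of both files),
#	        followed by the most frequent character substitutions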
#
#	usage: score  file1  file2
#
spread(){
#	write each input character to a separate line
expand $1 | awk '
{
    n=length($0);
    for (i=1; i<=n; i++) printf("%c\n", substr($0,i,1));
    printf("end of line\n");
}'
}

# count the character deletions and insertions
numerator=`diff <(spread $1) <(spread $2) | egrep -c '<|>'`
# count the characters in both files
denominator=`cat $1 $2 | wc -c`
perl -e "printf(\"%9.6f\\n\", $numerator/$denominator )"

diff <(spread $1) <(spread $2) | awk '
/^[1-9]/{
  if (length(del)+length(add)){
     printf("s/%s/%s/\n",del,add); 
     del=""; 
     add=""; 
     next;
  }
}
/^< end of line/{printf("s//\\n/\n"); next;} # delete a line
/^> end of line/{printf("s/\\n//\n"); next;} # insert a line
/^</{if (NF>1) {del=del $2;} else printf("s/ //\n");}	# delete a character
/^>/{if (NF>1) {add=add $2;} else printf("s// /\n");}	# insert a character
' | sort | uniq -c | sort -n | tail

Version data entries

7 entries across 7 versions & 2 rubygems

Version Path
entangledstate-isbn-1.4.1 src/gocr-0.48/examples/score
isbn-2.0.4 src/gocr-0.48/examples/score
isbn-2.0.3 src/gocr-0.48/examples/score
isbn-2.0.2 src/gocr-0.48/examples/score
isbn-2.0.1 src/gocr-0.48/examples/score
isbn-2.0.0 src/gocr-0.48/examples/score
isbn-1.4.1 src/gocr-0.48/examples/score