Sha256: c104496362b53f27d63d9d8129d7cfb7408aa43c0caf13f384d61a9735d77c6c
Contents?: true
Size: 987 Bytes
Versions: 7
Compression:
Stored size: 987 Bytes
Contents
#!/bin/bash
#
# score - compute a character-based error score
#
# usage: score file1 file2
#
# Output:
#   line 1    - (inserted + deleted characters) / (total bytes in both files),
#               printed with the format %9.6f (0 when both files are empty)
#   remaining - the most frequent edits (at most ten, least frequent first),
#               written as sed-like s/deleted/inserted/ expressions, each
#               prefixed by the number of times it occurred
#
# Exit status: 0 on success, 2 on a usage error, 1 if an input file cannot be
# read or diff itself fails.
#
# "set -e" and "pipefail" are deliberately not enabled: diff exits 1 whenever
# the inputs differ and grep -c exits 1 when it counts zero lines, and both
# are normal outcomes for this script.

# Write each character of file $1 on its own line (tabs are first expanded to
# spaces), followed by the marker line "end of line" after every input line.
spread() {
  expand -- "$1" | awk '
    {
      n = length($0)
      for (i = 1; i <= n; i++)
        printf("%s\n", substr($0, i, 1))
      printf("end of line\n")
    }'
}

# Read the diff of two spread files on stdin and print a frequency table of
# the edits it contains, keeping only the last ten lines of the sorted table.
edit_histogram() {
  awk '
    # Print the substitution collected so far (if any) and start a new one.
    function emit() {
      if (length(del) + length(add)) {
        printf("s/%s/%s/\n", del, add)
        del = ""
        add = ""
      }
    }

    # A hunk header (for example "3c3") closes the previous hunk.
    /^[0-9]/ { emit(); next }

    /^< end of line/ { printf("s//\\n/\n"); next }   # delete a line
    /^> end of line/ { printf("s/\\n//\n"); next }   # insert a line

    # NF is 1 when the character itself is a blank; such edits are reported
    # immediately instead of being appended to the pending substitution.
    /^</ { if (NF > 1) del = del $2; else printf("s/ //\n") }   # delete a character
    /^>/ { if (NF > 1) add = add $2; else printf("s// /\n") }   # insert a character

    # The last hunk has no following header, so it must be flushed here.
    END { emit() }
  ' | sort | uniq -c | sort -n | tail
}

# Entry point: validate the two file arguments, then print the score line and
# the edit histogram.
main() {
  local prog=${0##*/}

  if (( $# != 2 )); then
    printf 'usage: %s file1 file2\n' "$prog" >&2
    return 2
  fi

  local file1=$1 file2=$2 f
  for f in "$file1" "$file2"; do
    if [[ ! -r "$f" ]]; then
      printf '%s: cannot read %s\n' "$prog" "$f" >&2
      return 1
    fi
  done

  # Run diff once and reuse its output for both the count and the histogram.
  local edits status
  edits=$(diff <(spread "$file1") <(spread "$file2"))
  status=$?
  if (( status > 1 )); then
    printf '%s: diff failed with status %d\n' "$prog" "$status" >&2
    return 1
  fi

  # count the character deletions and insertions
  local numerator
  numerator=$(grep -c '^[<>]' <<<"$edits")

  # count the characters in both files
  local denominator
  denominator=$(cat -- "$file1" "$file2" | wc -c)

  # The counts are passed as awk variables rather than pasted into program
  # text; a zero denominator (two empty files) yields a score of 0.
  awk -v n="$numerator" -v d="$denominator" \
    'BEGIN { printf("%9.6f\n", (d + 0 > 0) ? n / d : 0) }'

  edit_histogram <<<"$edits"
}

main "$@"
Version data entries
7 entries across 7 versions & 2 rubygems