#!/bin/bash
#
# score - compute a character-based error score
#
# usage: score file1 file2
#
# Output:
#   1. The error score: the number of character insertions and deletions
#      that diff reports between the two files, divided by the total byte
#      count of both files (printed with the format %9.6f).
#   2. A histogram of the edits, written as sed-like s/deleted/inserted/
#      expressions, sorted by ascending frequency and limited by tail to
#      the last (most frequent) ten entries.
#
# Note: "set -e" is intentionally not used. diff exits 1 when the inputs
# differ and grep -c exits 1 when it counts zero matches; both are normal
# outcomes here.

#######################################
# Write each input character on a separate line, and the marker line
# "end of line" after the characters of every input line.
# Tabs are converted to spaces by expand first.
# Arguments: $1 - path of the file to read
# Outputs:   one character (or the marker) per line on stdout
#######################################
spread() {
  expand -- "$1" |
    awk '{
      n = length($0)
      for (i = 1; i <= n; i++) printf("%c\n", substr($0, i, 1))
      printf("end of line\n")
    }'
}

#######################################
# Turn normal-format diff output of two spread files into a frequency
# table of edits.
# Inputs:  diff output on stdin
# Outputs: "count s/deleted/inserted/" lines on stdout, least frequent
#          first, at most the ten most frequent entries
#######################################
summarize_edits() {
  awk '
    # Print the substitution accumulated for the current hunk, if any.
    function emit_pending() {
      if (length(del) + length(add)) {
        printf("s/%s/%s/\n", del, add)
        del = ""
        add = ""
      }
    }

    # A hunk header (e.g. 3c3) closes the previous hunk.
    /^[1-9]/         { emit_pending(); next }

    # Newline edits are reported on their own, deleted text first.
    /^< end of line/ { printf("s/\\n//\n"); next }   # delete a line break
    /^> end of line/ { printf("s//\\n/\n"); next }   # insert a line break

    # With default field splitting a space character leaves only one
    # field on the line, so it is reported directly.
    /^</ { if (NF > 1) del = del $2; else printf("s/ //\n") }   # delete a character
    /^>/ { if (NF > 1) add = add $2; else printf("s// /\n") }   # insert a character

    # The last hunk has no following header, so flush it here.
    END { emit_pending() }
  ' | sort | uniq -c | sort -n | tail
}

#######################################
# Entry point: print the score and the edit histogram for two files.
# Arguments: $1, $2 - the files to compare
# Returns:   2 on usage error, 1 if a file is unreadable or diff fails,
#            otherwise the status of the final pipeline
#######################################
main() {
  if (( $# != 2 )); then
    printf 'usage: %s file1 file2\n' "${0##*/}" >&2
    return 2
  fi

  local file
  for file in "$1" "$2"; do
    if [[ ! -r "$file" ]]; then
      printf '%s: cannot read %s\n' "${0##*/}" "$file" >&2
      return 1
    fi
  done

  # Run the character-level diff once and reuse it for both reports.
  local delta
  delta=$(diff <(spread "$1") <(spread "$2"))
  if (( $? > 1 )); then
    printf '%s: diff failed\n' "${0##*/}" >&2
    return 1
  fi

  # count the character deletions and insertions
  local numerator
  numerator=$(grep -c '^[<>]' <<<"$delta")

  # count the characters in both files
  local denominator
  denominator=$(cat -- "$1" "$2" | wc -c)

  # Values are passed with -v rather than spliced into program text.
  # Two empty files give a score of 0 instead of a division by zero.
  LC_ALL=C awk -v n="${numerator:-0}" -v d="${denominator:-0}" \
    'BEGIN { printf("%9.6f\n", (d > 0 ? n / d : 0)) }'

  summarize_edits <<<"$delta"
}

main "$@"