Sha256: 39ceb5c1b36c00cc3141b9545839e8003598e16b33673417ebaf6e2c00900954

Contents?: true

Size: 1.59 KB

Versions: 7

Compression:

Stored size: 1.59 KB

Contents

#!/bin/bash
# Script to check gocr results of new versions against a testbase,
# to ensure that recognition is not worse than a minimum level.
# The testbase is a set of (png,pnm,jpg) image files, each accompanied by a
# text file with the same name plus the suffix .txt that contains the
# desired output. Differences are shown as a unified diff.
#
# usage:
#  gocr_chk.sh [-n] [dir ...]
#    -n    sets numbers=1; accepted for compatibility, but nothing in this
#          script reads that flag yet
#    dir   directories searched recursively for *.txt files (default: .);
#          only used if the first one is an existing directory
#
# example:
#  bin/gocr_chk.sh testbase/free/{clean,glued,dusty}/{numbers,text}/
#
# ToDo: output final statistics
# version
#  2009-07-29 numfiles=6 bad=3 good=3 numchars=269 bad=13 tmp09 jocr  5%
#
GOCR=gocr       # OCR command; main switches to ./gocr if that is executable
numfiles=0      # number of checked files
numbad=0        # bad recognitions (ToDo: false positive, false negative)
numgood=0       # good recognitions
numchars=0      # tested chars
badchars=0      # bad chars
numbers=0       # check numbers only? (set by -n, currently unused)
ocr_out=        # temp file holding the OCR output of the current image

# Remove the temp file, if one was created.
cleanup() {
  if [[ -n "$ocr_out" ]]; then
    rm -f -- "$ocr_out"
    ocr_out=
  fi
}

#######################################
# Run the OCR on the image belonging to one expected-text file and update
# the global counters.
# Globals:   GOCR, ocr_out (read); numfiles, numchars, numgood, numbad,
#            badchars (written)
# Arguments: $1 - path of the expected-text file (image path + ".txt")
# Outputs:   one "test ..." line per checked image, plus the unified diff
#            when the OCR output differs from the expected text
# Returns:   0 always; a missing image is skipped silently
#######################################
check_file() {
  local tfile=$1
  # Strip only the trailing ".txt"; a ".txt" elsewhere in the path (for
  # example a directory called a.txt.d) must stay untouched.
  local ifile=${tfile%.txt}
  local nchars ndiff

  [[ -f "$ifile" && -r "$ifile" ]] || return 0

  numfiles=$((numfiles + 1))
  nchars=$(wc -m < "$tfile")
  nchars=$((nchars + 0))    # drop the padding some wc implementations add
  printf "test %-59s chars= %4d\n" "$ifile" "$nchars"
  numchars=$((numchars + nchars))

  # Run the OCR once and feed the stored result to both diff and cmp.
  # Both read it on stdin so their output still names the OCR side "-".
  "$GOCR" "$ifile" > "$ocr_out"
  if diff -b -B -u "$tfile" - < "$ocr_out"; then
    numgood=$((numgood + 1))
  else
    # count different chars: cmp -l prints one line per differing byte
    # (charnum ibyte obyte)
    ndiff=$(cmp -l "$tfile" - < "$ocr_out" | wc -l)
    badchars=$((badchars + ndiff))
    numbad=$((numbad + 1))
  fi
}

#######################################
# Parse the command line, check every *.txt file below the given
# directories and print the summary line.
# Arguments: [-n] [dir ...]
# Outputs:   per-file results followed by one summary line on stdout
# Returns:   0 on success, 1 if the temp file cannot be created
#######################################
main() {
  local -a dirs=(.)
  local tfile
  local percent=0

  GOCR=gocr
  numfiles=0 numbad=0 numgood=0 numchars=0 badchars=0 numbers=0

  if [[ "${1-}" == "-n" ]]; then numbers=1; shift; fi
  if [[ -d "${1-}" ]]; then dirs=("$@"); fi
  if [[ -f ./gocr && -x ./gocr ]]; then GOCR=./gocr; fi

  ocr_out=$(mktemp) || {
    printf '%s\n' "gocr_chk: cannot create temporary file" >&2
    return 1
  }
  trap cleanup EXIT

  # NUL-delimited names keep paths with whitespace intact. The list is read
  # on fd 3 so the OCR command still sees the script's own stdin.
  while IFS= read -r -d '' -u 3 tfile; do
    check_file "$tfile"
  done 3< <(find "${dirs[@]}" -name '*.txt' -print0)

  cleanup
  trap - EXIT

  # Without any tested chars the error rate is reported as 0% instead of
  # aborting on a division by zero.
  if ((numchars > 0)); then
    percent=$((badchars * 100 / numchars))
  fi
  printf ' : %s  numfiles= %d bad= %d  numchars= %d bad= %d  %d%%\n' \
    "$(date +%F)" "$numfiles" "$numbad" "$numchars" "$badchars" "$percent"
}

main "$@"

Version data entries

7 entries across 7 versions & 2 rubygems

Version Path
entangledstate-isbn-1.4.1 src/gocr-0.48/bin/gocr_chk.sh
isbn-2.0.4 src/gocr-0.48/bin/gocr_chk.sh
isbn-2.0.3 src/gocr-0.48/bin/gocr_chk.sh
isbn-2.0.2 src/gocr-0.48/bin/gocr_chk.sh
isbn-2.0.1 src/gocr-0.48/bin/gocr_chk.sh
isbn-2.0.0 src/gocr-0.48/bin/gocr_chk.sh
isbn-1.4.1 src/gocr-0.48/bin/gocr_chk.sh