Sha256: d4f9e5776f64b6d2289f5055cf7a4200d82cbe8ad5543b89582cdf8653a23f75

Contents?: true

Size: 1.09 KB

Versions: 6

Compression:

Stored size: 1.09 KB

Contents

#!/bin/sh
#
# Generate every corpus file under corpus/ from the source data under data/.
#
# Paths are relative, so run this from the project root (the directory that
# contains lib/, bin/, data/ and corpus/).
#
# Steps:
#   1. Build the gutenberg2005, tvscripts and wordfrequency corpora in every
#      format listed in $formats.
#   2. Build the size100 test corpus as JSON (mkcorpus --randomize --limit 100)
#      and convert it to the other formats.
#   3. If corpus/coca500k.json is readable, convert it to the other formats.
#
# A failing chbs invocation does not stop the run: the remaining steps are
# still attempted, and the script exits 1 at the end if anything failed.

formats="json csv isam isamkd marshal"

# The sqlite format is skipped when the active ruby reports itself as JRuby.
if ! ruby -v | grep -q jruby; then
  formats="$formats sqlite"
fi

# Overall exit status; set to 1 by run_chbs when an invocation fails.
status=0

# Run a chbs subcommand from the working tree with backtraces enabled.
# Arguments: $1 - chbs subcommand (mkcorpus or convert); rest - its arguments.
# Arguments are forwarded with "$@" rather than eval so that file names
# produced by the globs below are never re-parsed by the shell.
run_chbs() {
  if ! ruby -Ilib ./bin/chbs --trace "$@"; then
    echo "generate_all: chbs $1 failed" >&2
    status=1
  fi
}

# Convert corpus/<name>.json into every non-JSON format in $formats.
# Arguments: $1 - corpus base name (without directory or extension).
convert_all() {
  # $formats is intentionally unquoted: it is a space-separated list.
  for format in $formats; do
    if [ "$format" != "json" ]; then
      run_chbs convert "corpus/$1.json" "corpus/$1.$format"
    fi
  done
}

for format in $formats; do
  echo "Creating corpus files for format $format"
  run_chbs mkcorpus -i wiktionary -o "corpus/gutenberg2005.$format" data/wiktionary/gutenberg2005/*
  run_chbs mkcorpus -i tvscripts -o "corpus/tvscripts.$format" data/wiktionary/tv-and-movies/*
  run_chbs mkcorpus -i wordfrequency -o "corpus/wordfrequency.$format" data/wordfreq/toplemmas/*
done

echo "Making size100 test corpus in all formats..."
run_chbs mkcorpus -i tvscripts --randomize --limit 100 -o corpus/size100.json data/wiktionary/tv-and-movies/*
convert_all size100

# coca500k.json is optional; it is only converted when already present.
if [ -r "corpus/coca500k.json" ]; then
  echo "Converting coca500k.json corpus to all formats..."
  convert_all coca500k
fi

exit "$status"

Version data entries

6 entries across 6 versions & 1 rubygems

Version Path
correct-horse-battery-staple-0.6.6 script/generate_all
correct-horse-battery-staple-0.6.5 script/generate_all
correct-horse-battery-staple-0.6.4 script/generate_all
correct-horse-battery-staple-0.6.3 script/generate_all
correct-horse-battery-staple-0.6.2 script/generate_all
correct-horse-battery-staple-0.6.1 script/generate_all