#!/bin/sh
#
# Build the corpus files under corpus/ in every supported storage format.
#
# Steps:
#   1. For each format, run `chbs mkcorpus` over the gutenberg2005,
#      tv-and-movies and toplemmas data directories.
#   2. Build a randomized 100-entry test corpus as JSON and convert it to
#      every other format.
#   3. If corpus/coca500k.json is readable, convert it to every other format.
#
# Paths are relative, so this is expected to be run from the directory that
# contains lib/, bin/, data/ and corpus/.
#
# Failures of individual steps are not fatal: later steps still run.

# Space-separated list of output formats; iterated with deliberate word
# splitting below.
formats="json csv isam isamkd marshal"

# "sqlite" is only added when `ruby -v` does not mention jruby.
# NOTE(review): presumably the sqlite backend is unavailable on JRuby - confirm.
if ! ruby -v | grep -q jruby; then
  formats="$formats sqlite"
fi

# Run `chbs mkcorpus`, forwarding all arguments verbatim. Using "$@" instead
# of `eval` keeps glob-expanded filenames containing spaces or shell
# metacharacters intact.
mkcorpus() {
  ruby -Ilib ./bin/chbs --trace mkcorpus "$@"
}

# Run `chbs convert`, forwarding all arguments verbatim.
convert() {
  ruby -Ilib ./bin/chbs --trace convert "$@"
}

# Convert corpus/<name>.json into corpus/<name>.<format> for every format in
# $formats other than json.
#   $1 - corpus base name (without directory or extension)
convert_from_json() {
  corpus_name=$1
  for format in $formats; do
    if [ "$format" != "json" ]; then
      convert "corpus/$corpus_name.json" "corpus/$corpus_name.$format"
    fi
  done
}

for format in $formats; do
  echo "Creating corpus files for format $format"
  mkcorpus -i wiktionary -o "corpus/gutenberg2005.$format" \
    data/wiktionary/gutenberg2005/*
  mkcorpus -i tvscripts -o "corpus/tvscripts.$format" \
    data/wiktionary/tv-and-movies/*
  mkcorpus -i wordfrequency -o "corpus/wordfrequency.$format" \
    data/wordfreq/toplemmas/*
done

echo "Making size100 test corpus in all formats..."
mkcorpus -i tvscripts --randomize --limit 100 -o corpus/size100.json \
  data/wiktionary/tv-and-movies/*
convert_from_json size100

# The coca500k corpus is optional; it is only converted when the JSON file
# is present and readable.
if [ -r "corpus/coca500k.json" ]; then
  echo "Converting coca500k.json corpus to all formats..."
  convert_from_json coca500k
fi