Sha256: cfa4ef5d504cc13e2020fd91c1ac621b2cbe75da7c9981d2ed3a920bfb0732ae

Contents?: true

Size: 654 Bytes

Versions: 7

Compression:

Stored size: 654 Bytes

Contents

#!/bin/bash
#
# convert_to_text.sh - extract the plain text of a document into a file.
#
# Usage: ./convert_to_text.sh INPUT OUTPUT
#
# The MIME type reported by file(1) for INPUT selects the converter:
#   text/plain, text/x-pascal  -> copied verbatim with cp
#   application/pdf            -> pdftotext, when it is installed
#   application/postscript     -> ps2ascii, when it is installed
#   anything else, or when the preferred tool above is missing
#                              -> abiword, when it is installed
#
# Exit status: the status of the converter that ran; 1 when the arguments
# are missing, INPUT is unreadable, or no suitable converter is installed.

# Print a diagnostic message on stderr.
warn() {
  printf '%s\n' "$*" >&2
}

# Succeed when the named program can be found by the shell.
have() {
  command -v "$1" >/dev/null 2>&1
}

# Convert the document $1 to plain text written to $2.
main() {
  if [[ -z "${2:-}" ]]; then
    warn "Usage: ./convert_to_text.sh in.pdf out.txt"
    return 1
  fi

  local input=$1
  local output=$2
  local mime

  if [[ ! -r "$input" ]]; then
    warn "convert_to_text.sh: cannot read input file: $input"
    return 1
  fi

  # The status of file(1) is deliberately not checked: an empty or odd
  # answer simply falls through to the abiword fallback below.
  mime=$(file -b --mime-type -- "$input")

  # file occasionally misidentifies plain text as pascal given the presence
  # of certain keywords :(
  if [[ "$mime" == "text/plain" || "$mime" == "text/x-pascal" ]]; then
    cp -- "$input" "$output"
  elif [[ "$mime" == "application/pdf" ]] && have pdftotext; then
    pdftotext -raw -enc UTF-8 "$input" "$output"
  elif [[ "$mime" == "application/postscript" ]] && have ps2ascii; then
    ps2ascii "$input" > "$output"
  elif have abiword; then
    abiword -t txt "$input" -o "$output"
  else
    # Previously this case exited 0 without producing any output.
    warn "convert_to_text.sh: no converter available for $input (type: $mime)"
    return 1
  fi
}

# Last command in the file, so the script exits with main's status.
main "$@"

Version data entries

7 entries across 7 versions & 1 rubygem

Version Path
biblicit-2.3.2 sh/convert_to_text.sh
biblicit-2.3.1 sh/convert_to_text.sh
biblicit-2.3.0 sh/convert_to_text.sh
biblicit-2.2.3 sh/convert_to_text.sh
biblicit-2.2.2 sh/convert_to_text.sh
biblicit-2.2.1 sh/convert_to_text.sh
biblicit-2.2.0 sh/convert_to_text.sh