#!/usr/bin/env ruby
#
# Rpdf2txt -- PDF to Text Parser
# Copyright (C) 2003 Andreas Schrafl, Hannes Wyss
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# ywesee - intellectual capital connected, Winterthurerstrasse 52, CH-8006 Zürich, Switzerland
# hwyss@ywesee.com, aschrafl@ywesee.com
#
# AttributesParser -- Rpdf2txt -- 08.09.2004 -- hwyss@ywesee.com

# Command-line front end: reads the PDF named by the first positional
# argument, and writes the extracted text either to the file named by the
# second positional argument or, when that is omitted, to stdout.

require 'rpdf2txt/parser'

# Padding string handed to the column handler. When the optional
# 'encoding/character/utf-8' library can be loaded, the string is passed
# through its `u` helper; otherwise the plain String is used as-is.
padding = ' '
begin
  require 'encoding/character/utf-8'
  padding = u(padding)
rescue LoadError
  # The library is optional; fall back to the plain padding string.
end

# Handler class and its extra constructor arguments; overridden by options.
hclass = Rpdf2txt::SimpleHandler
hargs = []

# Consume leading option arguments (anything starting with '-').
# Unrecognised options are dropped without complaint.
while /^-/.match(ARGV.first)
  case ARGV.shift
  when '--columns', '-c'
    hclass = Rpdf2txt::ColumnHandler
    hargs = [padding]
  end
end

# After option parsing exactly one or two positional arguments must remain,
# and the first one must name an existing file.
if !(1..2).include?(ARGV.size) || !File.exist?(ARGV.first)
  puts <<-EOS
Usage: rpdf2txt [-c|--columns] <pdf-file> [<text-file>]
       if <text-file> is omitted, the extracted text is written to stdout
  EOS
  exit
end

# NOTE(review): File.read opens the PDF in text mode; on platforms that
# translate line endings a binary read may be more appropriate -- confirm
# against what Rpdf2txt::Parser expects before changing.
parser = Rpdf2txt::Parser.new(File.read(ARGV[0]), 'utf8')

if ARGV.size == 2
  # Block form guarantees the output file is flushed and closed, even when
  # extraction raises.
  File.open(ARGV[1], 'w') do |outstream|
    parser.extract_text(hclass.new(outstream, *hargs))
  end
else
  parser.extract_text(hclass.new(STDOUT, *hargs))
end