#!/usr/bin/env ruby
#
# Rpdf2txt -- PDF to Text Parser
# Copyright (C) 2003 Andreas Schrafl, Hannes Wyss
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# ywesee - intellectual capital connected, Winterthurerstrasse 52, CH-8006 Zürich, Switzerland
# hwyss@ywesee.com, aschrafl@ywesee.com
#
# TextParser -- Rpdf2txt -- 04.11.2004 -- mwalder@ywesee.com
#                                         rwaltert@ywesee.com

require 'rpdf2txt-rockit/rockit'

# Factory methods for the CMap parsers. Each parser is generated from a
# grammar file into a Ruby source file, and is only regenerated when the
# grammar has changed since the last generation.
module Rpdf2txt
  # Default grammar / generated-parser locations, relative to this file.
  CMAP_GRAMMAR = File.expand_path('data/cmap.grammar',
                                  File.dirname(__FILE__)).freeze
  CMAP_PARSER = File.expand_path('data/cmap.rb',
                                 File.dirname(__FILE__)).freeze
  CMAP_RANGE_GRAMMAR = File.expand_path('data/cmap_range.grammar',
                                        File.dirname(__FILE__)).freeze
  CMAP_RANGE_PARSER = File.expand_path('data/cmap_range.rb',
                                       File.dirname(__FILE__)).freeze

  # Returns the CMap parser, (re)generating its source from the grammar first
  # when necessary.
  #
  # grammar_path:: path of the grammar file to generate the parser from
  # parser_path::  path the generated parser source is written to and
  #                required from
  #
  # Returns whatever Rpdf2txt._cmap_parser (defined by the generated file)
  # returns.
  def self.cmap_parser(grammar_path = CMAP_GRAMMAR, parser_path = CMAP_PARSER)
    load_generated_parser(grammar_path, parser_path, '_cmap_parser')
    Rpdf2txt._cmap_parser
  end

  # Returns the CMap range parser, (re)generating its source from the grammar
  # first when necessary.
  #
  # grammar_path:: path of the grammar file to generate the parser from
  # parser_path::  path the generated parser source is written to and
  #                required from
  #
  # Returns whatever Rpdf2txt._cmap_range_parser (defined by the generated
  # file) returns.
  def self.cmap_range_parser(grammar_path = CMAP_RANGE_GRAMMAR,
                             parser_path = CMAP_RANGE_PARSER)
    load_generated_parser(grammar_path, parser_path, '_cmap_range_parser')
    Rpdf2txt._cmap_range_parser
  end

  # Regenerates the parser at +parser_path+ from +grammar_path+ if it is
  # missing or out of date, then requires it.
  #
  # A copy of the grammar the parser was last generated from is kept next to
  # the grammar file, with "_" prefixed to its basename. The parser counts as
  # up to date when that copy is identical to the current grammar.
  #
  # accessor_name:: name passed to the generator for the module method that
  #                 hands out the parser
  def self.load_generated_parser(grammar_path, parser_path, accessor_name)
    stamp_path = "#{File.dirname(grammar_path)}/_#{File.basename(grammar_path)}"
    grammar_src = File.read(grammar_path)

    up_to_date = File.exist?(parser_path) &&
                 File.exist?(stamp_path) &&
                 File.read(stamp_path) == grammar_src

    unless up_to_date
      # Drop the old stamp before generating, so that a failed generation
      # does not leave a stamp behind that claims the parser is current.
      File.delete(stamp_path) if File.exist?(stamp_path)
      Parse.generate_parser_from_file_to_file(grammar_path, parser_path,
                                              accessor_name, 'Rpdf2txt')
      File.open(stamp_path, 'w') { |stamp| stamp << grammar_src }
    end

    # Deliberately lazy: the file may only just have been generated.
    require parser_path
  end
  private_class_method :load_generated_parser
end