lib/docparser/document.rb in docparser-0.1.4 vs lib/docparser/document.rb in docparser-0.1.6

- old
+ new

@@ -1,6 +1,6 @@
-require 'set'
+require 'nokogiri'
 module DocParser
   # The Document class loads and parses the files.
   # @see Parser
   # @see Output
   class Document
@@ -8,26 +8,17 @@
 
     # @return [String] the source of the document
     attr_reader :html
 
     def initialize(filename: nil, encoding: 'utf-8', parser: nil)
-      if encoding == 'utf-8'
-        encodingstring = 'r:utf-8'
-      else
-        encodingstring = "r:#{encoding}:utf-8"
-      end
       @logger = Log4r::Logger.new('docparser::document')
       @logger.debug { "Parsing #{filename}" }
-      open(filename, encodingstring) do |f|
-        @html = f.read
-        @logger.warn "#{filename} is empty" if @html.empty?
-        @doc = Nokogiri(@html)
-      end
       @encoding = encoding
       @parser = parser
       @filename = filename
       @results = Array.new(@parser.outputs ? @parser.outputs.length : 0) { [] }
+      read_file
     end
 
     # Adds a row to an output
     def add_row(*row, output: 0)
       output = @parser.outputs.index(output) if output.is_a? Output
@@ -77,9 +68,23 @@
     # @!visibility private
     def inspect
       "<Document file:'#{@filename}', encoding:'#{@encoding}'>"
     end
 
+    private
+
+    # Reads @filename with the configured encoding, transcoding to UTF-8 when
+    # the source encoding differs, and parses the contents with Nokogiri.
+    # Uses the instance variables throughout instead of reader methods.
+    def read_file
+      encodingstring = @encoding == 'utf-8' ? 'r:utf-8' : "r:#{@encoding}:utf-8"
+      open(@filename, encodingstring) do |f|
+        @html = f.read
+        @logger.warn "#{@filename} is empty" if @html.empty?
+        @doc = Nokogiri(@html)
+      end
+    end
+
     alias_method :css, :xpath
     alias_method :css_content, :xpath_content
   end
-end
\ No newline at end of file
+end