lib/docparser/document.rb in docparser-0.1.4 vs lib/docparser/document.rb in docparser-0.1.6
- old
+ new
@@ -1,6 +1,6 @@
-require 'set'
+require 'nokogiri'
module DocParser
# The Document class loads and parses the files.
# @see Parser
# @see Output
class Document
@@ -8,26 +8,17 @@
# @return [String] the source of the document
attr_reader :html
# Sets up logging, stores the constructor arguments and loads the document.
#
# On the removed (0.1.4) side of this diff the file is opened, read and
# parsed inline; on the added (0.1.6) side that work is delegated to
# read_file, which is called last so that @encoding and @filename are
# already assigned when it runs.
#
# @param filename path of the file to load
# @param encoding [String] encoding of the file on disk; anything other
#   than 'utf-8' is passed as the external encoding of the open mode
# @param parser object whose #outputs is used to size @results
#   NOTE(review): the default of nil would raise on `@parser.outputs`
#   below -- confirm that callers always pass a parser.
def initialize(filename: nil, encoding: 'utf-8', parser: nil)
- if encoding == 'utf-8'
- encodingstring = 'r:utf-8'
- else
- encodingstring = "r:#{encoding}:utf-8"
- end
@logger = Log4r::Logger.new('docparser::document')
@logger.debug { "Parsing #{filename}" }
- open(filename, encodingstring) do |f|
- @html = f.read
- @logger.warn "#{filename} is empty" if @html.empty?
- @doc = Nokogiri(@html)
- end
@encoding = encoding
@parser = parser
@filename = filename
# One empty row list per parser output; no lists when outputs is nil/false.
@results = Array.new(@parser.outputs ? @parser.outputs.length : 0) { [] }
+ read_file
end
# Adds a row to an output
def add_row(*row, output: 0)
output = @parser.outputs.index(output) if output.is_a? Output
@@ -77,9 +68,20 @@
# @!visibility private
def inspect
"<Document file:'#{@filename}', encoding:'#{@encoding}'>"
end
+ private
+
+ def read_file
+ encodingstring = @encoding == 'utf-8' ? 'r:utf-8' : "r:#{encoding}:utf-8"
+ open(@filename, encodingstring) do |f|
+ @html = f.read
+ @logger.warn "#{filename} is empty" if @html.empty?
+ @doc = Nokogiri(@html)
+ end
+ end
+
# Make the query helpers available under CSS-style names too.
# NOTE(review): xpath and xpath_content are defined outside this excerpt;
# presumably they accept CSS selectors as well -- confirm.
alias_method :css, :xpath
alias_method :css_content, :xpath_content
end
-end
\ No newline at end of file
+end