Sha256: 79b11688c8c4aeef9806e464a4fc678ed1394e953accf29a09543335b304729d

Contents?: true

Size: 1.03 KB

Versions: 1

Compression:

Stored size: 1.03 KB

Contents

# frozen_string_literal: true

# This file is part of Alexandria.
#
# See the file README.md for authorship and licensing information.

require 'hpricot'
require 'htmlentities'

module Alexandria
  class BookProviders
    class WebsiteBasedProvider < GenericProvider
      # Builds a website-backed provider.
      #
      # Both arguments are forwarded unchanged to the superclass initializer;
      # an HTML entity decoder is then created for later use by #html_to_doc.
      #
      # @param name the provider name, passed through to the superclass
      # @param fullname optional longer name, passed through to the superclass
      def initialize(name, fullname = nil)
        # Bare `super` forwards the current values of name and fullname.
        super
        @htmlentities = HTMLEntities.new
      end

      # Converts raw HTML, as fetched from a website, into a parsed Hpricot
      # document.
      #
      # The bytes of +html+ are interpreted as +source_data_charset+,
      # transcoded to UTF-8, run through the HTML entity decoder and finally
      # handed to Hpricot.
      #
      # The caller's string is left untouched: String#force_encoding mutates
      # its receiver, so the work is done on a copy. This also lets frozen
      # strings be passed in without raising FrozenError.
      #
      # NOTE(review): entities are decoded *before* parsing, so escaped markup
      # in the page text (e.g. "&lt;") presumably reaches the parser as literal
      # markup. Kept as-is to preserve existing behaviour.
      #
      # @param html [String] the raw page content
      # @param source_data_charset [String] name of the encoding the raw bytes
      #   are in (defaults to 'ISO-8859-1')
      # @return the value returned by Hpricot() for the normalized markup
      # @raise [EncodingError] if String#encode cannot transcode the bytes
      def html_to_doc(html, source_data_charset = 'ISO-8859-1')
        # dup first: never retag (or fail on) the string owned by the caller.
        tagged_html = html.dup.force_encoding(source_data_charset)
        utf8_html = tagged_html.encode('utf-8')
        normalized_html = @htmlentities.decode(utf8_html)
        Hpricot(normalized_html)
      end

      ## from Palatina
      # Recursively collects the text found beneath +node+.
      #
      # * nil yields nil.
      # * A text node yields its #to_html output, unmodified.
      # * An element node yields the concatenation of text_of for each child,
      #   with surrounding whitespace stripped and runs of spaces collapsed
      #   to a single space; nil if the element's children are nil.
      # * Any other kind of node yields nil.
      #
      # @param node an object responding to text?, elem?, to_html and children
      # @return [String, nil]
      def text_of(node)
        return if node.nil?
        return node.to_html if node.text?
        return unless node.elem?

        child_nodes = node.children
        return if child_nodes.nil?

        # Children that yield nil contribute an empty string to the join.
        combined = child_nodes.map { |child| text_of(child) }.join
        combined.strip.squeeze(' ')
      end
    end
  end
end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
alexandria-book-collection-manager-0.7.3 lib/alexandria/book_providers/web.rb