Sha256: 988455f1eb3e3f469bbe5ef34852aff2de7d44e252d34ef620ae4e6230519587

Contents?: true

Size: 1.17 KB

Versions: 2

Compression:

Stored size: 1.17 KB

Contents

%w{
hpricot
}.each { |m| require m }

module Murlsh

  # Hpricot::Doc mixin.
  #
  # The methods below call +at+ (first element matching a selector) and +/+
  # (all elements matching a path) on +self+, so the including object must
  # provide both.
  module Doc

    # Get the character set of the document.
    #
    # Looks at the content attribute of the content-type http-equiv meta
    # element (lower-case spelling first, then capitalized) and extracts its
    # charset parameter. The parameter name is matched case-insensitively and
    # may be followed by whitespace and an opening quote, so values such as
    # <tt>Charset=ISO-8859-1</tt> or <tt>charset="utf-8"</tt> are recognized.
    #
    # Returns the charset name as a String, or nil if none is declared.
    def charset
      %w{content-type Content-Type}.each do |http_equiv|
        meta = at("meta[@http-equiv='#{http_equiv}']")
        content = meta && meta['content']
        next unless content

        # Keep looking at the other spelling when this element carries no
        # charset parameter.
        declared = content[/charset\s*=\s*["']?([\w.:-]+)/i, 1]
        return declared if declared
      end
      nil
    end

    # Check a list of xpaths in order and yield the first element selected by
    # any of them to the given block.
    #
    # xpaths may be a single xpath or a list of them. A block is required:
    # its result for the first matching element is returned. Returns nil when
    # no xpath selects anything.
    def xpath_search(xpaths)
      Array(xpaths).each do |xpath|
        node = (self / xpath).first
        return yield(node) if node
      end
      nil
    end

    # Get the title of the document.
    #
    # Tries progressively less specific locations and returns the inner html
    # of the first title element found, or nil if there is none.
    def title
      xpath_search(%w{
        //html/head/title
        //head/title
        //html/title
        //title
        }) { |node| node.inner_html }
    end

    # Get the meta description of the document.
    #
    # Returns the content attribute of the description meta element in the
    # document head, or nil if there is no such element.
    def description
      xpath_search(
        "//html/head/meta[@name='description']"
        ) { |node| node['content'] }
    end

  end

end

Version data entries

2 entries across 2 versions & 1 rubygems

Version Path
murlsh-0.8.1 lib/murlsh/doc.rb
murlsh-0.8.0 lib/murlsh/doc.rb