Sha256: d4c0b63061984bfa39bb181c8b36a52b17173aedf9d5f44372113e67c1fe0ffa

Contents?: true

Size: 1.14 KB

Versions: 1

Compression:

Stored size: 1.14 KB

Contents

module Murlsh

  # Hpricot::Doc mixin.
  #
  # The methods here call +at+ and +/+ on +self+, so the including object
  # must provide both (as an Hpricot document does).
  module Doc

    # Get the character set of the document.
    #
    # Looks for a <meta http-equiv="content-type"> (or "Content-Type") element
    # and extracts the charset parameter from its content attribute.
    #
    # The parameter name is matched case-insensitively, and optional
    # whitespace around "=" and an optional opening quote are tolerated, so
    # values such as 'text/html; CHARSET=UTF-8' or
    # 'text/html; charset = "utf-8"' are recognized.
    #
    # Returns the charset name as a String, or nil if none is found.
    def charset
      %w{content-type Content-Type}.each do |ct|
        meta = at("meta[@http-equiv='#{ct}']")
        next if meta.nil?

        content = meta['content']
        next if content.nil?

        # Capture group 1 is the bare charset name, without any quotes.
        found = content[/charset\s*=\s*["']?([\w.:-]+)/i, 1]
        return found if found
      end
      nil
    end

    # Check a list of xpaths in order. For the first xpath that selects at
    # least one node, yield the first selected node to the block and return
    # the block's result.
    #
    # xpaths may be a single xpath or a list of them.
    #
    # Returns nil if no xpath selects anything.
    def xpath_search(xpaths)
      [*xpaths].each do |xpath|
        selection = (self/xpath).first
        return yield(selection) if selection
      end
      nil
    end

    # Get the title of the document.
    #
    # Tries progressively less specific locations for the title element and
    # returns the inner HTML of the first one found, or nil if there is none.
    def title
      xpath_search(%w{
        //html/head/title
        //head/title
        //html/title
        //title
        }) { |node| node.inner_html }
    end

    # Get the meta description of the document.
    #
    # Returns the content attribute of the description meta element in
    # html/head, or nil if there is no such element.
    def description
      xpath_search(
        "//html/head/meta[@name='description']"
        ) { |node| node['content'] }
    end

  end

end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
murlsh-1.1.0 lib/murlsh/doc.rb