Sha256: 988455f1eb3e3f469bbe5ef34852aff2de7d44e252d34ef620ae4e6230519587
Contents?: true
Size: 1.17 KB
Versions: 2
Compression:
Stored size: 1.17 KB
Contents
require 'hpricot'

module Murlsh
  # Hpricot::Doc mixin.
  #
  # The methods here call +at+ and +/+ on +self+, so the including object
  # must provide both.
  module Doc
    # Get the character set of the document.
    #
    # Looks up the <meta http-equiv="content-type"> element (two common
    # capitalisations of the attribute value are tried, in order) and pulls
    # the charset parameter out of its content attribute.
    #
    # Returns the charset name as a String, or nil if none is declared.
    def charset
      %w[content-type Content-Type].each do |http_equiv|
        meta = at("meta[@http-equiv='#{http_equiv}']")
        next unless meta

        content = meta['content']
        next unless content

        # The parameter name is matched case-insensitively, and optional
        # whitespace around "=" and an opening quote are tolerated, so that
        # declarations such as "CHARSET=UTF-8" or "charset='utf-8'" are
        # recognised too.
        declared = content[/charset\s*=\s*["']?([\w.:-]+)/i, 1]
        return declared if declared
      end
      nil
    end

    # Check a list of xpaths in order and yield the first node found.
    #
    # xpaths - a single xpath or a list of xpaths.
    #
    # For each xpath the first matching node (if any) is taken; the first
    # such node is passed to the block and the block's result is returned.
    # Returns nil if no xpath matches anything.
    def xpath_search(xpaths)
      Array(xpaths).each do |xpath|
        selection = (self / xpath).first
        return yield(selection) if selection
      end
      nil
    end

    # Get the title of the document.
    #
    # Returns the inner html of the first title element found, trying the
    # most specific location first, or nil if there is no title.
    def title
      xpath_search(%w[
        //html/head/title
        //head/title
        //html/title
        //title
      ]) { |node| node.inner_html }
    end

    # Get the meta description of the document.
    #
    # Returns the content attribute of the description meta element in the
    # document head, or nil if there is no such element.
    def description
      xpath_search("//html/head/meta[@name='description']") do |node|
        node['content']
      end
    end
  end
end
Version data entries
2 entries across 2 versions & 1 rubygems
Version | Path |
---|---|
murlsh-0.8.1 | lib/murlsh/doc.rb |
murlsh-0.8.0 | lib/murlsh/doc.rb |