Sha256: e0a3193a07e3e9e9db0e8d067d6a8f3f658f7e026a123fc1f7f2f23fe7939539
Contents?: true
Size: 1.26 KB
Versions: 2
Compression:
Stored size: 1.26 KB
Contents
module Ogo class PageSource attr_reader :url, :src, :charset, :doc def initialize(src, options={}) @src = src @url = options[:url] @charset = options[:charset] end def parse unless charset _doc = Nokogiri.parse(src.scrub) @charset = guess_encoding(_doc) end Nokogiri::HTML(src, nil, charset) end def parse! @doc = parse self end def inspect str = "<Ogo::PageSource:0x00#{'%x' % (self.object_id << 1)}\n" str << "url=\"#{url}\",\n" str << "charset=\"#{charset}\",\n" str << "src=\"#{src.to_s.truncate(100, omission: '...')}\",\n" str << "doc=#{doc.to_s.truncate(100, omission: '...')}\" >" str end def to_s inspect end private def guess_encoding(_doc) _charset = _doc.xpath('//meta/@charset').first return _charset.value.to_s if charset _charset = _doc.xpath('//meta').each do |m| if content_tag?(m) return m.attribute('content').value.split('charset=').last.strip end end 'UTF-8' end def content_tag?(m) m.attribute('http-equiv') && m.attribute('content') && m.attribute('http-equiv').value.casecmp('Content-Type') end end end
Version data entries
2 entries across 2 versions & 1 rubygems
Version | Path |
---|---|
ogo-0.1.2 | lib/ogo/page_source.rb |
ogo-0.1.1 | lib/ogo/page_source.rb |