Sha256: 23f327bbe84522ade206b10fa67a09ce7108f86baf24ac08b14196b6a063a986

Contents?: true

Size: 1.44 KB

Versions: 5

Compression:

Stored size: 1.44 KB

Contents

module MetaInspector
  module Parsers
    # Extracts textual information (title, best-guess title and description)
    # from the document and meta tags exposed by the main parser.
    class TextsParser < Base
      # +parsed+ and +meta+ are forwarded to the main parser.
      delegate [:parsed, :meta] => :@main_parser

      # Returns the parsed document title, from the content of the <title> tag
      # within the <head> section. Returns nil if the lookup raises an error.
      def title
        @title ||= parsed.css('head title').inner_text
      rescue StandardError
        # Deliberately best-effort: a document that cannot be queried simply
        # has no title.
        nil
      end

      # Returns the longest title candidate found in the document, with runs
      # of whitespace collapsed to a single space and surrounding whitespace
      # removed. Returns nil when there are no candidates at all.
      def best_title
        @best_title ||= find_best_title
      end

      # A description getter that first checks for a meta description
      # and, if it is missing or empty, guesses by looking at the first
      # paragraph with 120 characters or more.
      def description
        meta_description = meta['description']
        return meta_description unless meta_description.nil? || meta_description.empty?

        secondary_description
      end

      private

      # Collects title candidates from <head><title>, <body><title>, the
      # og:title meta tag and the first <h1>, then picks the longest one.
      def find_best_title
        candidates = [
          parsed.css('head title'),
          parsed.css('body title'),
          meta['og:title'],
          parsed.css('h1').first
        ]
        candidates.flatten!
        candidates.map! { |c| c.respond_to?(:inner_text) ? c.inner_text : c }
        candidates.compact!
        return nil if candidates.empty?

        # Normalise every candidate *before* de-duplicating and comparing
        # lengths, so that padding whitespace neither creates spurious
        # duplicates nor makes a candidate look longer than it really is.
        candidates.map! { |c| c.gsub(/\s+/, ' ').strip }
        candidates.uniq!
        candidates.max_by(&:length)
      end

      # Look for the first <p> block with 120 characters or more.
      # Returns its text, or an empty string when there is none.
      def secondary_description
        first_long_paragraph = parsed.search('//p[string-length() >= 120]').first
        first_long_paragraph ? first_long_paragraph.text : ''
      end
    end
  end
end

Version data entries

5 entries across 5 versions & 1 rubygems

Version Path
metainspector-4.4.1 lib/meta_inspector/parsers/texts.rb
metainspector-4.4.0 lib/meta_inspector/parsers/texts.rb
metainspector-4.3.3 lib/meta_inspector/parsers/texts.rb
metainspector-4.3.2 lib/meta_inspector/parsers/texts.rb
metainspector-4.3.1 lib/meta_inspector/parsers/texts.rb