Sha256: 079efc2bc123d78ae7d12fce02521591fc8a95a4381c5f15ec3523fd1a87789b

Contents?: true

Size: 1.61 KB

Versions: 16

Compression:

Stored size: 1.61 KB

Contents

module MetaInspector
  module Parsers
    # Extracts textual information (title, best title, description) from the
    # parsed document and meta tags exposed by the main parser.
    class TextsParser < Base
      delegate [:parsed, :meta] => :@main_parser

      # Returns the parsed document title, from the content of the <title> tag
      # within the <head> section. The result is memoized in @title.
      #
      # Best effort: any StandardError raised during the lookup is swallowed
      # and nil is returned instead.
      def title
        @title ||= parsed.css('head title').inner_text
      rescue StandardError
        nil
      end

      # Returns the most suitable title for the page.
      #
      # When the host ends in ".youtube.com" the og:title meta tag is tried
      # first; if that is missing (or for any other host) the longest
      # candidate found by find_best_title is used.
      def best_title
        # \z anchors at the real end of the string; $ would also match just
        # before a newline.
        @best_title = meta['og:title'] if @main_parser.host =~ /\.youtube\.com\z/
        @best_title ||= find_best_title
      end

      # A description getter that first checks for a meta description
      # and, if it is missing or empty, guesses by looking at the first
      # paragraph with 120 characters or more.
      def description
        meta_description = meta['description']
        # An empty string is truthy in Ruby, so check for it explicitly;
        # otherwise an empty meta tag would suppress the fallback.
        return meta_description unless meta_description.nil? || meta_description.empty?

        secondary_description
      end

      private

      # Look for title candidates (<title> in head and body, og:title and the
      # first <h1>) and pick the longest one.
      #
      # Candidates are stripped and have internal whitespace runs collapsed to
      # a single space; candidates that end up empty are discarded.
      # Returns nil when no usable candidate remains.
      def find_best_title
        candidates = [
          parsed.css('head title'),
          parsed.css('body title'),
          meta['og:title'],
          parsed.css('h1').first
        ].flatten.compact

        titles = candidates.map do |candidate|
          # Candidates are either nodes (responding to inner_text) or plain strings.
          text = candidate.respond_to?(:inner_text) ? candidate.inner_text : candidate
          text.strip.gsub(/\s+/, ' ')
        end
        titles.reject!(&:empty?)

        # max_by returns nil for an empty list, covering the "no title" case.
        titles.max_by(&:length)
      end

      # Look for the first <p> block with 120 characters or more and return
      # its text, or an empty string when there is no such paragraph.
      def secondary_description
        first_long_paragraph = parsed.search('//p[string-length() >= 120]').first
        first_long_paragraph ? first_long_paragraph.text : ''
      end
    end
  end
end

Version data entries

16 entries across 16 versions & 1 rubygem

Version Path
metainspector-5.2.0 lib/meta_inspector/parsers/texts.rb
metainspector-5.1.3 lib/meta_inspector/parsers/texts.rb
metainspector-5.1.2 lib/meta_inspector/parsers/texts.rb
metainspector-5.1.1 lib/meta_inspector/parsers/texts.rb
metainspector-5.1.0 lib/meta_inspector/parsers/texts.rb
metainspector-5.0.2 lib/meta_inspector/parsers/texts.rb
metainspector-5.0.1 lib/meta_inspector/parsers/texts.rb
metainspector-5.0.0 lib/meta_inspector/parsers/texts.rb
metainspector-5.0.0.rc1 lib/meta_inspector/parsers/texts.rb
metainspector-4.7.2 lib/meta_inspector/parsers/texts.rb
metainspector-4.7.1 lib/meta_inspector/parsers/texts.rb
metainspector-4.7.0 lib/meta_inspector/parsers/texts.rb
metainspector-4.6.1 lib/meta_inspector/parsers/texts.rb
metainspector-4.6.0 lib/meta_inspector/parsers/texts.rb
metainspector-4.5.0 lib/meta_inspector/parsers/texts.rb
metainspector-4.4.2 lib/meta_inspector/parsers/texts.rb