Sha256: 79fe4e507bd6b5c34bcdbbd49fba0801fb547f640a49ce517b09a309a15892d1

Contents?: true

Size: 1.07 KB

Versions: 1

Compression:

Stored size: 1.07 KB

Contents

require 'mechanize'

module Artaius
  module Plugins
    # Scrapes web pages linked in channel messages and replies with the text
    # of their <title> element.
    class Scraper
      include Cinch::Plugin

      listen_to :channel, method: :scrap_links

      # Host whose page titles get a dedicated reply format.
      KAG_FORUM_HOST = 'forum.kag2d.com'.freeze

      # Boilerplate suffix stripped from King Arthur's Gold forum titles.
      KAG_FORUM_SUFFIX = / \| Page \d{1,4} \| King Arthur's Gold Forum$/

      # Fetches every http/https link found in the message and replies with
      # the page title, or with a "broken link" notice when the fetch fails.
      #
      # @param m the incoming channel message; only `m.message` and `m.reply`
      #   are used here (presumably a Cinch::Message -- confirm against Cinch).
      def scrap_links(m)
        URI.extract(m.message, %w[http https]) do |link|
          begin
            page = http_agent.get(link)
          rescue Mechanize::Error, SocketError, SystemCallError, Timeout::Error
            # Reply and skip explicitly. The former `m.reply ... and next`
            # only skipped when `reply` happened to return a truthy value.
            # Network-level failures are treated like HTTP error codes so one
            # dead link does not abort the remaining links in the message.
            m.reply I18n.scraper.broken_link
            next
          end

          title = clean_title(page)
          next unless title

          case host_of(link)
          when KAG_FORUM_HOST
            m.reply I18n.scraper.h.kag_forum(title.sub(KAG_FORUM_SUFFIX, ''))
          else
            m.reply I18n.scraper.title(title)
          end
        end
      end

      private

      # Lazily builds the Mechanize agent and reuses it for later fetches.
      def http_agent
        @agent ||= Mechanize.new.tap do |agent|
          agent.user_agent_alias = 'Linux Firefox'
        end
      end

      # Returns the page title with control characters removed and runs of
      # spaces collapsed, or nil when there is nothing worth announcing.
      def clean_title(page)
        # Not every fetched object is an HTML page; anything that does not
        # expose #title is skipped instead of relying on a blanket rescue.
        return unless page.respond_to?(:title)

        raw = page.title
        return unless raw

        title = raw.gsub(/[\x00-\x1f]+/, '').gsub(/ {2,}/, ' ').strip
        title unless title.empty?
      end

      # Returns the host part of the link, or nil when the link cannot be
      # parsed (the caller then falls back to the generic title reply).
      def host_of(link)
        URI.parse(link).host
      rescue URI::InvalidURIError
        nil
      end

    end
  end
end

Version data entries

1 entry across 1 version & 1 rubygem

Version Path
artaius-0.2.1 lib/artaius/plugins/scraper.rb