lib/webrobots/robotstxt.rb in webrobots-0.0.5 vs lib/webrobots/robotstxt.rb in webrobots-0.0.6

- old
+ new

@@ -25,12 +25,14 @@
       def initialize(target = nil)
         super()
         @target = target
       end
 
-      def self.parse(input, target = nil)
-        new(target).parse(input)
+      def parse!(input, site)
+        parse(input, site)
+      rescue Error => e
+        RobotsTxt.new(site, nil, :error => e, :target => @target)
       end
 
       KNOWN_TOKENS = %w[User-agent Allow Disallow Crawl-delay Sitemap]
       RE_KNOWN_TOKENS = /#{KNOWN_TOKENS.join('|')}/i
@@ -517,15 +519,16 @@
       end
     end # class Parser
 
     def initialize(site, records, options = nil)
-      super()
+      @timestamp = Time.now
       @site = site
       @options = options || {}
       @last_checked = nil
 
+      @error = @options[:error]
       @target = @options[:target]
       @sitemaps = @options[:sitemaps] || []
 
       if records && !records.empty?
         @records, defaults = [], []
@@ -540,12 +543,17 @@
       else
         @records = []
       end
     end
 
-    attr_reader :site, :sitemaps
+    attr_reader :timestamp, :site, :sitemaps
+    attr_accessor :error
 
+    def error!
+      raise @error if @error
+    end
+
     def target(user_agent = nil)
       if user_agent
         raise ArgumentError, "this instance is targeted for #{@target}" if @target
         user_agent
       else
@@ -575,9 +583,20 @@
     end
 
     def options(user_agent = nil)
       record = find_record(user_agent) or return {}
       record.options
+    end
+
+    DISALLOW_ALL = <<-TXT
+User-Agent: *
+Disallow: /
+    TXT
+
+    def self.unfetchable(site, reason, target = nil)
+      Parser.new(target).parse(DISALLOW_ALL, site).tap { |robots_txt|
+        robots_txt.error = reason
+      }
     end
 
     class Record
       def initialize(agentlines, rulelines)
         @patterns = agentlines.map { |agentline| agentline.pattern }
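
For context, here is a minimal usage sketch (not part of the gem) of the error-carrying flow introduced above: in 0.0.6, Parser#parse! traps parse failures and returns a RobotsTxt that remembers the exception, which the caller can re-raise later with error!. The example site URI, the 'mybot' target, the sample body, and the fully qualified WebRobots::Error name are illustrative assumptions.

require 'webrobots'
require 'uri'

# Hypothetical caller code built on the 0.0.6 API shown in the diff above.
site   = URI('http://example.org/')                 # assumed example site
parser = WebRobots::RobotsTxt::Parser.new('mybot')  # 'mybot' is an assumed target name

body = "User-agent: *\nDisallow: /private/\n"       # assumed robots.txt body

# parse! no longer lets a parse Error escape; on failure it returns a
# RobotsTxt constructed with :error => the exception instead.
robots_txt = parser.parse!(body, site)

robots_txt.site       # => the site URI passed in
robots_txt.timestamp  # => Time the object was built (new in 0.0.6)
robots_txt.error      # => nil on success, or the captured parse exception

# error! re-raises the stored exception on demand, so the caller decides
# when an unparsable robots.txt should actually stop processing.
begin
  robots_txt.error!
rescue WebRobots::Error => e   # the diff rescues `Error`; the full name is assumed here
  warn "robots.txt for #{site} was unparsable: #{e.message}"
end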
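
Likewise, a hedged sketch of how the new RobotsTxt.unfetchable helper could be used when robots.txt cannot be retrieved at all: it parses the built-in DISALLOW_ALL template so every path is off-limits, and stores the fetch failure as the document's error. The fetching code below is made up for illustration; it is not the gem's own fetch logic.

require 'webrobots'
require 'net/http'
require 'uri'

site = URI('http://example.org/')   # assumed example site

begin
  response = Net::HTTP.get_response(URI.join(site.to_s, '/robots.txt'))
  raise "robots.txt fetch failed (#{response.code})" unless response.is_a?(Net::HTTPSuccess)
  robots_txt = WebRobots::RobotsTxt::Parser.new('mybot').parse!(response.body, site)
rescue => reason
  # Fall back to a conservative disallow-everything document built from the
  # DISALLOW_ALL template; the fetch failure is kept around as its error.
  robots_txt = WebRobots::RobotsTxt.unfetchable(site, reason, 'mybot')
end

robots_txt.error    # => the fetch exception when the fallback was used, else nil
robots_txt.error!   # re-raises that exception if one is stored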