lib/webrobots/robotstxt.rb: webrobots-0.0.5 vs webrobots-0.0.6
- old
+ new
@@ -25,12 +25,14 @@
def initialize(target = nil)
super()
@target = target
end
- def self.parse(input, target = nil)
- new(target).parse(input)
+ def parse!(input, site)
+ parse(input, site)
+ rescue Error => e
+ RobotsTxt.new(site, nil, :error => e, :target => @target)
end
KNOWN_TOKENS = %w[User-agent Allow Disallow Crawl-delay Sitemap]
RE_KNOWN_TOKENS = /#{KNOWN_TOKENS.join('|')}/i
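
(Example, not part of the diff.) The hunk above replaces the class-level Parser.parse factory with an instance-level parse! that no longer lets parse failures escape: the rescued Error is stored on a fresh RobotsTxt via the new :error option. A minimal usage sketch, assuming the gem's usual WebRobots::RobotsTxt::Parser nesting and a URI for the site:

    require 'webrobots'
    require 'uri'

    site   = URI('http://example.com/')
    parser = WebRobots::RobotsTxt::Parser.new('MyBot')  # target user-agent

    # parse! does not raise on a malformed robots.txt body; if parsing fails,
    # the exception is tucked into the returned object instead.
    robots_txt = parser.parse!('some malformed robots.txt body', site)
    robots_txt.error  # => the rescued Error, or nil when parsing succeeded
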
@@ -517,15 +519,16 @@
end
end # class Parser
def initialize(site, records, options = nil)
- super()
+ @timestamp = Time.now
@site = site
@options = options || {}
@last_checked = nil
+ @error = @options[:error]
@target = @options[:target]
@sitemaps = @options[:sitemaps] || []
if records && !records.empty?
@records, defaults = [], []
@@ -540,12 +543,17 @@
else
@records = []
end
end
- attr_reader :site, :sitemaps
+ attr_reader :timestamp, :site, :sitemaps
+ attr_accessor :error
+ def error!
+ raise @error if @error
+ end
+
def target(user_agent = nil)
if user_agent
raise ArgumentError, "this instance is targeted for #{@target}" if @target
user_agent
else
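
(Example, not part of the diff.) With the constructor and accessor changes above, every RobotsTxt now records when it was built, and an instance produced from a failed parse carries the failure: error returns it, error! re-raises it. An illustrative continuation of the sketch above:

    robots_txt.timestamp  # => Time the object was constructed (new in 0.0.6)

    if robots_txt.error
      # Built from a failed parse (or marked unfetchable, below); the cause is kept.
      warn "robots.txt problem for #{robots_txt.site}: #{robots_txt.error}"
    end

    robots_txt.error!     # re-raises the stored exception; a no-op when there is none
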
@@ -575,9 +583,20 @@
end
def options(user_agent = nil)
record = find_record(user_agent) or return {}
record.options
+ end
+
+ DISALLOW_ALL = <<-TXT
+User-Agent: *
+Disallow: /
+ TXT
+
+ def self.unfetchable(site, reason, target = nil)
+ Parser.new(target).parse(DISALLOW_ALL, site).tap { |robots_txt|
+ robots_txt.error = reason
+ }
end
class Record
def initialize(agentlines, rulelines)
@patterns = agentlines.map { |agentline| agentline.pattern }
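
(Example, not part of the diff.) The new DISALLOW_ALL template and RobotsTxt.unfetchable class method give callers a conservative stand-in when robots.txt cannot be retrieved at all: a parsed "Disallow: /" ruleset with the failure reason attached. A rough sketch of a call site; fetch_robots_txt is a hypothetical helper, and the real caller is not shown in this diff:

    site = URI('http://example.com/')
    begin
      body       = fetch_robots_txt(site)  # hypothetical HTTP fetch
      robots_txt = WebRobots::RobotsTxt::Parser.new('MyBot').parse!(body, site)
    rescue => e
      # Unreachable robots.txt: deny everything, but remember why.
      robots_txt = WebRobots::RobotsTxt.unfetchable(site, e, 'MyBot')
    end

    robots_txt.error  # => the fetch or parse failure, or nil on success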