lib/webrobots/robotstxt.rb in webrobots-0.0.10 vs lib/webrobots/robotstxt.rb in webrobots-0.0.11
- old
+ new
@@ -17,11 +17,11 @@
end
class RobotsTxt
class Parser < Racc::Parser
-module_eval(<<'...end robotstxt.ry/module_eval...', 'robotstxt.ry', 163)
+module_eval(<<'...end robotstxt.ry/module_eval...', 'robotstxt.ry', 158)
# Creates a parser instance.
#
# target - optional value kept in @target (nil when omitted). How @target is
#          consumed is not visible in this excerpt -- NOTE(review): confirm
#          against the rest of the grammar's inner code.
def initialize(target = nil)
super() # explicit empty parens: forward no arguments to the Racc::Parser initializer
@target = target
end
@@ -36,18 +36,19 @@
RE_KNOWN_TOKENS = /\A(#{KNOWN_TOKENS.map { |t| Regexp.quote(t) }.join('|')})\z/i
def parse(input, site)
@q ||= []
@errors = []
- @lineno = 1
+ @lineno = 0
@site = site
string = input.respond_to?(:read) ? input.read : input
s = StringScanner.new(string)
value_expected = false
until s.eos?
+ @lineno += 1 if s.bol?
if t = s.scan(/[ \t]*(?:\r?\n|\z)/)
if value_expected
@q << [:VALUE, '']
end
@q << [:EOL, t]
@@ -113,74 +114,74 @@
...end robotstxt.ry/module_eval...
##### State transition tables begin ###
racc_action_table = [
- 6, 13, -11, 17, 53, 6, -13, 37, 38, 39,
- 40, 13, -11, 17, 47, 28, 28, 37, 38, 39,
- 40, 13, -11, 17, 50, 51, 52, 37, 38, 39,
- 40, 13, -11, 17, 13, 54, 25, 37, 38, 39,
- 40, 13, -11, 17, 13, 13, -13, 13, -11, 17,
- 6, 13, -14, 17, 6, 13, 13, 17, 6, 13,
- 13, 17, 6, 13, 13, 17, 6, 13, 24, 17,
- 6, 13, 63, 17, 64, 65, 66, 67, 6, 10,
- 6, 7, 6 ]
+ 5, 12, -10, 16, 52, 40, -12, 36, 37, 38,
+ 39, 12, -10, 16, 46, 27, 27, 36, 37, 38,
+ 39, 12, -10, 16, 49, 50, 51, 36, 37, 38,
+ 39, 12, -10, 16, 12, 53, 24, 36, 37, 38,
+ 39, 12, -10, 16, 12, 12, -12, 12, -10, 16,
+ 60, 12, -13, 16, 60, 12, 12, 16, 60, 12,
+ 12, 16, 60, 12, 12, 16, 60, 12, 23, 16,
+ 60, 12, 62, 16, 63, 64, 65, 66, 5, 9,
+ 5, 6, 5 ]
racc_action_check = [
- 22, 22, 22, 22, 40, 24, 22, 22, 22, 22,
- 22, 26, 26, 26, 28, 20, 26, 26, 26, 26,
- 26, 46, 46, 46, 37, 38, 39, 46, 46, 46,
- 46, 30, 30, 30, 25, 42, 17, 30, 30, 30,
- 30, 8, 8, 8, 47, 50, 8, 14, 14, 14,
- 63, 63, 14, 63, 54, 54, 51, 54, 64, 64,
- 52, 64, 65, 65, 53, 65, 66, 66, 16, 66,
- 67, 67, 55, 67, 56, 57, 58, 59, 12, 7,
+ 21, 21, 21, 21, 39, 23, 21, 21, 21, 21,
+ 21, 25, 25, 25, 27, 19, 25, 25, 25, 25,
+ 25, 45, 45, 45, 36, 37, 38, 45, 45, 45,
+ 45, 29, 29, 29, 24, 41, 16, 29, 29, 29,
+ 29, 7, 7, 7, 46, 49, 7, 13, 13, 13,
+ 62, 62, 13, 62, 53, 53, 50, 53, 63, 63,
+ 51, 63, 64, 64, 52, 64, 65, 65, 15, 65,
+ 66, 66, 54, 66, 55, 56, 57, 58, 11, 6,
3, 1, 0 ]
racc_action_pointer = [
- 80, 81, nil, 78, nil, nil, nil, 79, 38, nil,
- nil, nil, 76, nil, 44, nil, 64, 30, nil, nil,
- 7, nil, -2, nil, 3, 31, 8, nil, 8, nil,
- 28, nil, nil, nil, nil, nil, nil, 18, 19, 20,
- -2, nil, 28, nil, nil, nil, 18, 41, nil, nil,
- 42, 53, 57, 61, 52, 65, 67, 68, 69, 70,
- nil, nil, nil, 48, 56, 60, 64, 68, nil, nil,
- nil, nil, nil ]
+ 80, 81, nil, 78, nil, nil, 79, 38, nil, nil,
+ nil, 76, nil, 44, nil, 64, 30, nil, nil, 7,
+ nil, -2, nil, 3, 31, 8, nil, 8, nil, 28,
+ nil, nil, nil, nil, nil, nil, 18, 19, 20, -2,
+ nil, 28, nil, nil, nil, 18, 41, nil, nil, 42,
+ 53, 57, 61, 52, 65, 67, 68, 69, 70, nil,
+ nil, nil, 48, 56, 60, 64, 68, nil, nil, nil,
+ nil, nil ]
racc_action_default = [
- -5, -45, -1, -6, -7, -9, -10, -45, -3, -8,
- 73, -2, -5, -12, -24, -15, -45, -45, -19, -20,
- -45, -4, -6, -16, -45, -11, -30, -26, -45, -21,
- -22, -23, -32, -35, -36, -37, -38, -45, -45, -45,
- -45, -17, -45, -25, -27, -28, -31, -11, -33, -34,
- -11, -11, -11, -11, -11, -45, -45, -45, -45, -45,
- -18, -43, -44, -11, -11, -11, -11, -11, -29, -39,
- -40, -41, -42 ]
+ -5, -44, -1, -6, -7, -9, -44, -3, -8, 72,
+ -2, -5, -11, -23, -14, -44, -44, -18, -19, -44,
+ -4, -6, -15, -44, -10, -29, -25, -44, -20, -21,
+ -22, -31, -34, -35, -36, -37, -44, -44, -44, -44,
+ -16, -44, -24, -26, -27, -30, -10, -32, -33, -10,
+ -10, -10, -10, -10, -44, -44, -44, -44, -44, -17,
+ -42, -43, -10, -10, -10, -10, -10, -28, -38, -39,
+ -40, -41 ]
racc_goto_table = [
- 15, 42, 9, 48, 3, 12, 23, 11, 5, 27,
- 18, 5, 26, 2, 15, 44, 22, 19, 45, 48,
- 5, 9, 49, 55, 29, 21, 56, 57, 58, 59,
- 5, 31, 41, 60, 43, 30, 8, 1, 49, 46,
- nil, nil, 68, 69, 70, 71, 72 ]
+ 14, 41, 8, 47, 3, 2, 22, 17, 29, 11,
+ 18, 26, 45, 10, 14, 21, 20, 43, 44, 47,
+ 8, 28, 48, 54, 30, 25, 55, 56, 57, 58,
+ 59, 42, 7, 1, nil, nil, nil, nil, 48, 67,
+ 68, 69, 70, 71 ]
racc_goto_check = [
- 12, 9, 7, 20, 6, 5, 12, 3, 8, 19,
- 14, 8, 17, 2, 12, 19, 6, 15, 12, 20,
- 8, 7, 12, 9, 14, 2, 9, 9, 9, 9,
- 8, 15, 8, 13, 18, 16, 4, 1, 12, 16,
- nil, nil, 13, 13, 13, 13, 13 ]
+ 11, 8, 7, 19, 6, 2, 11, 13, 15, 5,
+ 14, 18, 15, 3, 11, 6, 2, 18, 11, 19,
+ 7, 13, 11, 8, 14, 16, 8, 8, 8, 8,
+ 12, 17, 4, 1, nil, nil, nil, nil, 11, 12,
+ 12, 12, 12, 12 ]
racc_goto_pointer = [
- nil, 37, 13, -1, 34, -3, 4, -1, 8, -24,
- nil, nil, -8, -21, 2, 9, 13, -8, 8, -11,
- -27, nil, nil, nil, nil ]
+ nil, 33, 5, 6, 30, 2, 4, -1, -23, nil,
+ nil, -7, -23, 0, 3, -13, 6, 6, -8, -26,
+ nil, nil, nil, nil ]
racc_goto_default = [
- nil, nil, nil, nil, nil, nil, nil, 4, 61, 16,
- 20, 14, 62, nil, nil, nil, nil, nil, nil, nil,
- 32, 33, 34, 35, 36 ]
+ nil, nil, nil, nil, nil, nil, nil, 4, 15, 19,
+ 13, 61, nil, nil, nil, nil, nil, nil, nil, 31,
+ 32, 33, 34, 35 ]
racc_reduce_table = [
0, 0, :racc_error,
0, 17, :_reduce_1,
3, 14, :_reduce_2,
@@ -189,49 +190,48 @@
0, 15, :_reduce_none,
1, 15, :_reduce_none,
1, 19, :_reduce_none,
2, 19, :_reduce_none,
1, 20, :_reduce_none,
- 1, 21, :_reduce_10,
+ 0, 21, :_reduce_none,
+ 1, 21, :_reduce_none,
0, 22, :_reduce_none,
1, 22, :_reduce_none,
- 0, 23, :_reduce_none,
1, 23, :_reduce_none,
- 1, 24, :_reduce_none,
- 2, 24, :_reduce_none,
- 3, 25, :_reduce_none,
- 5, 25, :_reduce_18,
+ 2, 23, :_reduce_none,
+ 3, 24, :_reduce_none,
+ 5, 24, :_reduce_17,
+ 1, 18, :_reduce_18,
1, 18, :_reduce_19,
- 1, 18, :_reduce_20,
+ 3, 18, :_reduce_20,
3, 18, :_reduce_21,
- 3, 18, :_reduce_22,
3, 18, :_reduce_none,
- 1, 28, :_reduce_none,
- 3, 27, :_reduce_25,
- 1, 30, :_reduce_26,
- 2, 30, :_reduce_27,
- 2, 30, :_reduce_none,
- 5, 32, :_reduce_29,
- 0, 31, :_reduce_none,
- 1, 31, :_reduce_none,
- 1, 29, :_reduce_32,
- 2, 29, :_reduce_33,
+ 1, 27, :_reduce_none,
+ 3, 26, :_reduce_24,
+ 1, 29, :_reduce_25,
+ 2, 29, :_reduce_26,
2, 29, :_reduce_none,
- 1, 33, :_reduce_none,
- 1, 33, :_reduce_none,
- 1, 33, :_reduce_none,
- 1, 33, :_reduce_none,
+ 5, 31, :_reduce_28,
+ 0, 30, :_reduce_none,
+ 1, 30, :_reduce_none,
+ 1, 28, :_reduce_31,
+ 2, 28, :_reduce_32,
+ 2, 28, :_reduce_none,
+ 1, 32, :_reduce_none,
+ 1, 32, :_reduce_none,
+ 1, 32, :_reduce_none,
+ 1, 32, :_reduce_none,
+ 5, 33, :_reduce_38,
5, 34, :_reduce_39,
5, 35, :_reduce_40,
5, 36, :_reduce_41,
- 5, 37, :_reduce_42,
- 1, 26, :_reduce_none,
- 1, 26, :_reduce_none ]
+ 1, 25, :_reduce_none,
+ 1, 25, :_reduce_none ]
-racc_reduce_n = 45
+racc_reduce_n = 44
-racc_shift_n = 73
+racc_shift_n = 72
racc_token_table = {
false => 0,
:error => 1,
:EOL => 2,
@@ -286,11 +286,10 @@
"body",
"@1",
"records",
"blanklines",
"blankline",
- "eol",
"opt_space",
"opt_commentlines",
"commentlines",
"comment",
"eol_opt_comment",
@@ -342,17 +341,11 @@
# reduce 8 omitted
# reduce 9 omitted
-module_eval(<<'.,.,', 'robotstxt.ry', 31)
- def _reduce_10(val, _values, result)
- @lineno += 1
-
- result
- end
-.,.,
+# reduce 10 omitted
# reduce 11 omitted
# reduce 12 omitted
@@ -362,160 +355,158 @@
# reduce 15 omitted
# reduce 16 omitted
-# reduce 17 omitted
-
-module_eval(<<'.,.,', 'robotstxt.ry', 47)
- def _reduce_18(val, _values, result)
+module_eval(<<'.,.,', 'robotstxt.ry', 42)
+ def _reduce_17(val, _values, result)
@sitemaps << val[3]
result
end
.,.,
-module_eval(<<'.,.,', 'robotstxt.ry', 52)
- def _reduce_19(val, _values, result)
+module_eval(<<'.,.,', 'robotstxt.ry', 47)
+ def _reduce_18(val, _values, result)
result = []
result << val[0]
result
end
.,.,
-module_eval(<<'.,.,', 'robotstxt.ry', 57)
- def _reduce_20(val, _values, result)
+module_eval(<<'.,.,', 'robotstxt.ry', 52)
+ def _reduce_19(val, _values, result)
result = []
result
end
.,.,
-module_eval(<<'.,.,', 'robotstxt.ry', 63)
- def _reduce_21(val, _values, result)
+module_eval(<<'.,.,', 'robotstxt.ry', 58)
+ def _reduce_20(val, _values, result)
result << val[2]
result
end
.,.,
-module_eval(<<'.,.,', 'robotstxt.ry', 69)
- def _reduce_22(val, _values, result)
+module_eval(<<'.,.,', 'robotstxt.ry', 64)
+ def _reduce_21(val, _values, result)
val[2].each_with_index { |line, i|
warn "%s line %d: %s: orphan rule line" %
[@site.to_s, @rulelinenos[i], line.token] if $VERBOSE
}
result
end
.,.,
+# reduce 22 omitted
+
# reduce 23 omitted
-# reduce 24 omitted
-
-module_eval(<<'.,.,', 'robotstxt.ry', 84)
- def _reduce_25(val, _values, result)
+module_eval(<<'.,.,', 'robotstxt.ry', 79)
+ def _reduce_24(val, _values, result)
result = Record.new(val[1], val[2])
result
end
.,.,
-module_eval(<<'.,.,', 'robotstxt.ry', 89)
- def _reduce_26(val, _values, result)
+module_eval(<<'.,.,', 'robotstxt.ry', 84)
+ def _reduce_25(val, _values, result)
result = [val[0]]
result
end
.,.,
-module_eval(<<'.,.,', 'robotstxt.ry', 94)
- def _reduce_27(val, _values, result)
+module_eval(<<'.,.,', 'robotstxt.ry', 89)
+ def _reduce_26(val, _values, result)
result << val[1]
result
end
.,.,
-# reduce 28 omitted
+# reduce 27 omitted
-module_eval(<<'.,.,', 'robotstxt.ry', 101)
- def _reduce_29(val, _values, result)
+module_eval(<<'.,.,', 'robotstxt.ry', 96)
+ def _reduce_28(val, _values, result)
result = AgentLine.new(val[0], val[3])
result
end
.,.,
+# reduce 29 omitted
+
# reduce 30 omitted
-# reduce 31 omitted
-
-module_eval(<<'.,.,', 'robotstxt.ry', 109)
- def _reduce_32(val, _values, result)
+module_eval(<<'.,.,', 'robotstxt.ry', 104)
+ def _reduce_31(val, _values, result)
result = [result]
@rulelinenos = []
result
end
.,.,
-module_eval(<<'.,.,', 'robotstxt.ry', 115)
- def _reduce_33(val, _values, result)
+module_eval(<<'.,.,', 'robotstxt.ry', 110)
+ def _reduce_32(val, _values, result)
result << val[1]
@rulelinenos << @lineno
result
end
.,.,
+# reduce 33 omitted
+
# reduce 34 omitted
# reduce 35 omitted
# reduce 36 omitted
# reduce 37 omitted
-# reduce 38 omitted
-
-module_eval(<<'.,.,', 'robotstxt.ry', 128)
- def _reduce_39(val, _values, result)
+module_eval(<<'.,.,', 'robotstxt.ry', 123)
+ def _reduce_38(val, _values, result)
result = AllowLine.new(val[0], val[3])
result
end
.,.,
-module_eval(<<'.,.,', 'robotstxt.ry', 133)
- def _reduce_40(val, _values, result)
+module_eval(<<'.,.,', 'robotstxt.ry', 128)
+ def _reduce_39(val, _values, result)
result = DisallowLine.new(val[0], val[3])
result
end
.,.,
-module_eval(<<'.,.,', 'robotstxt.ry', 138)
- def _reduce_41(val, _values, result)
+module_eval(<<'.,.,', 'robotstxt.ry', 133)
+ def _reduce_40(val, _values, result)
result = CrawlDelayLine.new(val[0], val[3])
result
end
.,.,
-module_eval(<<'.,.,', 'robotstxt.ry', 143)
- def _reduce_42(val, _values, result)
+module_eval(<<'.,.,', 'robotstxt.ry', 138)
+ def _reduce_41(val, _values, result)
result = ExtentionLine.new(val[0], val[3])
result
end
.,.,
-# reduce 43 omitted
+# reduce 42 omitted
-# reduce 44 omitted
+# reduce 43 omitted
# Shared reduce action used by every reduce-table entry tagged
# :_reduce_none (the rules listed as "reduce N omitted").
#
# val     - values collected for the rule's right-hand side.
# _values - unused here.
# result  - unused here.
#
# Returns the first element of val, or nil when val is empty.
def _reduce_none(val, _values, result)
  val.first
end