lib/regex.rb in twitter-text-1.1.4 vs lib/regex.rb in twitter-text-1.1.5

- old
+ new

@@ -1,6 +1,6 @@ -# coding: UTF-8 +# encoding: utf-8 module Twitter # A collection of regular expressions for parsing Tweet text. The regular expression # list is frozen at load time to ensure immutability. These regular expressions are # used throughout the <tt>Twitter</tt> classes. Special care has been taken to make @@ -29,11 +29,16 @@ REGEXEN[:at_signs] = /[@@]/ REGEXEN[:extract_mentions] = /(^|[^a-zA-Z0-9_])#{REGEXEN[:at_signs]}([a-zA-Z0-9_]{1,20})(?=(.|$))/o REGEXEN[:extract_reply] = /^(?:#{REGEXEN[:spaces]})*#{REGEXEN[:at_signs]}([a-zA-Z0-9_]{1,20})/o - REGEXEN[:list_name] = /^[a-zA-Z\u0080-\u00ff].{0,79}$/ + major, minor, patch = RUBY_VERSION.split(/\./) + if major.to_i >= 1 && minor.to_i >= 9 + REGEXEN[:list_name] = /^[a-zA-Z\u0080-\u00ff].{0,79}$/ + else + REGEXEN[:list_name] = /^[a-zA-Z\x80-\xff].{0,79}$/ + end # Latin accented characters (subtracted 0xD7 from the range, it's a confusable multiplication sign. Looks like "x") LATIN_ACCENTS = [(0xc0..0xd6).to_a, (0xd8..0xf6).to_a, (0xf8..0xff).to_a].flatten.pack('U*').freeze REGEXEN[:latin_accents] = /[#{LATIN_ACCENTS}]+/o @@ -49,12 +54,14 @@ # Allow URL paths to contain balanced parens # 1. Used in Wikipedia URLs like /Primer_(film) # 2. Used in IIS sessions like /S(dfd346)/ REGEXEN[:wikipedia_disambiguation] = /(?:\([^\)]+\))/i + # Allow @ in a url, but only in the middle. Catch things like http://example.com/@user REGEXEN[:valid_url_path_chars] = /(?: #{REGEXEN[:wikipedia_disambiguation]}| - [\.\,]?[a-z0-9!\*';:=\+\$\/%#\[\]\-_,~@] + @[^\/]+\/| + [\.\,]?[a-z0-9!\*';:=\+\$\/%#\[\]\-_,~] )/ix # Valid end-of-path characters (so /foo. does not gobble the period). # 1. Allow =&# for empty URL parameters and other URL-join artifacts REGEXEN[:valid_url_path_ending_chars] = /[a-z0-9=#\/]/i REGEXEN[:valid_url_query_chars] = /[a-z0-9!\*'\(\);:&=\+\$\/%#\[\]\-_\.,~]/i