regex.rb in twitter-text-1.1.5

- old
+ new

@@ -1,6 +1,6 @@
-# coding: UTF-8
+# encoding: utf-8
 
 module Twitter
   # A collection of regular expressions for parsing Tweet text. The regular expression
   # list is frozen at load time to ensure immutability. These regular expressions are
   # used throughout the <tt>Twitter</tt> classes. Special care has been taken to make
@@ -29,11 +29,16 @@
 
     REGEXEN[:at_signs] = /[@＠]/
     REGEXEN[:extract_mentions] = /(^|[^a-zA-Z0-9_])#{REGEXEN[:at_signs]}([a-zA-Z0-9_]{1,20})(?=(.|$))/o
     REGEXEN[:extract_reply] = /^(?:#{REGEXEN[:spaces]})*#{REGEXEN[:at_signs]}([a-zA-Z0-9_]{1,20})/o
 
-    REGEXEN[:list_name] = /^[a-zA-Z\u0080-\u00ff].{0,79}$/
+    major, minor, patch = RUBY_VERSION.split(/\./)
+    if major.to_i >= 1 && minor.to_i >= 9
+      REGEXEN[:list_name] = /^[a-zA-Z\u0080-\u00ff].{0,79}$/
+    else
+      REGEXEN[:list_name] = /^[a-zA-Z\x80-\xff].{0,79}$/
+    end
 
     # Latin accented characters (subtracted 0xD7 from the range, it's a confusable multiplication sign. Looks like "x")
     LATIN_ACCENTS = [(0xc0..0xd6).to_a, (0xd8..0xf6).to_a, (0xf8..0xff).to_a].flatten.pack('U*').freeze
     REGEXEN[:latin_accents] = /[#{LATIN_ACCENTS}]+/o
 
@@ -49,12 +54,14 @@
 
     # Allow URL paths to contain balanced parens
     #  1. Used in Wikipedia URLs like /Primer_(film)
     #  2. Used in IIS sessions like /S(dfd346)/
     REGEXEN[:wikipedia_disambiguation] = /(?:\([^\)]+\))/i
+    # Allow @ in a url, but only in the middle. Catch things like http://example.com/@user
     REGEXEN[:valid_url_path_chars] = /(?:
       #{REGEXEN[:wikipedia_disambiguation]}|
-      [\.\,]?[a-z0-9!\*';:=\+\$\/%#\[\]\-_,~@]
+      @[^\/]+\/|
+      [\.\,]?[a-z0-9!\*';:=\+\$\/%#\[\]\-_,~]
     )/ix
     # Valid end-of-path characters (so /foo. does not gobble the period).
     #   1. Allow =&# for empty URL parameters and other URL-join artifacts
     REGEXEN[:valid_url_path_ending_chars] = /[a-z0-9=#\/]/i
     REGEXEN[:valid_url_query_chars] = /[a-z0-9!\*'\(\);:&=\+\$\/%#\[\]\-_\.,~]/i