lib/regex.rb in twitter-text-1.0.1 vs lib/regex.rb in twitter-text-1.0.2

- old
+ new

@@ -24,12 +24,13 @@ 0x205F, # White_Space # Zs MEDIUM MATHEMATICAL SPACE 0x3000, # White_Space # Zs IDEOGRAPHIC SPACE ].flatten.freeze REGEXEN[:spaces] = Regexp.new(UNICODE_SPACES.collect{ |e| [e].pack 'U*' }.join('|')) - REGEXEN[:extract_mentions] = /(^|[^a-zA-Z0-9_])[@@]([a-zA-Z0-9_]{1,20})(?!@)/ - REGEXEN[:extract_reply] = /^(?:#{REGEXEN[:spaces]})*[@@]([a-zA-Z0-9_]{1,20})/o + REGEXEN[:at_signs] = /[@@]/ + REGEXEN[:extract_mentions] = /(^|[^a-zA-Z0-9_])#{REGEXEN[:at_signs]}([a-zA-Z0-9_]{1,20})(?=(.|$))/o + REGEXEN[:extract_reply] = /^(?:#{REGEXEN[:spaces]})*#{REGEXEN[:at_signs]}([a-zA-Z0-9_]{1,20})/o REGEXEN[:list_name] = /^[a-zA-Z\x80-\xff].{0,79}$/ # Latin accented characters (subtracted 0xD7 from the range, it's a confusable multiplication sign. Looks like "x") LATIN_ACCENTS = [(0xc0..0xd6).to_a, (0xd8..0xf6).to_a, (0xf8..0xff).to_a].flatten.pack('U*').freeze @@ -40,21 +41,21 @@ REGEXEN[:auto_link_hashtags] = /(^|[^0-9A-Z&\/]+)(#|#)([0-9A-Z_]*[A-Z_]+#{HASHTAG_CHARACTERS}*)/io REGEXEN[:auto_link_usernames_or_lists] = /([^a-zA-Z0-9_]|^)([@@]+)([a-zA-Z0-9_]{1,20})(\/[a-zA-Z][a-zA-Z0-9\x80-\xff\-]{0,79})?/ REGEXEN[:auto_link_emoticon] = /(8\-\#|8\-E|\+\-\(|\`\@|\`O|\<\|:~\(|\}:o\{|:\-\[|\>o\<|X\-\/|\[:-\]\-I\-|\/\/\/\/Ö\\\\\\\\|\(\|:\|\/\)|∑:\*\)|\( \| \))/ # URL related hash regex collection - REGEXEN[:valid_preceeding_chars] = /(?:[^\/"':!=]|^|\:)/ - REGEXEN[:valid_domain] = /(?:[\.-]|[^[:punct:]])+\.[a-z]{2,}(?::[0-9]+)?/i - REGEXEN[:valid_url_path_chars] = /[a-z0-9!\*'\(\);:&=\+\$\/%#\[\]\-_\.,~]/i + REGEXEN[:valid_preceding_chars] = /(?:[^\/"':!=]|^|\:)/ + REGEXEN[:valid_domain] = /(?:[\.-]|[^[:punct:]\s])+\.[a-z]{2,}(?::[0-9]+)?/i + REGEXEN[:valid_url_path_chars] = /[a-z0-9!\*'\(\);:&=\+\$\/%#\[\]\-_\.,~@]/i # Valid end-of-path chracters (so /foo. does not gobble the period). # 1. Allow ) for Wikipedia URLs. # 2. Allow =&# for empty URL parameters and other URL-join artifacts REGEXEN[:valid_url_path_ending_chars] = /[a-z0-9\)=#\/]/i REGEXEN[:valid_url_query_chars] = /[a-z0-9!\*'\(\);:&=\+\$\/%#\[\]\-_\.,~]/i REGEXEN[:valid_url_query_ending_chars] = /[a-z0-9_&=#]/i REGEXEN[:valid_url] = %r{ ( # $1 total match - (#{REGEXEN[:valid_preceeding_chars]}) # $2 Preceeding chracter + (#{REGEXEN[:valid_preceding_chars]}) # $2 Preceeding chracter ( # $3 URL (https?:\/\/|www\.) # $4 Protocol or beginning (#{REGEXEN[:valid_domain]}) # $5 Domain(s) and optional post number (/#{REGEXEN[:valid_url_path_chars]}*#{REGEXEN[:valid_url_path_ending_chars]}?)? # $6 URL Path (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # $7 Query String