lib/regex.rb in twitter-text-1.4.7 vs lib/regex.rb in twitter-text-1.4.8

- old
+ new

@@ -57,22 +57,23 @@ REGEXEN[:latin_accents] = /[#{LATIN_ACCENTS}]+/o REGEXEN[:end_screen_name_match] = /^(?:#{REGEXEN[:at_signs]}|#{REGEXEN[:latin_accents]}|:\/\/)/o CJ_HASHTAG_CHARACTERS = [ - (0x30A1..0x30FA).to_a, # Katakana (full-width) - (0xFF66..0xFF9E).to_a, # Katakana (half-width) + (0x30A1..0x30FA).to_a, 0x30FC, # Katakana (full-width) + (0xFF66..0xFF9F).to_a, # Katakana (half-width) (0xFF10..0xFF19).to_a, (0xFF21..0xFF3A).to_a, (0xFF41..0xFF5A).to_a, # Latin (full-width) (0x3041..0x3096).to_a, # Hiragana (0x3400..0x4DBF).to_a, # Kanji (CJK Extension A) (0x4E00..0x9FFF).to_a, # Kanji (Unified) (0x20000..0x2A6DF).to_a, # Kanji (CJK Extension B) (0x2A700..0x2B73F).to_a, # Kanji (CJK Extension C) (0x2B740..0x2B81F).to_a, # Kanji (CJK Extension D) - (0x2F800..0x2FA1F).to_a # Kanji (CJK supplement) + (0x2F800..0x2FA1F).to_a, # Kanji (CJK supplement) + 0x3005 # Kanji (iteration mark) ].flatten.pack('U*').freeze - HASHTAG_BOUNDARY = /(?:\A|\z|#{REGEXEN[:spaces]}|「|」|。|、|\.|!)/ + HASHTAG_BOUNDARY = /(?:\A|\z|#{REGEXEN[:spaces]}|「|」|。|、|\.|!|\?|！|？|,)/ # A hashtag must contain latin characters, numbers and underscores, but not all numbers. HASHTAG_ALPHA = /[a-z_#{LATIN_ACCENTS}#{NON_LATIN_HASHTAG_CHARS}#{CJ_HASHTAG_CHARACTERS}]/io HASHTAG_ALPHANUMERIC = /[a-z0-9_#{LATIN_ACCENTS}#{NON_LATIN_HASHTAG_CHARS}#{CJ_HASHTAG_CHARACTERS}]/io