lib/regex.rb in twitter-text-1.4.7 vs lib/regex.rb in twitter-text-1.4.8
- old
+ new
@@ -57,22 +57,23 @@
REGEXEN[:latin_accents] = /[#{LATIN_ACCENTS}]+/o
REGEXEN[:end_screen_name_match] = /^(?:#{REGEXEN[:at_signs]}|#{REGEXEN[:latin_accents]}|:\/\/)/o
CJ_HASHTAG_CHARACTERS = [
- (0x30A1..0x30FA).to_a, # Katakana (full-width)
- (0xFF66..0xFF9E).to_a, # Katakana (half-width)
+ (0x30A1..0x30FA).to_a, 0x30FC, # Katakana (full-width)
+ (0xFF66..0xFF9F).to_a, # Katakana (half-width)
(0xFF10..0xFF19).to_a, (0xFF21..0xFF3A).to_a, (0xFF41..0xFF5A).to_a, # Latin (full-width)
(0x3041..0x3096).to_a, # Hiragana
(0x3400..0x4DBF).to_a, # Kanji (CJK Extension A)
(0x4E00..0x9FFF).to_a, # Kanji (Unified)
(0x20000..0x2A6DF).to_a, # Kanji (CJK Extension B)
(0x2A700..0x2B73F).to_a, # Kanji (CJK Extension C)
(0x2B740..0x2B81F).to_a, # Kanji (CJK Extension D)
- (0x2F800..0x2FA1F).to_a # Kanji (CJK supplement)
+ (0x2F800..0x2FA1F).to_a, # Kanji (CJK supplement)
+ 0x3005 # Kanji (iteration mark)
].flatten.pack('U*').freeze
- HASHTAG_BOUNDARY = /(?:\A|\z|#{REGEXEN[:spaces]}|「|」|。|、|\.|!)/
+ HASHTAG_BOUNDARY = /(?:\A|\z|#{REGEXEN[:spaces]}|「|」|。|、|\.|!|\?|！|？|，)/
# A hashtag must contain latin characters, numbers and underscores, but not all numbers.
HASHTAG_ALPHA = /[a-z_#{LATIN_ACCENTS}#{NON_LATIN_HASHTAG_CHARS}#{CJ_HASHTAG_CHARACTERS}]/io
HASHTAG_ALPHANUMERIC = /[a-z0-9_#{LATIN_ACCENTS}#{NON_LATIN_HASHTAG_CHARS}#{CJ_HASHTAG_CHARACTERS}]/io