require 'nkf'

module JapanShippingCSV
  # Bit flags describing which character classes a field accepts, plus helpers
  # to classify and re-encode strings accordingly.
  #
  # Flags are combined with bitwise OR, e.g. half-width romaji and numbers:
  #
  #   valid_encodings = Encoding::HALFWIDTH | Encoding::ROMAJI | Encoding::NUMBERS
  #
  # To test whether a flag is set, compare the masked value explicitly:
  #
  #   romaji_allowed = (valid_encodings & Encoding::ROMAJI) != 0
  #
  # The bare result of `&` is an Integer, and every Integer (including 0) is
  # truthy in Ruby, so it must never be used directly as a condition.
  module Encoding
    HALFWIDTH = 1 # Hankaku
    FULLWIDTH = 2 # Zenkaku
    ROMAJI = 4
    NUMBERS = 8
    KATAKANA = 16
    HYPHEN = 32

    # Character set lower/upper bound definitions.
    #
    # Every non-ASCII bound is spelled as a \u escape so that Unicode
    # normalisation of this file cannot fold a half-width or full-width
    # character into its counterpart.
    module CodePoints
      ## Bounds for Hiragana
      ZENKAKU_HIRAGANA_FIRST = "\u3041".freeze             # U+3041
      ZENKAKU_HIRAGANA_LAST_FOR_CONVERT = "\u3093".freeze  # U+3093
      ZENKAKU_HIRAGANA_LAST = "\u3096".freeze              # U+3096

      ## Bounds for Katakana
      HANKAKU_KATAKANA_FIRST = "\uFF66".freeze             # U+FF66
      HANKAKU_KATAKANA_LAST = "\uFF9D".freeze              # U+FF9D
      ZENKAKU_KATAKANA_FIRST = "\u30A1".freeze             # U+30A1
      ZENKAKU_KATAKANA_LAST_FOR_CONVERT = "\u30F3".freeze  # U+30F3
      ZENKAKU_KATAKANA_LAST = "\u30FA".freeze              # U+30FA

      ## Bounds for Punctuation (kutoten)
      HANKAKU_PUNCTUATION_FIRST = "\uFF61".freeze          # U+FF61
      HANKAKU_PUNCTUATION_LAST = "\uFF9F".freeze           # U+FF9F
      HANKAKU_PUNCTUATION_ONBIKI = "\uFF70".freeze         # U+FF70
      ZENKAKU_PUNCTUATION_FIRST = "\u3001".freeze          # U+3001
      ZENKAKU_PUNCTUATION_LAST = "\u301C".freeze           # U+301C
      ZENKAKU_PUNCTUATION_HG_FIRST = "\u309B".freeze       # U+309B
      ZENKAKU_PUNCTUATION_HG_LAST = "\u309E".freeze        # U+309E
      ZENKAKU_PUNCTUATION_KK_FIRST = "\u30FB".freeze       # U+30FB
      ZENKAKU_PUNCTUATION_KK_LAST = "\u30FE".freeze        # U+30FE

      ## Bounds for Numeric
      HANKAKU_NUMBER_FIRST = '0'.freeze                    # U+0030
      HANKAKU_NUMBER_LAST = '9'.freeze                     # U+0039
      ZENKAKU_NUMBER_FIRST = "\uFF10".freeze               # U+FF10
      ZENKAKU_NUMBER_LAST = "\uFF19".freeze                # U+FF19

      ## Bounds for Alphabetic
      HANKAKU_LETTER_UPPER_FIRST = 'A'.freeze              # U+0041
      HANKAKU_LETTER_UPPER_LAST = 'Z'.freeze               # U+005A
      HANKAKU_LETTER_LOWER_FIRST = 'a'.freeze              # U+0061
      HANKAKU_LETTER_LOWER_LAST = 'z'.freeze               # U+007A
      ZENKAKU_LETTER_UPPER_FIRST = "\uFF21".freeze         # U+FF21
      ZENKAKU_LETTER_UPPER_LAST = "\uFF3A".freeze          # U+FF3A
      ZENKAKU_LETTER_LOWER_FIRST = "\uFF41".freeze         # U+FF41
      ZENKAKU_LETTER_LOWER_LAST = "\uFF5A".freeze          # U+FF5A

      ## Bounds for All Alphanumeric and Symbol ASCII
      HANKAKU_SPACE = ' '.freeze                           # U+0020
      HANKAKU_ASCII_FIRST = '!'.freeze                     # U+0021
      HANKAKU_ASCII_LAST = '~'.freeze                      # U+007E
      ZENKAKU_SPACE = "\u3000".freeze                      # U+3000
      ZENKAKU_ASCII_FIRST = "\uFF01".freeze                # U+FF01
      ZENKAKU_ASCII_LAST = "\uFF5E".freeze                 # U+FF5E
    end

    # String#tr sets used by reencode. Both list digits, lower-case letters,
    # upper-case letters, space and hyphen in the same order so that they map
    # onto each other position by position.
    HALFWIDTH_TR_SET = '0-9a-zA-Z -'.freeze
    FULLWIDTH_TR_SET = "\uFF10-\uFF19\uFF41-\uFF5A\uFF21-\uFF3A\u3000\uFF0D".freeze

    FULLWIDTH_APOSTROPHE = "\uFF07".freeze        # U+FF07
    RIGHT_SINGLE_QUOTATION_MARK = "\u2019".freeze # U+2019

    # Characters accepted by is_hyphen: hyphen-minus (U+002D), full-width
    # hyphen-minus (U+FF0D), and the full-width (U+30FC) and half-width
    # (U+FF70) prolonged sound marks.
    HYPHEN_CHARACTERS = ['-', "\uFF0D", "\u30FC", "\uFF70"].freeze

    # Flag => predicate that recognises characters belonging to that class.
    CHARACTER_CLASS_PREDICATES = {
      ROMAJI => :is_romaji,
      NUMBERS => :is_number,
      KATAKANA => :is_katakana,
      HYPHEN => :is_hyphen
    }.freeze

    private_constant :HALFWIDTH_TR_SET, :FULLWIDTH_TR_SET, :FULLWIDTH_APOSTROPHE,
                     :RIGHT_SINGLE_QUOTATION_MARK, :HYPHEN_CHARACTERS,
                     :CHARACTER_CLASS_PREDICATES

    class << self
      # Converts +value+ to the width selected by +encoding_flags+.
      #
      # With HALFWIDTH set, full-width digits, letters, space and hyphen are
      # mapped to their ASCII forms. With FULLWIDTH set, the ASCII forms are
      # mapped to full-width and the full-width apostrophe becomes U+2019.
      # Both conversions first run NKF with -X (half-width katakana to
      # full-width katakana) and -w (UTF-8 output). When both flags are set
      # the FULLWIDTH conversion of the original value wins. With neither
      # flag set the value is returned untouched.
      #
      # NOTE(review): NKF is left to guess the input encoding, as before.
      #
      # @param value [String, nil] text to convert
      # @param encoding_flags [Integer] bitwise OR of the Encoding flags
      # @return [String, nil] the converted text, or nil when +value+ is nil
      def reencode(value, encoding_flags)
        return value if value.nil?

        result = value

        # Half-width allowed: fold full-width alphanumerics down to ASCII.
        if flag_set?(encoding_flags, HALFWIDTH)
          result = NKF.nkf('-X -w', value).tr(FULLWIDTH_TR_SET, HALFWIDTH_TR_SET)
        end

        # Full-width allowed: widen ASCII alphanumerics. Deliberately starts
        # again from +value+, so this branch overrides the one above.
        if flag_set?(encoding_flags, FULLWIDTH)
          result = NKF.nkf('-X -w', value)
                      .tr(HALFWIDTH_TR_SET, FULLWIDTH_TR_SET)
                      .tr(FULLWIDTH_APOSTROPHE, RIGHT_SINGLE_QUOTATION_MARK)
        end

        result
      end

      # @param value [String]
      # @return [Boolean] true when +value+ is non-empty and consists only of
      #   half-width or full-width Latin letters
      def is_romaji(value)
        return false if value.empty?

        value.each_char.all? do |c|
          c.between?(CodePoints::HANKAKU_LETTER_UPPER_FIRST, CodePoints::HANKAKU_LETTER_UPPER_LAST) ||
            c.between?(CodePoints::HANKAKU_LETTER_LOWER_FIRST, CodePoints::HANKAKU_LETTER_LOWER_LAST) ||
            c.between?(CodePoints::ZENKAKU_LETTER_UPPER_FIRST, CodePoints::ZENKAKU_LETTER_UPPER_LAST) ||
            c.between?(CodePoints::ZENKAKU_LETTER_LOWER_FIRST, CodePoints::ZENKAKU_LETTER_LOWER_LAST)
        end
      end

      # @param value [String]
      # @return [Boolean] true when +value+ is non-empty and consists only of
      #   half-width or full-width digits
      def is_number(value)
        return false if value.empty?

        value.each_char.all? do |c|
          c.between?(CodePoints::HANKAKU_NUMBER_FIRST, CodePoints::HANKAKU_NUMBER_LAST) ||
            c.between?(CodePoints::ZENKAKU_NUMBER_FIRST, CodePoints::ZENKAKU_NUMBER_LAST)
        end
      end

      # @param value [String]
      # @return [Boolean] true when +value+ is non-empty and consists only of
      #   half-width or full-width katakana
      def is_katakana(value)
        return false if value.empty?

        value.each_char.all? do |c|
          # The half-width prolonged sound mark sits inside the half-width
          # katakana range but is classified as a hyphen, not as katakana.
          hankaku = c.between?(CodePoints::HANKAKU_KATAKANA_FIRST, CodePoints::HANKAKU_KATAKANA_LAST) &&
                    c != CodePoints::HANKAKU_PUNCTUATION_ONBIKI
          hankaku || c.between?(CodePoints::ZENKAKU_KATAKANA_FIRST, CodePoints::ZENKAKU_KATAKANA_LAST)
        end
      end

      # @param value [String]
      # @return [Boolean] true when +value+ is non-empty and consists only of
      #   hyphens or prolonged sound marks (half-width or full-width)
      def is_hyphen(value)
        return false if value.empty?

        value.each_char.all? { |c| HYPHEN_CHARACTERS.include?(c) }
      end

      # Checks every character of +value+ against +encoding_flags+.
      #
      # A character is rejected when it belongs to one of the ROMAJI, NUMBERS,
      # KATAKANA or HYPHEN classes and that class's flag is not set.
      # Characters belonging to none of those classes are accepted, and width
      # (HALFWIDTH / FULLWIDTH) is not checked here.
      #
      # @param value [String]
      # @param encoding_flags [Integer] bitwise OR of the Encoding flags
      # @return [Boolean] true when no character is rejected (so also for "")
      def is_valid_string(value, encoding_flags)
        value.each_char.all? do |c|
          CHARACTER_CLASS_PREDICATES.none? do |flag, predicate|
            !flag_set?(encoding_flags, flag) && public_send(predicate, c)
          end
        end
      end

      private

      # True when every bit of +flag+ is present in +flags+. Needed because
      # the Integer returned by `&` is truthy even when it is 0.
      def flag_set?(flags, flag)
        (flags & flag) == flag
      end
    end
  end
end