Sha256: d5274674a8e4a07ffbdac05b239f2cb8b1a52e5e1fb59c2b61a1b92a12c7255d

Contents?: true

Size: 1.76 KB

Versions: 1

Compression:

Stored size: 1.76 KB

Contents

# frozen_string_literal: true

require_relative "unihan_lang/version"
require_relative "unihan_lang/chinese_processor"

module UnihanLang
  # Text-level helpers for telling Traditional Chinese (zh-TW) apart from
  # Simplified Chinese (zh-CN). Every per-character decision is delegated to
  # ChineseProcessor; this class only aggregates those answers over a string.
  class Unihan
    def initialize
      @chinese_processor = ChineseProcessor.new
    end

    # @param text [String]
    # @return [Boolean] true when the overall classification of +text+ is :tw
    def zh_tw?(text)
      language_ratio(text) == :tw
    end

    # @param text [String]
    # @return [Boolean] true when the overall classification of +text+ is :cn
    def zh_cn?(text)
      language_ratio(text) == :cn
    end

    # @param text [String]
    # @return [Boolean] true when every character satisfies the processor's
    #   only_zh_tw? check (vacuously true for an empty string)
    def only_zh_tw?(text)
      text.chars.all? { |char| @chinese_processor.only_zh_tw?(char) }
    end

    # @param text [String]
    # @return [Boolean] true when every character satisfies the processor's
    #   only_zh_cn? check (vacuously true for an empty string)
    def only_zh_cn?(text)
      text.chars.all? { |char| @chinese_processor.only_zh_cn?(char) }
    end

    # @param text [String]
    # @return [Boolean] true when at least one character satisfies the
    #   processor's only_zh_tw? check
    def contains_zh_tw?(text)
      text.chars.any? { |char| @chinese_processor.only_zh_tw?(char) }
    end

    # @param text [String]
    # @return [Boolean] true when at least one character satisfies the
    #   processor's only_zh_cn? check
    def contains_zh_cn?(text)
      text.chars.any? { |char| @chinese_processor.only_zh_cn?(char) }
    end

    # @param text [String]
    # @return [Boolean] true when at least one character satisfies the
    #   processor's chinese_character? check
    def contains_chinese?(text)
      text.chars.any? { |char| @chinese_processor.chinese_character?(char) }
    end

    # @param text [String]
    # @return [Array<String>] the characters of +text+, in order, that satisfy
    #   the processor's chinese_character? check
    def extract_chinese_characters(text)
      text.chars.select { |char| @chinese_processor.chinese_character?(char) }
    end

    # Maps the overall classification of +text+ to a label.
    #
    # @param text [String]
    # @return [String] "ZH_TW", "ZH_CN", "JA" or "Unknown"
    def determine_language(text)
      case language_ratio(text)
      # NOTE(review): language_ratio never returns :ja at present, so this
      # branch cannot be taken; kept so the public label set is unchanged.
      when :ja then "JA"
      when :tw then "ZH_TW"
      when :cn then "ZH_CN"
      else "Unknown"
      end
    end

    private

    # Compares how many characters are TW-only versus CN-only and returns the
    # more likely language.
    #
    # NOTE(review): this uses the processor's chinese? while the public helpers
    # use chinese_character? - confirm both exist and are meant to differ.
    #
    # @param text [String]
    # @return [Symbol] :tw, :cn, or :unknown when +text+ is empty or contains
    #   any character the processor does not consider Chinese
    def language_ratio(text)
      chars = text.chars

      # An empty string has nothing to classify. Without this guard the
      # "every character is Chinese" check passes vacuously and the 0-vs-0 tie
      # below would report :cn.
      return :unknown if chars.empty?
      return :unknown unless chars.all? { |char| @chinese_processor.chinese?(char) }

      only_tw_chars = chars.count { |char| @chinese_processor.only_zh_tw?(char) }
      only_cn_chars = chars.count { |char| @chinese_processor.only_zh_cn?(char) }

      # Ties, including text with no variant-specific characters, resolve to :cn.
      only_tw_chars > only_cn_chars ? :tw : :cn
    end
  end
end

Version data entries

1 entry across 1 version & 1 rubygem

Version Path
unihan_lang-0.1.0 lib/unihan_lang.rb