lib/gimchi/korean.rb in gimchi-0.1.0 vs lib/gimchi/korean.rb in gimchi-0.1.1
- old
+ new
@@ -3,127 +3,119 @@
module Gimchi
class Korean
DEFAULT_CONFIG_FILE_PATH =
File.dirname(__FILE__) + '/../../config/default.yml'
+ # Returns the YAML configuration used by this Korean instance.
+ # @return [Hash]
attr_reader :config
- attr_accessor :pronouncer
# Initialize Gimchi::Korean.
- # You can override many part of the implementation with customized config file.
+ # @param [String] config_file You can override many parts of the implementation by customizing config file
def initialize config_file = DEFAULT_CONFIG_FILE_PATH
require 'yaml'
@config = YAML.load(File.read config_file)
@config.freeze
- @pronouncer = Korean::Pronouncer.new(self)
+ @pronouncer = Korean::Pronouncer.send :new, self
end
- # Array of chosung's
+ # Array of chosung's.
+ #
+ # @return [Array] Array of chosung strings
def chosungs
  # Looks up the 'chosung' entry under the 'structure' section of the loaded config.
  structure = config['structure']
  structure['chosung']
end
- # Array of jungsung's
+ # Array of jungsung's.
+ # @return [Array] Array of jungsung strings
def jungsungs
  # Looks up the 'jungsung' entry under the 'structure' section of the loaded config.
  structure = config['structure']
  structure['jungsung']
end
- # Array of jongsung's
+ # Array of jongsung's.
+ # @return [Array] Array of jongsung strings
def jongsungs
  # Looks up the 'jongsung' entry under the 'structure' section of the loaded config.
  structure = config['structure']
  structure['jongsung']
end
- # Checks if the given character is a korean character
+ # Checks if the given character is a Korean character.
+ # @param [String] ch A string of size 1
def korean_char? ch
  raise ArgumentError, 'Lengthy input' if ch.length > 1

  # A complete character qualifies outright; otherwise the input must appear
  # in one of the three component lists (chosung, jungsung, jongsung).
  complete_korean_char?(ch) ||
    [chosungs, jungsungs, jongsungs].any? { |components| components.include?(ch) }
end
# Checks if the given character is a "complete" korean character.
# "Complete" Korean character must have chosung and jungsung, with optional jongsung.
+ # @param [String] ch A string of size 1
def complete_korean_char? ch
  raise ArgumentError, 'Lengthy input' if ch.length > 1

  # An empty string carries no code point to test, so it must not be
  # reported as a complete character.
  code_point = ch.unpack('U').first
  return false if code_point.nil?

  # Range of Korean characters in Unicode 2.0: AC00(가) ~ D7A3(힣)
  code_point >= 0xAC00 && code_point <= 0xD7A3
end
- # Splits the given string into an array of Korean::Char's and strings.
+ # Splits the given string into an array of Korean::Char's and Strings of length 1.
+ # @param [String] str Input string.
+ # @return [Array] Mixed array of Korean::Char instances and Strings of length 1 (for non-korean characters)
def dissect str
  # Walk the input one character at a time, wrapping each Korean character
  # in a Korean::Char and passing everything else through untouched.
  str.each_char.map do |letter|
    if korean_char?(letter)
      Korean::Char.new(self, letter)
    else
      letter
    end
  end
end
- # Reads a string with numbers in Korean way.
+ # Reads numeric expressions in Korean way.
+ # @param [String, Number] str Numeric type or String containing numeric expressions
+ # @return [String] Output string
def read_number str
  # The pattern matches an optional sign, digits with optional commas, an
  # optional fractional part, and (third group) at most one following
  # character preceded by optional whitespace. Each match is replaced by
  # whatever read_number_sub returns for it.
  str.to_s.gsub(/([+-]\s*)?[0-9,]*,*[0-9]+(\.[0-9]+)?(\s*.)?/) do
    # Capture the match data up front so the helper cannot clobber it.
    match = Regexp.last_match
    read_number_sub(match[0], match[3])
  end
end
# Returns the pronunciation of the given string containing Korean characters.
# Takes optional options hash.
- # - If :pronounce_each_char is true, each character of the string is pronounced respectively.
- # - If :slur is true, characters separated by whitespaces are treated as if they were contiguous.
- # - If :number is true, numberic parts of the string is also pronounced in Korean.
- # - :except array allows you to skip certain transformations.
+ #
+ # @param [String] str Input string
+ # @param [Boolean] options[:pronounce_each_char] Each character of the string is pronounced respectively.
+ # @param [Boolean] options[:slur] Strings separated by whitespaces are processed again as if they were contiguous.
+ # @param [Boolean] options[:number] Numeric parts of the string are also pronounced in Korean.
+ # @param [Array] options[:except] Allows you to skip certain transformations.
+ # @return [String] Output string
def pronounce str, options = {}
options = {
:pronounce_each_char => false,
:slur => false,
:number => true,
:except => [],
:debug => false
}.merge options
str = read_number(str) if options[:number]
- chars = dissect str
- transforms = []
- idx = -1
- while (idx += 1) < chars.length
- c = chars[idx]
+ result, transforms = @pronouncer.send :pronounce!, str, options
- next if c.is_a?(Korean::Char) == false
-
- next_c = chars[idx + 1]
- next_kc = (options[:pronounce_each_char] == false &&
- next_c.is_a?(Korean::Char) &&
- next_c.complete?) ? next_c : nil
-
- transforms += @pronouncer.transform(c, next_kc, :except => options[:except])
-
- # Slur (TBD)
- if options[:slur] && options[:pronounce_each_char] == false && next_c =~ /\s/
- chars[(idx + 1)..-1].each_with_index do | nc, new_idx |
- next if nc =~ /\s/
-
- if nc.is_a?(Korean::Char) && nc.complete?
- transforms += @pronouncer.transform(c, nc, :except => options[:except])
- end
-
- idx = idx + 1 + new_idx - 1
- break
- end
- end
- end
-
if options[:debug]
- return chars.join, transforms
+ return result, transforms
else
- chars.join
+ return result
end
end
# Returns the romanization (alphabetical notation) of the given Korean string.
# http://en.wikipedia.org/wiki/Korean_romanization
+ # @param [String] str Input Korean string
+ # @param [Boolean] options[:as_pronounced] If true, #pronounce is internally called before romanize
+ # @param [Boolean] options[:number] Whether to read numeric expressions in the string
+ # @param [Boolean] options[:slur] Same as :slur in #pronounce
+ # @return [String] Output string in Roman Alphabet
+ # @see Korean#pronounce
def romanize str, options = {}
options = {
:as_pronounced => true,
:number => true,
:slur => false
@@ -140,26 +132,40 @@
:slur => options[:slur],
# 제1항 [붙임 1] ‘ㅢ’는 ‘ㅣ’로 소리 나더라도 ‘ui’로 적는다.
:except => %w[rule_5_3]
dash = rdata[0]["ㅇ"]
romanization = ""
- (chars = str.each_char.to_a).each_with_index do | kc, cidx |
- if korean_char? kc
- Korean::Char.new(self, kc).to_a.each_with_index do | comp, idx |
+
+ romanize_chunk = lambda do | chunk |
+ dissect(chunk).each do | kc |
+ kc.to_a.each_with_index do | comp, idx |
next if comp.nil?
comp = rdata[idx][comp] || comp
comp = comp[1..-1] if comp[0] == dash &&
(romanization.empty? || romanization[-1] =~ /\s/ || comp[1] == 'w')
romanization += comp
end
- else
- romanization += kc
end
+
+ return post_subs.keys.inject(romanization) { | output, pattern |
+ output.gsub(pattern, post_subs[pattern])
+ }
end
- post_subs.keys.inject(romanization) { | output, pattern |
- output.gsub(pattern, post_subs[pattern])
- }.capitalize
+ k_chunk = ""
+ str.each_char do | c |
+ if korean_char? c
+ k_chunk += c
+ else
+ unless k_chunk.empty?
+ romanization = romanize_chunk.call k_chunk
+ k_chunk = ""
+ end
+ romanization += c
+ end
+ end
+ romanization = romanize_chunk.call k_chunk unless k_chunk.empty?
+ romanization
end
private
def read_number_sub num, next_char = nil
nconfig = config['number']