lib/person_name/name_splitter.rb in person-name-0.2.4 vs lib/person_name/name_splitter.rb in person-name-0.2.5

- old
+ new

@@ -1,71 +1,105 @@ module PersonName class NameSplitter NAME_PARTS = %w(prefix first_name middle_name intercalation last_name suffix) + DEFAULT_PREFIXES = %w(Mr. Dhr. Drs. Prof. Mevr. Mej. Mrs.) + DEFAULT_SUFFIXES = %w(jr. Phd.) + + # Splits the given name in individual name parts in form of a hash. + # You can supply a hash with existing values for editing purposes. def self.split(new_name_str, existing_values = {}) parts = new_name_str.split " " names = [] stage = :prefix + + # value to remember for merging remember = nil parts.each_with_index do |part, index| + + # some helper variables is_upcase = (part[0,1] == part[0,1].upcase) has_dash = part.include? "-" is_last = (parts.length - 1) == index - fp = [remember, part].compact.join(" ") + # add the remember part to the current part but make a note if we used a remembered value + full_part = [remember, part].compact.join(" ") + did_remember = (full_part != part) + suffix_remember = remember remember = nil - did_remember = (fp != part) - if valid_prefix?(part) and stage == :prefix # if the part is a valid prefix, mark it as such - names = add_to_last_if :prefix, fp, names - elsif valid_suffix?(part) and stage == :name # if the part is a valid suffix, mark it as such - names = add_to_last_if :suffix, fp, names - elsif part == "-" # if the part is a dash - if last_stored = names.pop # retrieve the previous name part (if any) and store it with the dash - # for the part to come (remember) - remember = [last_stored[0], fp].compact.join(" ") - else - # if there is no previous part, just store the current part for later - remember = fp - end - elsif !is_upcase and !did_remember # intercalation words are never with a capital - names = add_to_last_if :intercalation, fp, names - stage = :name - elsif !is_upcase and did_remember - remember = fp - elsif is_upcase and !has_dash - names << [fp, :name] - stage = :name - elsif is_upcase and has_dash - if fp.ends_with? "-" - if is_last - names << [fp, :name] - stage = :name + + # Mr. Mrs. Mevr. Mej. + if valid_prefix?(part) and stage == :prefix and not did_remember # if the part is a valid prefix, mark it as such + names = add_to_last_if :prefix, part, names + + # Jr. Phd. + elsif valid_suffix?(part) # if the part is a valid suffix, mark it as such + names << [suffix_remember, :name] if did_remember + names = add_to_last_if :suffix, part, names + else + remember = nil + + # Damen - van Valkenburg + if part == "-" # if the part is a dash + if last_stored = names.pop # retrieve the previous name part (if any) and store it with the dash + # for the part to come (remember) + remember = [last_stored[0], full_part].compact.join(" ") else - remember = fp + # if there is no previous part, just store the current part for later + remember = full_part end - else - if fp.starts_with?("-") and last_stored = names.pop - fp = [last_stored[0], fp].compact.join(" ") - end - dash_parts = fp.split "-" - if dash_parts.last.first == dash_parts.last.first.upcase - names << [fp, :name] - stage = :name - elsif is_last - names << [fp, :name] - stage = :name + + # van de Sloot + elsif !is_upcase and !did_remember # intercalation words are never with a capital + names = add_to_last_if :intercalation, full_part, names + stage = :name + + # Groen in het Woud + elsif !is_upcase and did_remember + remember = full_part + + # Groen + elsif is_upcase and !has_dash + names << [full_part, :name] + stage = :name + + # Groen-teboer + elsif is_upcase and has_dash + if full_part.ends_with? "-" + if is_last + names << [full_part, :name] + stage = :name + else + remember = full_part + end else - remember = fp + if full_part.starts_with?("-") and last_stored = names.pop + last_stored_name = last_stored[0].split(" ") + last_part = last_stored_name.pop + names << [last_stored_name.join(" "), last_stored[1]] unless last_stored_name.empty? + full_part = [last_part, full_part].compact.join(" ") + end + dash_parts = full_part.split "-" + if dash_parts.last[0,1] == dash_parts.last[0,1].upcase + names << [full_part, :name] + stage = :name + elsif is_last + names << [full_part, :name] + stage = :name + else + remember = full_part + end end end end end + names << [remember, :name] if remember new_name = {} stage = O[:prefix] + #puts names.inspect names.each_with_index do |value, index| name, name_type = *value stage = recheck_stage(name, stage, existing_values) @@ -137,14 +171,14 @@ end list end def self.valid_prefix? name_part - false + DEFAULT_PREFIXES.include? name_part end def self.valid_suffix? name_part - false + DEFAULT_SUFFIXES.include? name_part end end end \ No newline at end of file