lib/iso-639.rb in iso-639-0.3.5 vs lib/iso-639.rb in iso-639-0.3.6

- old
+ new

@@ -14,40 +14,46 @@ # Dataset Source: # https://www.loc.gov/standards/iso639-2/ascii_8bits.html # https://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt ISO_639_2 = lambda do dataset = [] + File.open( File.join(File.dirname(__FILE__), 'data', 'ISO-639-2_utf-8.txt'), 'r:bom|utf-8' ) do |file| CSV.new(file, **{ col_sep: '|' }).each do |row| dataset << self[*row.map { |v| v || '' }].freeze end end + return dataset end.call.freeze # An inverted index generated from the ISO_639_2 data. Used for searching # all words and codes in all fields. INVERTED_INDEX = lambda do index = {} + ISO_639_2.each_with_index do |record, i| record.each do |field| downcased = field.downcase + words = ( downcased.split(/[[:blank:]]|\(|\)|,|;/) + downcased.split(/;/) ) + words.each do |word| unless word.empty? index[word] ||= [] - index[word] << i + index[word] << i end end end end + return index end.call.freeze # The ISO 639-1 dataset as an array of entries. Each entry is an array with # the following format: @@ -64,20 +70,21 @@ # Returns the entry array for an alpha-2 or alpha-3 code def find_by_code(code) return if code.nil? case code.length - when 3 + when 3, 7 ISO_639_2.detect do |entry| entry if [entry.alpha3, entry.alpha3_terminologic].include?(code) end when 2 ISO_639_1.detect do |entry| entry if entry.alpha2 == code end end end + alias_method :find, :find_by_code # Returns the entry array for a language specified by its English name. def find_by_english_name(name) ISO_639_2.detect do |entry| @@ -94,20 +101,23 @@ # Returns an array of matches for the search term. The term can be a code # of any kind, or it can be one of the words contained in the English or # French name field. def search(term) - term ||= '' + term ||= '' + normalized_term = term.downcase.strip indexes = INVERTED_INDEX[normalized_term] + indexes ? ISO_639_2.values_at(*indexes).uniq : [] end end # The entry's alpha-3 bibliographic code. 
def alpha3_bibliographic self[0] end + alias_method :alpha3, :alpha3_bibliographic # The entry's alpha-3 terminologic (when given) def alpha3_terminologic self[1]