lib/iso-639.rb in iso-639-0.3.5 vs lib/iso-639.rb in iso-639-0.3.6
- old
+ new
@@ -14,40 +14,46 @@
# Dataset Source:
# https://www.loc.gov/standards/iso639-2/ascii_8bits.html
# https://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
# The table is built once at load time: the lambda below is invoked
# immediately and the array it returns is frozen before assignment.
ISO_639_2 = lambda do
dataset = []
+
# The data file sits in the `data` directory next to this source file.
# The 'r:bom|utf-8' mode reads it as UTF-8 and strips a leading byte-order
# mark if one is present.
File.open(
File.join(File.dirname(__FILE__), 'data', 'ISO-639-2_utf-8.txt'),
'r:bom|utf-8'
) do |file|
# Fields in the data file are separated by '|'.
CSV.new(file, **{ col_sep: '|' }).each do |row|
# nil values are replaced with '' so every field is a string; the entry
# is built through `self[...]` and frozen.
# NOTE(review): `self` is the enclosing class/module, whose header is
# outside this excerpt -- confirm what `self[]` constructs.
dataset << self[*row.map { |v| v || '' }].freeze
end
end
+
# `return` inside a lambda returns from the lambda itself.
return dataset
end.call.freeze
# An inverted index generated from the ISO_639_2 data. Used for searching
# all words and codes in all fields.
INVERTED_INDEX = lambda do
# Maps each downcased token to an array of positions in ISO_639_2 of the
# records that contain it.
index = {}
+
ISO_639_2.each_with_index do |record, i|
record.each do |field|
# Keys are stored downcased; lookups must downcase the term as well.
downcased = field.downcase
+
# Two tokenizations are combined: the first breaks the field on blanks,
# parentheses, commas and semicolons (individual words); the second breaks
# only on ';' so each semicolon-separated segment is also indexed whole.
# NOTE(review): the ';' segments are not stripped, so a segment that
# follows "; " keeps its leading space -- confirm this is intended.
words = (
downcased.split(/[[:blank:]]|\(|\)|,|;/) +
downcased.split(/;/)
)
+
words.each do |word|
# Splitting on adjacent delimiters yields empty strings; skip them.
unless word.empty?
index[word] ||= []
# No de-duplication here: the same record position may be appended more
# than once for a given word.
# NOTE(review): the removed and added lines below read identically in this
# rendering; presumably a whitespace-only change -- confirm.
- index[word] << i
+ index[word] << i
end
end
end
end
+
# Only the outer hash is frozen by the trailing `.freeze`; the per-word
# arrays are not.
return index
end.call.freeze
# The ISO 639-1 dataset as an array of entries. Each entry is an array with
# the following format:
@@ -64,20 +70,21 @@
# Returns the entry array for an alpha-2 or alpha-3 code
def find_by_code(code)
# A nil code cannot match anything; return nil early.
return if code.nil?
# Dispatch on the length of the code. Lengths not listed below fall through
# the `case` and the method returns nil.
case code.length
- when 3
+ when 3, 7
# NOTE(review): the added length 7 presumably admits range-style codes such
# as "qaa-qtz" -- confirm against the dataset.
# Match against either alpha-3 form (bibliographic or terminologic).
ISO_639_2.detect do |entry|
entry if [entry.alpha3, entry.alpha3_terminologic].include?(code)
end
when 2
# Two-character codes are looked up in the ISO 639-1 table by alpha-2 code.
ISO_639_1.detect do |entry|
entry if entry.alpha2 == code
end
end
end
+
# `find` is a shorter alias for find_by_code.
alias_method :find, :find_by_code
# Returns the entry array for a language specified by its English name.
def find_by_english_name(name)
ISO_639_2.detect do |entry|
@@ -94,20 +101,23 @@
# Returns an array of matches for the search term. The term can be a code
# of any kind, or it can be one of the words contained in the English or
# French name field.
def search(term)
# A nil term is treated as the empty string.
- term ||= ''
+ term ||= ''
+
# Index keys are downcased, so the term is downcased too; surrounding
# whitespace is removed before the lookup.
normalized_term = term.downcase.strip
indexes = INVERTED_INDEX[normalized_term]
+
# The index can list the same record several times for one word, hence
# `uniq`. A term that is not in the index yields an empty array.
indexes ? ISO_639_2.values_at(*indexes).uniq : []
end
end
# Returns the entry's alpha-3 bibliographic code, which is stored as the
# first element of the entry array.
def alpha3_bibliographic
  first
end
+
# `alpha3` is shorthand for the bibliographic alpha-3 code.
alias_method :alpha3, :alpha3_bibliographic
# The entry's alpha-3 terminologic (when given)
def alpha3_terminologic
self[1]