lib/phonetics.rb in phonetics-1.5.1 vs lib/phonetics.rb in phonetics-1.5.2

- old
+ new

@@ -1,5 +1,7 @@ +# frozen_string_literal: true + require 'delegate' module Phonetics extend self @@ -8,11 +10,10 @@ # # Usage: # Phonetics::String.new("wətɛvɝ").each_phoneme.to_a # => ["w", "ə", "t", "ɛ", "v", "ɝ"] class String < SimpleDelegator - # Group all phonemes by how many characters they have. Use this to walk # through a string finding phonemes (looking for longest ones first) def self.phonemes_by_length @phonemes_by_length ||= Phonetics.phonemes.each_with_object( # This relies on the impicit stable key ordering of Hash objects in Ruby @@ -27,20 +28,19 @@ idx = 0 Enumerator.new do |y| while idx < chars.length found = false self.class.phonemes_by_length.each do |size, phonemes| - if idx + size <= chars.length - candidate = chars[idx..idx+size-1].join - if phonemes.include?(candidate) - y.yield candidate - idx += size - found = true - break - else - end - end + next unless idx + size <= chars.length + + candidate = chars[idx..idx + size - 1].join + next unless phonemes.include?(candidate) + + y.yield candidate + idx += size + found = true + break end idx += 1 unless found end end end @@ -74,11 +74,11 @@ 'ɯ' => { F1: 300, F2: 1390, rounded: false }, 'æ' => { F1: 800, F2: 1900, rounded: false }, # Guessing From other vowels 'u' => { F1: 350, F2: 650, rounded: true }, # Guessing From other vowels 'ʊ' => { F1: 350, F2: 650, rounded: true }, # Frequencies from http://videoweb.nie.edu.sg/phonetic/vowels/measurements.html - } + }.freeze def phonemes @phonemes ||= FormantFrequencies.keys end @@ -88,12 +88,12 @@ # TODO: account for rhoticity (F3) def distance(phoneme1, phoneme2) formants1 = FormantFrequencies.fetch(phoneme1) formants2 = FormantFrequencies.fetch(phoneme2) - @minmax_f1 ||= FormantFrequencies.values.minmax {|a, b| a[:F1] <=> b[:F1] }.map {|h| h[:F1] } - @minmax_f2 ||= FormantFrequencies.values.minmax {|a, b| a[:F2] <=> b[:F2] }.map {|h| h[:F2] } + @minmax_f1 ||= FormantFrequencies.values.minmax { |a, b| a[:F1] <=> b[:F1] }.map { |h| h[:F1] } + @minmax_f2 ||= FormantFrequencies.values.minmax { |a, b| a[:F2] <=> b[:F2] }.map { |h| h[:F2] } # Get an x and y value for each input phoneme scaled between 0.0 and 1.0 # We'll use the scaled f1 as the 'x' and the scaled f2 as the 'y' scaled_phoneme1_f1 = (formants1[:F1] - @minmax_f1[0]) / @minmax_f1[1].to_f scaled_phoneme1_f2 = (formants1[:F2] - @minmax_f2[0]) / @minmax_f2[1].to_f @@ -103,53 +103,56 @@ f1_distance = (scaled_phoneme1_f1 - scaled_phoneme2_f1).abs f2_distance = (scaled_phoneme1_f2 - scaled_phoneme2_f2).abs # When we have four values we can use the pythagorean theorem on them # (order doesn't matter) - Math.sqrt((f1_distance ** 2) + (f2_distance ** 2)) + Math.sqrt((f1_distance**2) + (f2_distance**2)) end end module Consonants extend self # Plosives and fricatives are less similar than trills and flaps, or # sibilant fricatives and non-sibilant fricatives # TODO: this is unfinished and possibly a bad idea MannerDistances = { - 'Nasal' => %w[continuant], - 'Stop' => %w[], - 'Sibilant fricative' => %w[continuant fricative], + 'Nasal' => %w[continuant], + 'Stop' => %w[], + 'Sibilant fricative' => %w[continuant fricative], 'Non-sibilant fricative' => %w[continuant non_sibilant fricative], - 'Approximant' => %w[], - 'Tap/Flap' => %w[], - 'Trill' => %w[], - 'Lateral fricative' => %w[continuant fricative], - 'Lateral approximant' => %w[], - 'Lateral tap/flap' => %w[], + 'Approximant' => %w[], + 'Tap/Flap' => %w[], + 'Trill' => %w[], + 'Lateral fricative' => %w[continuant fricative], + 'Lateral approximant' => %w[], + 'Lateral tap/flap' => %w[], }.freeze # This chart (columns 2 through the end, anyway) is a direct port of # https://en.wikipedia.org/wiki/International_Phonetic_Alphabet#Letters - # We # store the consonant table in this format to make updating it easier. - ChartData = %Q{ | Labio-velar | Bi-labial | Labio-dental | Linguo-labial | Dental | Alveolar | Post-alveolar | Retro-flex | Palatal | Velar | Uvular | Pharyngeal | Glottal + # We store the consonant table in this format to make updating it easier. + # + # rubocop:disable Layout/TrailingWhitespace + ChartData = %( | Labio-velar | Bi-labial | Labio-dental | Linguo-labial | Dental | Alveolar | Post-alveolar | Retro-flex | Palatal | Velar | Uvular | Pharyngeal | Glottal Nasal | | m̥ m | ɱ | n̼ | | n̥ n | | ɳ̊ ɳ | ɲ̊ ɲ | ŋ̊ ŋ | ɴ | | Stop | | p b | p̪ b̪ | t̼ d̼ | | t d | | ʈ ɖ | c ɟ | k g | q ɢ | ʡ | ʔ Sibilant fricative | | | | | | s z | ʃ ʒ | ʂ ʐ | ɕ ʑ | | | | Non-sibilant fricative | | ɸ β | f v | θ̼ ð̼ | θ ð | θ̠ ð̠ | ɹ̠̊˔ ɹ̠˔ | ɻ˔ | ç ʝ | x ɣ | χ ʁ | ħ ʕ | h ɦ Approximant | w | | ʋ̥ ʋ | | | ɹ̥ ɹ | | ɻ̊ ɻ | j̊ j | ɰ̊ ɰ | | | ʔ̞ Tap/flap | | ⱱ̟ | ⱱ | ɾ̼ | | ɾ̥ ɾ | | ɽ̊ ɽ | | | ɢ̆ | ʡ̆ | Trill | | ʙ̥ ʙ | | | | r̥ r | | | | | ʀ̥ ʀ | ʜ ʢ | Lateral fricative | | | | | | ɬ ɮ | | ɭ̊˔ ɭ˔ | ʎ̝̊ ʎ̝ | ʟ̝̊ ʟ̝ | | | Lateral approximant | | | | | | l̥ l | | ɭ̊ ɭ | ʎ̥ ʎ | ʟ̥ ʟ | ʟ̠ | | Lateral tap/flap | | | | | | ɺ | | ɭ̆ | ʎ̆ | ʟ̆ | | | - } + ) + # rubocop:enable Layout/TrailingWhitespace # Parse the ChartData into a lookup table where we can retrieve attributes # for each phoneme def features - @features ||= begin + @features ||= begin header, *manners = ChartData.lines _, *positions = header.chomp.split(' | ') positions.map(&:strip!) @@ -163,13 +166,13 @@ manners.each_with_object({}) do |row, phonemes| manner, *columns = row.chomp.split(' | ') manner.strip! positions.zip(columns).each do |position, phoneme_text| data = { - position: position, + position: position, position_index: position_indexes[position], - manner: manner, + manner: manner, } # If there is a character in the first byte then this articulation # has a voiceless phoneme. The symbol may use additional characters # as part of the phoneme symbol. unless phoneme_text[0] == ' ' @@ -216,23 +219,24 @@ def phonemes Consonants.phonemes + Vowels.phonemes end - Symbols = Consonants.phonemes.reduce({}) {|acc, p| acc.update p => :consonant }.merge( + Symbols = Consonants.phonemes.reduce({}) { |acc, p| acc.update p => :consonant }.merge( Vowels.phonemes.reduce({}) { |acc, p| acc.update p => :vowel } ) def distance(phoneme1, phoneme2) return 0 if phoneme1 == phoneme2 + distance_map.fetch(phoneme1).fetch(phoneme2) end def distance_map @distance_map ||= ( Vowels.phonemes + Consonants.phonemes - ).permutation(2).each_with_object(Hash.new { |h, k| h[k] = {} } ) do |pair, scores| + ).permutation(2).each_with_object(Hash.new { |h, k| h[k] = {} }) do |pair, scores| p1, p2 = *pair score = _distance(p1, p2) scores[p1][p2] = score scores[p2][p1] = score end @@ -251,11 +255,11 @@ # # grapheme_as_utf_8_long("ɰ̊") # => 1413 (624 + (10 * 778)) def grapheme_as_utf_8_long(grapheme) grapheme.unpack('U*').each_with_index.reduce(0) do |total, (byte, i)| - total += (10**i) * byte + total + (10**i) * byte end end # This will print a C code file with a function that implements a two-level C # switch like the following: @@ -297,11 +301,11 @@ writer.puts " case #{a_i}: // #{a}" writer.puts ' switch (b) {' distances.each do |(b, b_i), distance| writer.puts " case #{b_i}: // #{a}->#{b}" writer.puts " return (float) #{distance};" - writer.puts " break;" + writer.puts ' break;' end writer.puts ' }' end writer.puts ' }' writer.puts ' return 1.0;' @@ -310,14 +314,14 @@ private def _distance(phoneme1, phoneme2) types = [Symbols.fetch(phoneme1), Symbols.fetch(phoneme2)].sort - if types == [:consonant, :vowel] + if types == %i[consonant vowel] 1.0 - elsif types == [:vowel, :vowel] + elsif types == %i[vowel vowel] Vowels.distance(phoneme1, phoneme2) - elsif types == [:consonant, :consonant] + elsif types == %i[consonant consonant] Consonants.distance(phoneme1, phoneme2) end end end