lib/phonetics.rb in phonetics-1.5.1 vs lib/phonetics.rb in phonetics-1.5.2
- old
+ new
@@ -1,5 +1,7 @@
+# frozen_string_literal: true
+
require 'delegate'
module Phonetics
extend self
@@ -8,11 +10,10 @@
#
# Usage:
# Phonetics::String.new("wətɛvɝ").each_phoneme.to_a
# => ["w", "ə", "t", "ɛ", "v", "ɝ"]
class String < SimpleDelegator
-
# Group all phonemes by how many characters they have. Use this to walk
# through a string finding phonemes (looking for longest ones first)
def self.phonemes_by_length
@phonemes_by_length ||= Phonetics.phonemes.each_with_object(
# This relies on the implicit stable key ordering of Hash objects in Ruby
@@ -27,20 +28,19 @@
idx = 0
Enumerator.new do |y|
while idx < chars.length
found = false
self.class.phonemes_by_length.each do |size, phonemes|
- if idx + size <= chars.length
- candidate = chars[idx..idx+size-1].join
- if phonemes.include?(candidate)
- y.yield candidate
- idx += size
- found = true
- break
- else
- end
- end
+ next unless idx + size <= chars.length
+
+ candidate = chars[idx..idx + size - 1].join
+ next unless phonemes.include?(candidate)
+
+ y.yield candidate
+ idx += size
+ found = true
+ break
end
idx += 1 unless found
end
end
end
@@ -74,11 +74,11 @@
'ɯ' => { F1: 300, F2: 1390, rounded: false },
'æ' => { F1: 800, F2: 1900, rounded: false }, # Guessing From other vowels
'u' => { F1: 350, F2: 650, rounded: true }, # Guessing From other vowels
'ʊ' => { F1: 350, F2: 650, rounded: true },
# Frequencies from http://videoweb.nie.edu.sg/phonetic/vowels/measurements.html
- }
+ }.freeze
# Every symbol that has an entry in FormantFrequencies. The key list is built
# on the first call and the same array is handed back on later calls.
def phonemes
  return @phonemes if @phonemes

  @phonemes = FormantFrequencies.keys
end
@@ -88,12 +88,12 @@
# TODO: account for rhoticity (F3)
def distance(phoneme1, phoneme2)
formants1 = FormantFrequencies.fetch(phoneme1)
formants2 = FormantFrequencies.fetch(phoneme2)
- @minmax_f1 ||= FormantFrequencies.values.minmax {|a, b| a[:F1] <=> b[:F1] }.map {|h| h[:F1] }
- @minmax_f2 ||= FormantFrequencies.values.minmax {|a, b| a[:F2] <=> b[:F2] }.map {|h| h[:F2] }
+ @minmax_f1 ||= FormantFrequencies.values.minmax { |a, b| a[:F1] <=> b[:F1] }.map { |h| h[:F1] }
+ @minmax_f2 ||= FormantFrequencies.values.minmax { |a, b| a[:F2] <=> b[:F2] }.map { |h| h[:F2] }
# Get an x and y value for each input phoneme scaled between 0.0 and 1.0
# We'll use the scaled f1 as the 'x' and the scaled f2 as the 'y'
scaled_phoneme1_f1 = (formants1[:F1] - @minmax_f1[0]) / @minmax_f1[1].to_f
scaled_phoneme1_f2 = (formants1[:F2] - @minmax_f2[0]) / @minmax_f2[1].to_f
@@ -103,53 +103,56 @@
f1_distance = (scaled_phoneme1_f1 - scaled_phoneme2_f1).abs
f2_distance = (scaled_phoneme1_f2 - scaled_phoneme2_f2).abs
# When we have four values we can use the pythagorean theorem on them
# (order doesn't matter)
- Math.sqrt((f1_distance ** 2) + (f2_distance ** 2))
+ Math.sqrt((f1_distance**2) + (f2_distance**2))
end
end
module Consonants
extend self
# Plosives and fricatives are less similar than trills and flaps, or
# sibilant fricatives and non-sibilant fricatives
# TODO: this is unfinished and possibly a bad idea
# Maps a manner-of-articulation label to a list of feature tags (strings).
# Several manners currently map to an empty list.
# NOTE(review): the key 'Tap/Flap' is capitalized differently from the
# 'Tap/flap' row label used in ChartData below, while 'Lateral tap/flap'
# matches its row label - confirm which spelling lookups are expected to use.
MannerDistances = {
- 'Nasal' => %w[continuant],
- 'Stop' => %w[],
- 'Sibilant fricative' => %w[continuant fricative],
+ 'Nasal' => %w[continuant],
+ 'Stop' => %w[],
+ 'Sibilant fricative' => %w[continuant fricative],
'Non-sibilant fricative' => %w[continuant non_sibilant fricative],
- 'Approximant' => %w[],
- 'Tap/Flap' => %w[],
- 'Trill' => %w[],
- 'Lateral fricative' => %w[continuant fricative],
- 'Lateral approximant' => %w[],
- 'Lateral tap/flap' => %w[],
+ 'Approximant' => %w[],
+ 'Tap/Flap' => %w[],
+ 'Trill' => %w[],
+ 'Lateral fricative' => %w[continuant fricative],
+ 'Lateral approximant' => %w[],
+ 'Lateral tap/flap' => %w[],
}.freeze
# This chart (columns 2 through the end, anyway) is a direct port of
# https://en.wikipedia.org/wiki/International_Phonetic_Alphabet#Letters
- # We # store the consonant table in this format to make updating it easier.
- ChartData = %Q{ | Labio-velar | Bi-labial | Labio-dental | Linguo-labial | Dental | Alveolar | Post-alveolar | Retro-flex | Palatal | Velar | Uvular | Pharyngeal | Glottal
+ # We store the consonant table in this format to make updating it easier.
+ #
+ # rubocop:disable Layout/TrailingWhitespace
+ ChartData = %( | Labio-velar | Bi-labial | Labio-dental | Linguo-labial | Dental | Alveolar | Post-alveolar | Retro-flex | Palatal | Velar | Uvular | Pharyngeal | Glottal
Nasal | | m̥ m | ɱ | n̼ | | n̥ n | | ɳ̊ ɳ | ɲ̊ ɲ | ŋ̊ ŋ | ɴ | |
Stop | | p b | p̪ b̪ | t̼ d̼ | | t d | | ʈ ɖ | c ɟ | k g | q ɢ | ʡ | ʔ
Sibilant fricative | | | | | | s z | ʃ ʒ | ʂ ʐ | ɕ ʑ | | | |
Non-sibilant fricative | | ɸ β | f v | θ̼ ð̼ | θ ð | θ̠ ð̠ | ɹ̠̊˔ ɹ̠˔ | ɻ˔ | ç ʝ | x ɣ | χ ʁ | ħ ʕ | h ɦ
Approximant | w | | ʋ̥ ʋ | | | ɹ̥ ɹ | | ɻ̊ ɻ | j̊ j | ɰ̊ ɰ | | | ʔ̞
Tap/flap | | ⱱ̟ | ⱱ | ɾ̼ | | ɾ̥ ɾ | | ɽ̊ ɽ | | | ɢ̆ | ʡ̆ |
Trill | | ʙ̥ ʙ | | | | r̥ r | | | | | ʀ̥ ʀ | ʜ ʢ |
Lateral fricative | | | | | | ɬ ɮ | | ɭ̊˔ ɭ˔ | ʎ̝̊ ʎ̝ | ʟ̝̊ ʟ̝ | | |
Lateral approximant | | | | | | l̥ l | | ɭ̊ ɭ | ʎ̥ ʎ | ʟ̥ ʟ | ʟ̠ | |
Lateral tap/flap | | | | | | ɺ | | ɭ̆ | ʎ̆ | ʟ̆ | | |
- }
+ )
+ # rubocop:enable Layout/TrailingWhitespace
# Parse the ChartData into a lookup table where we can retrieve attributes
# for each phoneme
def features
- @features ||= begin
+ @features ||= begin
header, *manners = ChartData.lines
_, *positions = header.chomp.split(' | ')
positions.map(&:strip!)
@@ -163,13 +166,13 @@
manners.each_with_object({}) do |row, phonemes|
manner, *columns = row.chomp.split(' | ')
manner.strip!
positions.zip(columns).each do |position, phoneme_text|
data = {
- position: position,
+ position: position,
position_index: position_indexes[position],
- manner: manner,
+ manner: manner,
}
# If there is a character in the first byte then this articulation
# has a voiceless phoneme. The symbol may use additional characters
# as part of the phoneme symbol.
unless phoneme_text[0] == ' '
@@ -216,23 +219,24 @@
# All known phoneme symbols: the consonant list followed by the vowel list.
def phonemes
  consonant_symbols = Consonants.phonemes
  vowel_symbols = Vowels.phonemes
  consonant_symbols + vowel_symbols
end
# Lookup table from phoneme symbol to its kind, :consonant or :vowel.
# The vowel entries are merged in last, so a symbol present in both lists
# would be recorded as :vowel.
- Symbols = Consonants.phonemes.reduce({}) {|acc, p| acc.update p => :consonant }.merge(
+ Symbols = Consonants.phonemes.reduce({}) { |acc, p| acc.update p => :consonant }.merge(
Vowels.phonemes.reduce({}) { |acc, p| acc.update p => :vowel }
)
# Distance between two phonemes.
# Identical phonemes short-circuit to 0; any other pair is read from the
# memoized distance_map. Hash#fetch is used for both lookups, so a phoneme
# missing from the map raises KeyError rather than returning nil.
def distance(phoneme1, phoneme2)
return 0 if phoneme1 == phoneme2
+
distance_map.fetch(phoneme1).fetch(phoneme2)
end
def distance_map
@distance_map ||= (
Vowels.phonemes + Consonants.phonemes
- ).permutation(2).each_with_object(Hash.new { |h, k| h[k] = {} } ) do |pair, scores|
+ ).permutation(2).each_with_object(Hash.new { |h, k| h[k] = {} }) do |pair, scores|
p1, p2 = *pair
score = _distance(p1, p2)
scores[p1][p2] = score
scores[p2][p1] = score
end
@@ -251,11 +255,11 @@
#
# grapheme_as_utf_8_long("ɰ̊")
# => 8404 (624 + (10 * 778))
def grapheme_as_utf_8_long(grapheme)
# 'U*' unpacks the string into Unicode codepoints, so `byte` below is a
# whole codepoint rather than a single byte.
# NOTE(review): the weighted sum is not guaranteed to be unique - different
# codepoint sequences can add up to the same integer. Confirm no two
# phonemes in use collide.
grapheme.unpack('U*').each_with_index.reduce(0) do |total, (byte, i)|
# Weight the i-th codepoint by 10**i and add it to the running sum; the
# block's value becomes the next `total`.
- total += (10**i) * byte
+ total + (10**i) * byte
end
end
# This will print a C code file with a function that implements a two-level C
# switch like the following:
@@ -297,11 +301,11 @@
writer.puts " case #{a_i}: // #{a}"
writer.puts ' switch (b) {'
distances.each do |(b, b_i), distance|
writer.puts " case #{b_i}: // #{a}->#{b}"
writer.puts " return (float) #{distance};"
- writer.puts " break;"
+ writer.puts ' break;'
end
writer.puts ' }'
end
writer.puts ' }'
writer.puts ' return 1.0;'
@@ -310,14 +314,14 @@
private
# Score a pair of phonemes according to what kinds of sound they are.
# Each phoneme's kind is looked up in Symbols (KeyError if it is unknown) and
# the two kinds are sorted so the comparisons below do not depend on
# argument order:
#   consonant + vowel     => fixed distance of 1.0
#   vowel + vowel         => Vowels.distance
#   consonant + consonant => Consonants.distance
# Evaluates to nil if none of the branches match.
def _distance(phoneme1, phoneme2)
types = [Symbols.fetch(phoneme1), Symbols.fetch(phoneme2)].sort
- if types == [:consonant, :vowel]
+ if types == %i[consonant vowel]
1.0
- elsif types == [:vowel, :vowel]
+ elsif types == %i[vowel vowel]
Vowels.distance(phoneme1, phoneme2)
- elsif types == [:consonant, :consonant]
+ elsif types == %i[consonant consonant]
Consonants.distance(phoneme1, phoneme2)
end
end
end