lib/phonetics/code_generator.rb in phonetics-1.8.0 vs lib/phonetics/code_generator.rb in phonetics-1.9.0
- old
+ new
@@ -23,10 +23,14 @@
writer.flush
end
private
+ def binary(str)
+ "0b#{str.bytes.map { |byte| byte.to_s(2).rjust(8, '0') }.join}"
+ end
+
# Turn the bytes of all phonemes into a lookup trie where a sequence of
# bytes can find a phoneme in linear time.
def phoneme_byte_trie
  # The full phoneme inventory is the input; trie construction itself is
  # delegated to phoneme_byte_trie_for.
  every_phoneme = Phonetics.phonemes
  phoneme_byte_trie_for(every_phoneme)
end
@@ -50,26 +54,26 @@
subtrie[byte]
end
end
end
- def ruby_source
- location = caller_locations.first
- "#{location.path.split('/')[-4..-1].join('/')}:#{location.lineno}"
- end
-
- def describe(phoneme, depth)
- indent depth, "// Phoneme: #{phoneme.inspect}, bytes: #{phoneme.bytes.inspect}"
+ def describe(phoneme, depth = 0)
+ indent depth, "// Phoneme: '#{phoneme}', bytes: #{phoneme.bytes.inspect}"
if Phonetics::Consonants.features.key?(phoneme)
indent depth, "// consonant features: #{Phonetics::Consonants.features[phoneme].to_json}"
else
indent depth, "// vowel features: #{Phonetics::Vowels::FormantFrequencies[phoneme].to_json}"
end
end
+ def ruby_source
+ location = caller_locations.first
+ "#{location.path.split('/')[-4..-1].join('/')}:#{location.lineno}"
+ end
+
def indent(depth, line)
- write " #{' ' * depth}#{line}"
+ write " #{' ' * depth}#{line}"
end
def write(line)
  # Hand the line to the generator's output object via its puts method.
  writer.puts(line)
end
@@ -86,115 +90,48 @@
# next_phoneme_length() function.
#
# This will print a C code file with a function that implements a multi-level C
# switch like the following:
#
- # switch (phoneme1_length) {
- # case 2:
- # switch(string1[1]) {
- # case 201: // first byte of "ɪ"
- # switch(string1[3]) {
- # case 170: // second and final byte of "ɪ"
- # // Phoneme: "ɪ", bytes: [201, 170]
+ # switch (phoneme1) {
+ # case 'ɪ': // two bytes: [201, 170]
# // vowel features: {"F1":300,"F2":2100,"rounded":false}
- # switch(string2[6]) {
- # case 105: // first and only byte of "i"
- # // Phoneme: "i", bytes: [105]
- # // vowel features: {"F1":240,"F2":2400,"rounded":false}
- # return (float) 0.14355381904337383;
- # break;
+ #
+ # switch(phoneme2) {
+ # case 'i': // one byte: [105]
+ # // vowel features: {"F1":240,"F2":2400,"rounded":false}
+ # return (float) 0.14355381904337383;
+ # break;
#
# the distance of ("ɪ", "i") is therefore 0.14355
#
def generate
write(<<-HEADER.gsub(/^ {6}/, ''))
// This is compiled from Ruby, in #{ruby_source}
- #include <stdbool.h>
- #include <stdio.h>
- #include "./phonemes.h"
- float phonetic_cost(int *string1, int string1_offset, int phoneme1_length, int *string2, int string2_offset, int phoneme2_length) {
+ #include <stdint.h>
+ float phonetic_cost(int64_t phoneme1, int64_t phoneme2) {
HEADER
- write ' switch (phoneme1_length) {'
- by_byte_length.each do |length, phonemes|
- write " case #{length}:"
- switch_phoneme1(phoneme_byte_trie_for(phonemes), 0)
- write ' break;'
+ write ' switch (phoneme1) {'
+ Phonetics.phonemes.each do |phoneme1|
+ write " case #{binary(phoneme1)}:"
+ describe(phoneme1, 2)
+ write " switch(phoneme2) {"
+ Phonetics.distance_map[phoneme1].each do |phoneme2, distance|
+ write " case #{binary(phoneme2)}:"
+ describe(phoneme2, 6)
+ write " return (float) #{distance};"
+ write ' break;'
+ end
+ write " }"
+ write ' break;'
end
write ' }'
write ' return (float) 1.0;'
write '};'
write ''
- end
-
- def switch_phoneme1(trie, depth = 0)
- indent depth, "switch(string1[string1_offset + #{depth}]) {"
- trie.each do |key, subtrie|
- next if key == :source
- next if subtrie.empty?
-
- indent depth + 1, "case #{key}:"
-
- phoneme1 = subtrie[:source]
-
- # If this could be a match of a phoneme1 then find phoneme2
- if phoneme1
- # Add a comment to help understand the dataset
- describe(phoneme1, depth + 2) if phoneme1
-
- by_byte_length.each do |_, phonemes|
- byte_trie = phoneme_byte_trie_for(phonemes)
- next if byte_trie.empty?
-
- switch_phoneme2(byte_trie, phoneme1, 0)
- end
- else
- switch_phoneme1(subtrie, depth + 1)
- end
-
- indent depth + 2, 'break;'
- end
- indent depth, '}'
- end
-
- def switch_phoneme2(trie, previous_phoneme, depth = 0)
- indent depth, "switch(string2[string2_offset + #{depth}]) {"
- trie.each do |key, subtrie|
- next if key == :source
- next if subtrie.empty?
-
- phoneme2 = subtrie[:source]
-
- indent depth + 1, "case #{key}:"
-
- if phoneme2
- value = if previous_phoneme == phoneme2
- 0.0
- else
- distance(previous_phoneme, phoneme2)
- end
- # Add a comment to help understand the dataset
- describe(phoneme2, depth + 2)
- indent depth + 2, "return (float) #{value};"
- else
- switch_phoneme2(subtrie, previous_phoneme, depth + 1)
- end
-
- indent depth + 2, 'break;'
- end
- indent depth, '}'
- end
-
- def by_byte_length
- Phonetics.phonemes.group_by do |phoneme|
- phoneme.bytes.length
- end.sort_by(&:first)
- end
-
- def distance(p1, p2)
- Phonetics.distance_map[p1][p2]
end
end
class NextPhonemeLength < CodeGenerator
# There's no simple way to break a string of IPA characters into phonemes.