lib/phonetics/code_generator.rb in phonetics-1.8.0 vs lib/phonetics/code_generator.rb in phonetics-1.9.0

- old
+ new

@@ -23,10 +23,14 @@ writer.flush end private + def binary(str) + "0b#{str.bytes.map { |byte| byte.to_s(2).rjust(8, '0') }.join}" + end + # Turn the bytes of all phonemes into a lookup trie where a sequence of # bytes can find a phoneme in linear time. def phoneme_byte_trie phoneme_byte_trie_for(Phonetics.phonemes) end @@ -50,26 +54,26 @@ subtrie[byte] end end end - def ruby_source - location = caller_locations.first - "#{location.path.split('/')[-4..-1].join('/')}:#{location.lineno}" - end - - def describe(phoneme, depth) - indent depth, "// Phoneme: #{phoneme.inspect}, bytes: #{phoneme.bytes.inspect}" + def describe(phoneme, depth = 0) + indent depth, "// Phoneme: '#{phoneme}', bytes: #{phoneme.bytes.inspect}" if Phonetics::Consonants.features.key?(phoneme) indent depth, "// consonant features: #{Phonetics::Consonants.features[phoneme].to_json}" else indent depth, "// vowel features: #{Phonetics::Vowels::FormantFrequencies[phoneme].to_json}" end end + def ruby_source + location = caller_locations.first + "#{location.path.split('/')[-4..-1].join('/')}:#{location.lineno}" + end + def indent(depth, line) - write " #{' ' * depth}#{line}" + write " #{' ' * depth}#{line}" end def write(line) writer.puts line end @@ -86,115 +90,48 @@ # next_phoneme_length() function. # # This will print a C code file with a function that implements a multil-level C # switch like the following: # - # switch (phoneme1_length) { - # case 2: - # switch(string1[1]) { - # case 201: // first byte of "ɪ" - # switch(string1[3]) { - # case 170: // second and final byte of "ɪ" - # // Phoneme: "ɪ", bytes: [201, 170] + # switch (phoneme1) { + # case 'ɪ': // two bytes: [201, 170] # // vowel features: {"F1":300,"F2":2100,"rounded":false} - # switch(string2[6]) { - # case 105: // first and only byte of "i" - # // Phoneme: "i", bytes: [105] - # // vowel features: {"F1":240,"F2":2400,"rounded":false} - # return (float) 0.14355381904337383; - # break; + # + # switch(phoneme2) { + # 'i': // one byte: [105] + # // vowel features: {"F1":240,"F2":2400,"rounded":false} + # return (float) 0.14355381904337383; + # break; # # the distance of ("ɪ", "i")2 is therefore 0.14355 # def generate write(<<-HEADER.gsub(/^ {6}/, '')) // This is compiled from Ruby, in #{ruby_source} - #include <stdbool.h> - #include <stdio.h> - #include "./phonemes.h" - float phonetic_cost(int *string1, int string1_offset, int phoneme1_length, int *string2, int string2_offset, int phoneme2_length) { + #include <stdint.h> + float phonetic_cost(int64_t phoneme1, int64_t phoneme2) { HEADER - write ' switch (phoneme1_length) {' - by_byte_length.each do |length, phonemes| - write " case #{length}:" - switch_phoneme1(phoneme_byte_trie_for(phonemes), 0) - write ' break;' + write ' switch (phoneme1) {' + Phonetics.phonemes.each do |phoneme1| + write " case #{binary(phoneme1)}:" + describe(phoneme1, 2) + write " switch(phoneme2) {" + Phonetics.distance_map[phoneme1].each do |phoneme2, distance| + write " case #{binary(phoneme2)}:" + describe(phoneme2, 6) + write " return (float) #{distance};" + write ' break;' + end + write " }" + write ' break;' end write ' }' write ' return (float) 1.0;' write '};' write '' - end - - def switch_phoneme1(trie, depth = 0) - indent depth, "switch(string1[string1_offset + #{depth}]) {" - trie.each do |key, subtrie| - next if key == :source - next if subtrie.empty? - - indent depth + 1, "case #{key}:" - - phoneme1 = subtrie[:source] - - # If this could be a match of a phoneme1 then find phoneme2 - if phoneme1 - # Add a comment to help understand the dataset - describe(phoneme1, depth + 2) if phoneme1 - - by_byte_length.each do |_, phonemes| - byte_trie = phoneme_byte_trie_for(phonemes) - next if byte_trie.empty? - - switch_phoneme2(byte_trie, phoneme1, 0) - end - else - switch_phoneme1(subtrie, depth + 1) - end - - indent depth + 2, 'break;' - end - indent depth, '}' - end - - def switch_phoneme2(trie, previous_phoneme, depth = 0) - indent depth, "switch(string2[string2_offset + #{depth}]) {" - trie.each do |key, subtrie| - next if key == :source - next if subtrie.empty? - - phoneme2 = subtrie[:source] - - indent depth + 1, "case #{key}:" - - if phoneme2 - value = if previous_phoneme == phoneme2 - 0.0 - else - distance(previous_phoneme, phoneme2) - end - # Add a comment to help understand the dataset - describe(phoneme2, depth + 2) - indent depth + 2, "return (float) #{value};" - else - switch_phoneme2(subtrie, previous_phoneme, depth + 1) - end - - indent depth + 2, 'break;' - end - indent depth, '}' - end - - def by_byte_length - Phonetics.phonemes.group_by do |phoneme| - phoneme.bytes.length - end.sort_by(&:first) - end - - def distance(p1, p2) - Phonetics.distance_map[p1][p2] end end class NextPhonemeLength < CodeGenerator # There's no simple way to break a string of IPA characters into phonemes.