require "aws-sdk-polly"
require "logger"
require "nokogiri"
require "htmlentities"
require "expeditor"
require "concurrent"
require "tmpdir"
module Ssml2mp3
class Builder
# Read-only accessors for the instance variables of the same names assigned in #initialize.
attr_reader :options, :sample_rate, :client, :logger, :expeditor_service
# NOTE(review): presumably the maximum text length sent to Polly in one request;
# its use is not visible in this part of the file — confirm.
POLLY_TEXT_LENGTH_LIMIT = 1000
# Sets up the logger, the Polly client and the worker pool used for synthesis.
#
# The following keys are removed from a copy of +options+ (the caller's hash is
# left untouched); whatever remains is passed to Aws::Polly::Client.new:
#   :logger      - logger to use; defaults to a Logger writing to STDOUT
#   :sample_rate - sample rate handed to Polly; defaults to "16000"
#   :max_threads - upper bound of the thread pool; defaults to 10
# :region is set to "us-west-2" when it is absent or nil.
def initialize(options={})
  @options = options.dup

  # Builder-specific settings must not leak into the AWS client options.
  @logger = @options.delete(:logger)
  @logger ||= Logger.new(STDOUT)
  @sample_rate = @options.delete(:sample_rate)
  @sample_rate ||= "16000"
  @max_threads = @options.delete(:max_threads)
  @max_threads ||= 10

  @options[:region] = "us-west-2" unless @options[:region]
  @client = Aws::Polly::Client.new(@options)

  # Synthesis commands run on a pool that can shrink to zero threads when idle.
  pool = Concurrent::ThreadPoolExecutor.new(
    min_threads: 0,
    max_threads: @max_threads,
  )
  @expeditor_service = Expeditor::Service.new(executor: pool)

  @htmlentities = HTMLEntities.new
end
# Reads the SSML document at +ssml_path+ and writes the synthesized MP3 to +mp3_path+.
#
# The MP3 file name without its ".mp3" extension is handed to #synthesize as the
# basename. If anything fails after the output file has been opened, the incomplete
# file is removed so that a truncated MP3 is never mistaken for a finished one; the
# original error still propagates to the caller.
#
# @param ssml_path [String] path of the SSML file to read
# @param mp3_path [String] path of the MP3 file to create (opened with "wb", so an
#   existing file is overwritten)
# @return the result of the final logger call, or nil when there is no logger
def synthesize_file(ssml_path, mp3_path)
  basename = File.basename(mp3_path, ".mp3")
  ssml = File.read(ssml_path)

  opened = false
  finished = false
  begin
    File.open(mp3_path, "wb") do |output|
      opened = true
      synthesize(ssml, basename, output)
    end
    finished = true
  ensure
    # Clean up only what this call opened itself, and only regular files
    # (never a device or pipe passed in as the target).
    File.delete(mp3_path) if opened && !finished && File.file?(mp3_path)
  end

  logger.info("Generated: #{mp3_path}") if logger
end
# Synthesizes an SSML document and appends the resulting MP3 data to +output+.
#
# The document is split with #split_ssml. Every piece is written to a temporary
# .ssml file (its path appears in the error log) and synthesized by its own
# Expeditor command into a temporary .mp3 file. Once every command has been
# collected, the temporary MP3 files are copied to +output+ in the original
# order and +output+ is flushed. The temporary directory exists only for the
# duration of the call.
#
# @param ssml [String] the SSML document
# @param basename [String] name stem for the temporary per-piece files
# @param output [#write, #flush] destination stream for the MP3 bytes
# @return the value of +output.flush+
# @raise whatever a piece's synthesis raised; it is logged once and re-raised
#   (NOTE(review): assumes Expeditor::Command#get re-raises the command's error — confirm)
def synthesize(ssml, basename, output)
  pieces = split_ssml(ssml)
  Dir.mktmpdir("foo") do |tmpdir|
    # Start every command before waiting on any, so the pieces run concurrently.
    jobs = pieces.each_with_index.map do |piece, i|
      tmp_ssml_path = File.join(tmpdir, "#{basename}-#{i}.ssml")
      File.write(tmp_ssml_path, piece)
      tmp_path = File.join(tmpdir, "#{basename}-#{i}.mp3")
      command = Expeditor::Command.new(service: expeditor_service) do
        logger.info("#{tmp_path}...") if logger
        begin
          client.synthesize_speech(
            response_target: tmp_path,
            output_format: "mp3",
            sample_rate: sample_rate,
            text: piece,
            text_type: "ssml",
            voice_id: "Mizuki",
          )
        rescue => e
          # Log once, with the path of the offending piece, then let the error surface.
          logger.error("#{e.message}: #{tmp_ssml_path}\n#{piece}") if logger
          raise
        end
      end
      command.start
      [command, tmp_path]
    end

    jobs.each { |command, _tmp_path| command.get }

    # Concatenate in submission order, independent of completion order.
    jobs.each do |_command, tmp_path|
      File.open(tmp_path, "rb") { |tmp_file| IO.copy_stream(tmp_file, output) }
    end
    output.flush
  end
end
def split_ssml(ssml)
doc = Nokogiri::XML.parse(tweak_ssml(ssml))
elements = doc.root.children
header = (%r((.+
", ""). gsub("
", '