Sha256: 5c7d71114087c97bfacf67dcf6df1b4c9e8f20be000cbe2a9210cac8af3f4957

Contents?: true

Size: 1.45 KB

Versions: 1

Compression:

Stored size: 1.45 KB

Contents

require "ssml2mp3"
require "sanitize"
require "net/http"

module AozoraPolly
  # Turns an HTML page (fetched from a URL or passed in directly) into an SSML
  # document and hands that SSML to an Ssml2mp3::Builder to write an MP3 file.
  class Builder
    # Upper bound on HTTP redirects followed when downloading a page.
    MAX_REDIRECTS = 5

    # HTML elements kept by Sanitize; every other tag is stripped before the
    # remaining markup is rewritten into SSML.
    ALLOWED_ELEMENTS = %w(br ruby rb rp rt div h1 h2 h3 h4 h5).freeze

    attr_reader :mp3_builder

    def initialize
      @mp3_builder = Ssml2mp3::Builder.new
    end

    # Downloads the page at +aozora_url+, converts it to SSML and passes the
    # SSML, the MP3 base name and an open binary file handle for +mp3_path+ to
    # +mp3_builder.synthesize+.
    #
    # @param aozora_url [String] URL of the HTML page to read
    # @param mp3_path [String] path of the file to write (opened in "wb" mode)
    # @return whatever +mp3_builder.synthesize+ returns
    # @raise [Net::ProtocolError] when the page cannot be fetched successfully
    def url2mp3(aozora_url, mp3_path)
      # Build the SSML first so a failed download never creates/truncates the file.
      ssml = url2ssml(aozora_url)

      basename = File.basename(mp3_path, ".mp3")
      File.open(mp3_path, "wb") do |output|
        mp3_builder.synthesize(ssml, basename, output)
      end
    end

    # Fetches +aozora_url+ and converts the response body to SSML.
    #
    # The body is tagged as CP932 before conversion. Redirects are followed
    # (up to MAX_REDIRECTS) and any non-2xx final response raises instead of
    # letting an error page be parsed as if it were the document.
    #
    # @param aozora_url [String]
    # @return [String] SSML document
    # @raise [Net::ProtocolError] on a non-success HTTP response
    def url2ssml(aozora_url)
      html = fetch_body(URI.parse(aozora_url)).force_encoding("cp932")
      html2ssml(html)
    end

    # Parses +html+ with AozoraPolly::Document and builds a complete SSML
    # document from its main text followed by its bibliography.
    #
    # @param html [String] page source; transcoded to UTF-8 from its current encoding
    # @return [String] SSML document
    def html2ssml(html)
      doc = ::AozoraPolly::Document.parse(html.encode("utf-8"))

      main_ssml = html2ssml_fragment(doc.main_text)
      bib_ssml = html2ssml_fragment(doc.bibliography)
      create_ssml(doc, main_ssml + bib_ssml)
    end

    # Rewrites an HTML fragment into SSML body markup.
    #
    # @param fragment [String] HTML fragment
    # @return [String] fragment with only SSML-compatible tags left
    def html2ssml_fragment(fragment)
      Sanitize.
        fragment(fragment, elements: ALLOWED_ELEMENTS).
        gsub(%r(<br\s*/?>), '<break />').        # line break -> SSML pause
        gsub(%r(<(?:h\d|div)[^>]*>), '<p>').     # headings and divs -> paragraphs
        gsub(%r(</(?:h\d|div)>), '</p>').
        gsub(%r(</?ruby>), '').                  # unwrap ruby containers
        gsub(%r(<rb>[^<]*</rb>), '').            # drop the base text ...
        gsub(%r(<rp>[^<]*</rp>), '').            # ... and the fallback parentheses
        gsub(%r(</?rt>), '')                     # keep only the <rt> annotation text
    end

    # Wraps +ssml_body+ in a <speak> document preceded by the title and author
    # of +doc+, each in its own paragraph.
    #
    # NOTE(review): title/author are interpolated as-is; confirm that Document
    # returns XML-safe text.
    #
    # @param doc [#title, #author]
    # @param ssml_body [String]
    # @return [String] SSML document
    def create_ssml(doc, ssml_body)
      <<-XML
<?xml version="1.0"?>
<speak version="1.1" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="ja">
<p>#{doc.title}</p>
<p>#{doc.author}</p>

#{ssml_body}
</speak>
      XML
    end

    private

    # Performs a GET on +uri+, following at most +redirect_limit+ redirects.
    #
    # @param uri [URI::Generic]
    # @param redirect_limit [Integer]
    # @return [String] body of the final, successful response
    # @raise [Net::ProtocolError] when the final response is not 2xx
    def fetch_body(uri, redirect_limit = MAX_REDIRECTS)
      response = Net::HTTP.get_response(uri)

      redirect_limit.times do
        location = response["location"]
        break unless response.is_a?(Net::HTTPRedirection) && location

        # Location may be relative; resolve it against the current URI.
        uri = uri.merge(location)
        response = Net::HTTP.get_response(uri)
      end

      response.value # raises unless the response is 2xx
      response.body
    end
  end
end

Version data entries

1 entry across 1 version & 1 rubygem

Version Path
aozora-polly-0.3.1 lib/aozora_polly/builder.rb