Sha256: cb61ca9d666f6ab78f3ad4543fbbd9c639f5b783cab4e1779d3ce2e07d50e776
Contents?: true
Size: 1.1 KB
Versions: 1
Compression:
Stored size: 1.1 KB
Contents
require "sanitize"
require "net/http"

module AozoraPolly
  # Builds an SSML <speak> document from an HTML page, either fetched from a
  # URL or supplied directly as a string.
  class Builder
    # Elements kept by the sanitizer; every other tag (and, since no
    # attributes are whitelisted, every attribute) is removed before the
    # regex rewrites below run.
    ALLOWED_ELEMENTS = %w[br ruby rb rp rt div h1 h2 h3 h4 h5].freeze

    # Ordered [pattern, replacement] pairs applied to the sanitized HTML.
    # Order matters: <rb>/<rp> are removed together with their text, while
    # <rt> only loses its tags, so the <rt> text is what remains of a
    # <ruby> group.
    FRAGMENT_REWRITES = [
      [%r{<br\s*/?>}, "<break />"],
      [%r{<(?:h\d|div)[^>]*>}, "<p>"],
      [%r{</(?:h\d|div)>}, "</p>"],
      [%r{</?ruby>}, ""],
      # `*` (not `+`) so that empty <rb></rb> / <rp></rp> pairs are removed
      # too instead of leaking non-SSML tags into the output.
      [%r{<rb>[^<]*</rb>}, ""],
      [%r{<rp>[^<]*</rp>}, ""],
      [%r{</?rt>}, ""]
    ].freeze

    # Downloads the page at +aozora_url+ and converts it to SSML.
    #
    # The response body is relabelled as CP932 without inspecting any
    # response header, so the page is assumed to be served in that encoding.
    #
    # NOTE(review): Net::HTTP.get returns the body regardless of the HTTP
    # status, so an error or redirect page would be converted as if it were
    # the document — confirm whether callers need a status check.
    #
    # @param aozora_url [String] URL of the HTML page to fetch
    # @return [String] the SSML document
    def url2ssml(aozora_url)
      body = Net::HTTP.get(URI.parse(aozora_url))
      html2ssml(body.force_encoding("cp932"))
    end

    # Converts an HTML string to SSML.
    #
    # The string is transcoded to UTF-8 from whatever encoding it is tagged
    # with, parsed by AozoraPolly::Document (defined elsewhere), and the
    # document's main text and bibliography are converted and concatenated.
    #
    # @param html [String] HTML source, tagged with its real encoding
    # @return [String] the SSML document
    def html2ssml(html)
      doc = ::AozoraPolly::Document.parse(html.encode("utf-8"))
      body_parts = [doc.main_text, doc.bibliography].map do |section|
        html2ssml_fragment(section)
      end
      create_ssml(doc, body_parts.join)
    end

    # Converts one HTML fragment to an SSML fragment: sanitizes it down to
    # ALLOWED_ELEMENTS, then applies FRAGMENT_REWRITES in order.
    #
    # @param fragment [String] HTML fragment
    # @return [String] SSML fragment containing only <p> and <break /> tags
    #   for the constructs handled by FRAGMENT_REWRITES
    def html2ssml_fragment(fragment)
      sanitized = Sanitize.fragment(fragment, elements: ALLOWED_ELEMENTS)
      FRAGMENT_REWRITES.reduce(sanitized) do |text, (pattern, replacement)|
        text.gsub(pattern, replacement)
      end
    end

    # Wraps +ssml_body+ in a <speak> envelope, preceded by the document's
    # title and author as separate paragraphs.
    #
    # NOTE(review): doc.title and doc.author are interpolated verbatim; if
    # Document returns unescaped text, a title containing `&` or `<` would
    # produce malformed XML — confirm against Document before escaping here.
    #
    # @param doc [#title, #author] parsed document
    # @param ssml_body [String] already-converted SSML fragment(s)
    # @return [String] the complete SSML document
    def create_ssml(doc, ssml_body)
      # The heredoc body is kept flush left on purpose: with `<<-` only the
      # terminator may be indented, and the XML declaration has to be the
      # very first thing in the output.
      <<-XML
<?xml version="1.0"?>
<speak version="1.1" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="ja">
<p>#{doc.title}</p>
<p>#{doc.author}</p>
#{ssml_body}
</speak>
      XML
    end
  end
end
Version data entries
1 entries across 1 versions & 1 rubygems
Version | Path |
---|---|
aozora-polly-0.2.0 | lib/aozora_polly/builder.rb |