lib/asciidoctor-epub3/converter.rb in asciidoctor-epub3-1.0.0.alpha.1 vs lib/asciidoctor-epub3/converter.rb in asciidoctor-epub3-1.0.0.alpha.2

- old
+ new

@@ -2,12 +2,13 @@ require_relative 'spine_item_processor' require_relative 'font_icon_map' module Asciidoctor module Epub3 -#WordJoiner = [8288].pack 'U*' +# tried 8288, but it didn't work in older readers WordJoiner = [65279].pack 'U*' +WordJoinerRx = RUBY_ENGINE_JRUBY ? /\uFEFF/ : WordJoiner # Public: The main converter for the epub3 backend that handles packaging the # EPUB3 or KF8 publication file. class Converter include ::Asciidoctor::Converter @@ -92,11 +93,11 @@ end end # TODO aggregate authors of spine document into authors attribute(s) on main document def navigation_document node, spine - doctitle_sanitized = ((node.doctitle sanitize: true) || (node.attr 'untitled-label')).gsub WordJoiner, '' + doctitle_sanitized = (node.doctitle sanitize: true, use_fallback: true).gsub WordJoinerRx, '' lines = [%(<!DOCTYPE html> <html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" xml:lang="#{lang = (node.attr 'lang', 'en')}" lang="#{lang}"> <head> <meta charset="UTF-8"/> <title>#{doctitle_sanitized}</title> @@ -107,38 +108,34 @@ <h1>#{doctitle_sanitized}</h1> <nav epub:type="toc" id="toc"> <h2>#{node.attr 'toc-title'}</h2> <ol>)] spine.each do |item| - lines << %(<li><a href="#{item.id || (item.attr 'docname')}.xhtml">#{((item.doctitle sanitize: true) || (item.attr 'untitled-label')).gsub WordJoiner, ''}</a></li>) + lines << %(<li><a href="#{item.id || (item.attr 'docname')}.xhtml">#{(item.doctitle sanitize: true, use_fallback: true).gsub WordJoinerRx, ''}</a></li>) end lines << %(</ol> </nav> </body> </html>) lines * EOL end def document node docid = node.id - if (doctitle = node.doctitle) - doctitle_sanitized = (node.doctitle sanitize: :sgml).gsub WordJoiner, '' - if doctitle.include? ': ' - title, _, subtitle = doctitle.rpartition ': ' - else - # HACK until we get proper handling of title-only in CSS - title = '' - subtitle = doctitle - end + + if (doctitle = node.doctitle partition: true, sanitize: true, use_fallback: true).subtitle? 
+ title = doctitle.main + title_upper = title.upcase + subtitle = doctitle.subtitle else # HACK until we get proper handling of title-only in CSS - title = '' - subtitle = node.attr 'untitled-label' + title = title_upper = '' + subtitle = doctitle.combined end - subtitle_formatted = subtitle.gsub(WordJoiner, '').split(' ').map {|w| %(<b>#{w}</b>) } * ' ' - title_upper = title.upcase + doctitle_sanitized = (node.doctitle sanitize: true, use_fallback: true).gsub WordJoinerRx, '' + subtitle_formatted = subtitle.gsub(WordJoinerRx, '').split(' ').map {|w| %(<b>#{w}</b>) } * ' ' # FIXME make this uppercase routine more intelligent, less fragile subtitle_formatted_upper = subtitle_formatted.upcase .gsub(UppercaseTagRx) { %(<#{$1}#{$2.downcase}>) } .gsub(NamedEntityRx) { %(&#{$1.downcase};) } @@ -792,11 +789,11 @@ def xml_sanitize value, target = :attribute sanitized = (value.include? '<') ? value.gsub(XmlElementRx, '').tr_s(' ', ' ').strip : value if target == :plain && (sanitized.include? ';') sanitized = sanitized.gsub(CharEntityRx) { [$1.to_i].pack('U*') }.gsub(FromHtmlSpecialCharsRx, FromHtmlSpecialCharsMap) elsif target == :attribute - sanitized = sanitized.gsub(WordJoiner, '').gsub('"', '&quot;') + sanitized = sanitized.gsub(WordJoinerRx, '').gsub('"', '&quot;') end sanitized end # TODO make check for last content paragraph a feature of Asciidoctor @@ -815,10 +812,10 @@ class DocumentIdGenerator class << self def generate_id doc unless (id = doc.id) id = if doc.header? - doc.doctitle(sanitize: :sgml).gsub(WordJoiner, '').downcase.delete(':').tr_s(' ', '-').tr_s('-', '-') + doc.doctitle(sanitize: true).gsub(WordJoinerRx, '').downcase.delete(':').tr_s(' ', '-').tr_s('-', '-') elsif (first_section = doc.first_section) first_section.id else %(document-#{doc.object_id}) end