bin/w2a in reverse_adoc-0.2.5 vs bin/w2a in reverse_adoc-0.2.6
- old
+ new
@@ -1,27 +1,14 @@
#!/usr/bin/env ruby
# frozen_string_literal: true
+require 'rubygems'
+require 'bundler/setup'
require 'word-to-markdown'
require 'optparse'
require 'reverse_asciidoctor'
-def scrub_whitespace(string)
- string = string.dup
- string.gsub!(/ |\ |\u00a0/i, ' ') # HTML encoded spaces
- string.sub!(/^\A[[:space:]]+/m, '') # document leading whitespace
- string.sub!(/[[:space:]]+\z$/m, '') # document trailing whitespace
- string.gsub!(/([ ]+)$/, ' ') # line trailing whitespace
- string.gsub!(/\n\n\n\n/, "\n\n") # Quadruple line breaks
- #string.delete!(' ') # Unicode non-breaking spaces, injected as tabs
- # following added by me
- string.gsub!(%r{<h[1-9][^>]*></h1>}, " ") # I don't know why Libre Office is inserting them, but they need to go
- string.gsub!(%r{<h1[^>]* style="vertical-align: super;[^>]*>([^<]+)</h1>},
- "<sup>\\1</sup>") # I absolutely don't know why Libre Office is rendering superscripts as h1
- string
-end
-
ARGV.push('-h') if ARGV.empty?
OptionParser.new do |opts|
opts.banner = "Usage: w2a [options] <file>"
opts.on('-m', '--mathml2asciimath', 'Convert MathML to AsciiMath') do |v|
@@ -64,12 +51,12 @@
ReverseAsciidoctor.config.sourcedir = Dir.mktmpdir
# puts "ReverseAsciidoctor.config.sourcedir #{ReverseAsciidoctor.config.sourcedir}"
doc = WordToMarkdown.new(filename, ReverseAsciidoctor.config.sourcedir)
-#File.open("test.html", "w:UTF-8") { |f| f.write doc.document.html }
+File.open("test.html", "w:UTF-8") { |f| f.write doc.document.html }
adoc_content = ReverseAsciidoctor.convert(
- scrub_whitespace(doc.document.html),
+ ReverseAsciidoctor.cleaner.preprocess_word_html(doc.document.html),
WordToMarkdown::REVERSE_MARKDOWN_OPTIONS
)
# puts scrub_whitespace(doc.document.html)
# Print to STDOUT