#!/usr/bin/env ruby
# frozen_string_literal: true

# w2a: command-line wrapper that feeds the HTML produced by WordToMarkdown
# for a Word document through ReverseAsciidoctor and prints or writes the
# resulting AsciiDoc.

require 'word-to-markdown'
require 'optparse'
require 'fileutils' # FileUtils.mkdir_p
require 'tmpdir'    # Dir.mktmpdir
require 'reverse_asciidoctor'

# Cleans up whitespace and stray heading markup in an HTML string before it
# is converted.
#
# NOTE(review): the three HTML-bearing patterns below (entity names, empty
# headings, superscript <h1>) had their markup stripped in the copy of this
# file under review; they were reconstructed from the remaining fragments and
# the adjacent comments. Confirm against the upstream script.
#
# @param string [String] HTML text; the argument itself is not modified
# @return [String] a cleaned-up copy of +string+
def scrub_whitespace(string)
  string = string.dup # work on a copy; the bang methods below mutate in place
  string.gsub!(/&nbsp;|&#xA0;|\u00a0/i, ' ') # HTML encoded spaces
  string.sub!(/^\A[[:space:]]+/m, '') # document leading whitespace
  string.sub!(/[[:space:]]+\z$/m, '') # document trailing whitespace
  string.gsub!(/([ ]+)$/, ' ') # line trailing whitespace
  string.gsub!(/\n\n\n\n/, "\n\n") # Quadruple line breaks
  # string.delete!(' ') # Unicode non-breaking spaces, injected as tabs

  # Empty heading elements: I don't know why Libre Office is inserting them,
  # but they need to go
  string.gsub!(%r{<h[1-9][^>]*></h[1-9]>}, ' ')
  # I absolutely don't know why Libre Office is rendering superscripts as h1
  string.gsub!(
    %r{<h1[^>]* style="vertical-align: super;[^>]*>([^<]+)</h1>},
    '<sup>\\1</sup>'
  )
  string
end

# With no arguments at all, behave as if help was requested.
ARGV.push('-h') if ARGV.empty?

OptionParser.new do |opts|
  # NOTE(review): "<file>" placeholder restored; it was stripped from the banner.
  opts.banner = 'Usage: w2a [options] <file>'

  opts.on('-m', '--mathml2asciimath', 'Convert MathML to AsciiMath') do
    ReverseAsciidoctor.config.mathml2asciimath = true
  end

  opts.on('-oFILENAME', '--output=FILENAME', 'Output file to write to') do |path|
    ReverseAsciidoctor.config.destination = File.expand_path(path)
  end

  opts.on('-e', '--external-images', 'Export images if data URI') do
    ReverseAsciidoctor.config.external_images = true
  end

  opts.on('-v', '--version', 'Version information') do
    puts "reverse_adoc: v#{ReverseAsciidoctor::VERSION}"
    puts "[dependency] WordToMarkdown: v#{WordToMarkdown::VERSION}"
    if Gem.win_platform?
      puts '[dependency] LibreOffice: version not available on Windows'
    else
      puts "[dependency] LibreOffice: v#{WordToMarkdown.soffice.version}"
    end
    exit
  end

  opts.on('-h', '--help', 'Prints this help') do
    puts opts
    exit
  end
end.parse!

# parse! removes the recognised options from ARGV; the last remaining
# argument is taken as the input file.
filename = ARGV.pop
raise 'Please provide an input file to process. Exiting.' unless filename

if ReverseAsciidoctor.config.external_images && ReverseAsciidoctor.config.destination.nil?
  raise 'The -e | --external-images feature must be used with -o | --output. Exiting.'
end

# A fresh temporary directory is handed to both ReverseAsciidoctor (as
# sourcedir) and WordToMarkdown (as its second constructor argument).
ReverseAsciidoctor.config.sourcedir = Dir.mktmpdir

doc = WordToMarkdown.new(filename, ReverseAsciidoctor.config.sourcedir)
adoc_content = ReverseAsciidoctor.convert(
  scrub_whitespace(doc.document.html),
  WordToMarkdown::REVERSE_MARKDOWN_OPTIONS
)

destination = ReverseAsciidoctor.config.destination
if destination
  # Write output to the configured destination, creating parent directories
  # as needed.
  FileUtils.mkdir_p(File.dirname(destination))
  File.write(destination, adoc_content)
else
  # No destination configured: print to STDOUT.
  puts adoc_content
end