#!/usr/bin/env ruby
# frozen_string_literal: true

# Command-line entry point. Loads a document with WordToMarkdown, cleans up the
# HTML exposed by `doc.document.html`, passes it to ReverseAsciidoctor.convert
# and prints the result to standard output.
#
# Invocations handled below:
#   w2m path/to/document.docx
#   w2m --mathml2asciimath path/to/document.docx
#   w2m --version
#   w2m --help

require 'word-to-markdown'
require 'reverse_asciidoctor'

# Normalises whitespace and strips known LibreOffice artefacts from an HTML
# string before it is converted.
#
# The argument is never mutated: all work happens on a duplicate, which also
# makes the method safe to call with frozen strings.
#
# @param string [String] HTML to clean
# @return [String] a cleaned copy of +string+
def scrub_whitespace(string)
  string = string.dup
  string.gsub!('&nbsp;', ' ')           # HTML-encoded non-breaking spaces
  string.sub!(/\A[[:space:]]+/, '')     # leading whitespace of the whole document
  string.sub!(/[[:space:]]+\z/, '')     # trailing whitespace of the whole document
  string.gsub!(/([ ]+)$/, '')           # trailing spaces at the end of each line
  string.gsub!(/\n\n\n\n/, "\n\n")      # collapse quadruple line breaks
  # Literal U+00A0 characters (described upstream as "injected as tabs").
  # Written as an escape so the character stays visible and survives editors.
  string.delete!("\u00A0")

  # LibreOffice-specific clean-up.
  # NOTE(review): the two patterns below were reconstructed from a copy of this
  # file whose "<...>" sequences had been stripped; confirm against upstream.
  # Empty heading elements that LibreOffice inserts for no apparent reason.
  string.gsub!(%r{<h[1-9][^>]*></h[1-9]>}, ' ')
  # LibreOffice renders superscripts as <h1 style="vertical-align: super;...">;
  # keep only the text inside.
  string.gsub!(%r{<h1[^>]* style="vertical-align: super;[^>]*>([^<]+)</h1>}, '\1')
  string
end

# Work on a copy so ARGV itself is left untouched.
args = ARGV.dup

# The optional flag must come first; consume it before validating what is left.
# Validating first would let a bare `--mathml2asciimath` be used as the path.
mathml2asciimath = args.first == '--mathml2asciimath'
args.shift if mathml2asciimath

# Exactly one argument (a path or --version) must remain.
if args.size != 1 || args.first == '--help'
  puts 'Usage: bundle exec w2m path/to/document.docx'
  exit 1
end

ReverseAsciidoctor.config.mathml2asciimath = true if mathml2asciimath

if args.first == '--version'
  puts "WordToMarkdown v#{WordToMarkdown::VERSION}"
  puts "LibreOffice v#{WordToMarkdown.soffice.version}" unless Gem.win_platform?
else
  doc = WordToMarkdown.new(args.first)
  # Debugging aid: uncomment to inspect the cleaned HTML instead of converting it.
  # puts scrub_whitespace(doc.document.html)
  puts ReverseAsciidoctor.convert(scrub_whitespace(doc.document.html),
                                  WordToMarkdown::REVERSE_MARKDOWN_OPTIONS)
end