require 'open3' require 'tempfile' require 'timeout' class PandocRuby @@pandoc_path = 'pandoc' # The available readers and their corresponding names. The keys are used to # generate methods and specify options to Pandoc. READERS = { 'commonmark' => 'CommonMark Markdown', 'creole' => 'Creole 1.0', 'csv' => 'CSV table', 'docbook' => 'DocBook', 'docx' => 'Word docx', 'dokuwiki' => 'DokuWiki markup', 'epub' => 'EPUB', 'fb2' => 'FictionBook2 e-book', 'gfm' => 'GitHub-Flavored Markdown', 'haddock' => 'Haddock markup', 'html' => 'HTML', 'ipynb' => 'Jupyter notebook', 'jats' => 'JATS XML', 'jira' => 'Jira wiki markup', 'json' => 'JSON version of native AST', 'latex' => 'LaTex', 'man' => 'roff man', 'markdown' => "Pandoc's Markdown", 'markdown_mmd' => 'MultiMarkdown', 'markdown_phpextra' => 'PHP Markdown Extra', 'markdown_strict' => 'original unextended Markdown', 'mediawiki' => 'MediaWiki markup', 'muse' => 'Muse', 'native' => 'native Haskell', 'odt' => 'ODT', 'opml' => 'OPML', 'org' => 'Emacs Org mode', 'rst' => 'reStructuredText', 't2t' => 'txt2tags', 'textile' => 'Textile', 'tikiwiki' => 'TikiWiki markup', 'twiki' => 'TWiki markup', 'vimwiki' => 'Vimwiki' }.freeze # The available string writers and their corresponding names. The keys are # used to generate methods and specify options to Pandoc. STRING_WRITERS = { 'asciidoc' => 'AsciiDoc', 'asciidoctor' => 'AsciiDoctor', 'beamer' => 'LaTeX beamer slide show', 'commonmark' => 'CommonMark Markdown', 'context' => 'ConTeXt', 'docbook' => 'DocBook 4', 'docbook4' => 'DocBook 4', 'docbook5' => 'DocBook 5', 'dokuwiki' => 'DokuWiki markup', 'fb2' => 'FictionBook2 e-book', 'gfm' => 'GitHub-Flavored Markdown', 'haddock' => 'Haddock markup', 'html' => 'HTML, i.e. HTML5/XHTML polyglot markup', 'html5' => 'HTML, i.e. 
HTML5/XHTML polyglot markup', 'html4' => 'XHTML 1.0 Transitional', 'icml' => 'InDesign ICML', 'ipynb' => 'Jupyter notebook', 'jats_archiving' => 'JATS XML, Archiving and Interchange Tag Set', 'jats_articleauthoring' => 'JATS XML, Article Authoring Tag Set', 'jats_publishing' => 'JATS XML, Journal Publishing Tag Set', 'jats' => 'alias for jats_archiving', 'jira' => 'Jira wiki markup', 'json' => 'JSON version of native AST', 'latex' => 'LaTex', 'man' => 'roff man', 'markdown' => "Pandoc's Markdown", 'markdown_mmd' => 'MultiMarkdown', 'markdown_phpextra' => 'PHP Markdown Extra', 'markdown_strict' => 'original unextended Markdown', 'mediawiki' => 'MediaWiki markup', 'ms' => 'roff ms', 'muse' => 'Muse', 'native' => 'native Haskell', 'opml' => 'OPML', 'opendocument' => 'OpenDocument', 'org' => 'Emacs Org mode', 'pdf' => 'PDF', 'plain' => 'plain text', 'pptx' => 'PowerPoint slide show', 'rst' => 'reStructuredText', 'rtf' => 'Rich Text Format', 'texinfo' => 'GNU Texinfo', 'textile' => 'Textile', 'slideous' => 'Slideous HTML and JavaScript slide show', 'slidy' => 'Slidy HTML and JavaScript slide show', 'dzslides' => 'DZSlides HTML5 + JavaScript slide show', 'revealjs' => 'reveal.js HTML5 + JavaScript slide show', 's5' => 'S5 HTML and JavaScript slide show', 'tei' => 'TEI Simple', 'xwiki' => 'XWiki markup', 'zimwiki' => 'ZimWiki markup' }.freeze # The available binary writers and their corresponding names. The keys are # used to generate methods and specify options to Pandoc. BINARY_WRITERS = { 'odt' => 'OpenOffice text document', 'docx' => 'Word docx', 'epub' => 'EPUB v2', 'epub2' => 'EPUB v2', 'epub3' => 'EPUB v3' }.freeze # All of the available Writers. WRITERS = STRING_WRITERS.merge(BINARY_WRITERS) # To use run the pandoc command with a custom executable path, the path # to the pandoc executable can be set here. def self.pandoc_path=(path) @@pandoc_path = path end # A shortcut method that creates a new PandocRuby object and immediately # calls `#convert`. 
# Options passed to this method are passed directly to `#new` and are treated
  # exactly as if they had been passed to the initializer.
  #
  # Returns whatever `#convert` returns for the newly built object.
  def self.convert(*args)
    new(*args).convert
  end

  # Binary-output flag. Reads as `false` until a truthy value is assigned.
  # NOTE(review): presumably switched on when one of the BINARY_WRITERS is
  # selected -- confirm against the code that assigns it.
  attr_writer :binary_output

  def binary_output
    @binary_output ||= false
  end

  # The pandoc options. `#initialize` stores every argument left over after
  # the input has been taken off the front. Reads as an empty array when
  # nothing has been assigned.
  attr_writer :options

  def options
    @options ||= []
  end

  # Reads as an empty string when nothing has been assigned.
  # NOTE(review): presumably the command-line rendering of `options` --
  # confirm against the code that builds it.
  attr_writer :option_string

  def option_string
    @option_string ||= ''
  end

  # Name of the writer (output format). Reads as 'html' when nothing has been
  # assigned.
  attr_writer :writer

  def writer
    @writer ||= 'html'
  end

  # The input as handed to `#initialize`: `input_files` holds the file names
  # joined into one space-separated string, `input_string` holds the literal
  # source text. Whichever was not supplied stays nil.
  attr_accessor :input_files, :input_string

  # Create a new PandocRuby converter object. The first argument contains the
  # input either as string or as an array of filenames.
  #
  # Any other arguments will be converted to pandoc options.
  #
  # Usage:
  #   new("# A String", :option1 => :value, :option2)
  #   new(["/path/to/file.md"], :option1 => :value, :option2)
  #   new(["/to/file1.html", "/to/file2.html"], :option1 => :value)
  def initialize(*args)
    # Only a leading String or Array is consumed as input; any other first
    # argument stays in `args` and is stored with the options.
    case args.first
    when String
      self.input_string = args.shift
    when Array
      # NOTE(review): the names are joined with a single space, so a path that
      # itself contains spaces can no longer be told apart afterwards --
      # verify how the consumer of `input_files` quotes or splits this string.
      self.input_files = args.shift.join(' ')
    end
    self.options = args
  end

  # Run the conversion. The convert method can take any number of arguments,
  # which will be converted to pandoc options. If options were already
  # specified in an initializer or reader method, they will be combined with
  # any that are passed to this method.
  #
  # Returns a string with the converted content.
  #
  # Example:
  #
  #   PandocRuby.new("# text").convert
  #   # => "