module Bergamasco
  # Helpers for reading and rewriting Markdown files that carry a YAML
  # frontmatter block, plus small utilities for plain YAML files.
  module Markdown
    # Matches a YAML frontmatter block at the very start of a string (\A).
    # Group 1 captures the opening `---` line and, lazily, everything up to
    # the closing delimiter; group 2 is the closing line, either `---` or
    # `...`. The /m flag lets `.` span newlines. Whatever follows the match
    # (MatchData#post_match) is the document body.
    YAML_FRONT_MATTER_REGEXP = /\A(---\s*\n.*?\n?)^((---|\.\.\.)\s*$\n?)/m

    # Keys copied out of the frontmatter by read_yaml_for_doi_metadata when
    # the caller does not pass options[:keys].
    DEFAULT_DOI_METADATA_KEYS = %w[
      title author date tags summary accession_number doi type version
      references published
    ].freeze

    # Split a file's text into YAML frontmatter and content.
    #
    # file - String with the full text of the file.
    #
    # Returns a two-element Array [metadata, content] when the text starts
    # with a frontmatter block, where metadata is whatever SafeYAML.load
    # yields for that block. Returns the input String unchanged when there is
    # no frontmatter.
    def self.split_yaml_frontmatter(file)
      match = YAML_FRONT_MATTER_REGEXP.match(file)
      return file unless match

      # Hand only the captured frontmatter to the YAML parser so the Markdown
      # body is never interpreted as YAML.
      metadata = SafeYAML.load(match[1])
      [metadata, match.post_match]
    end

    # Merge new_metadata into metadata and drop every key whose resulting
    # value is nil, so passing `key => nil` removes that key.
    #
    # Returns a new Hash; neither argument is modified.
    def self.update_yaml_frontmatter(metadata, new_metadata)
      metadata.merge(new_metadata).compact
    end

    # Serialize metadata as a YAML frontmatter block followed by content.
    #
    # metadata - object responding to #to_yaml (normally a Hash).
    # content  - String body; nil is treated as an empty body.
    #
    # Returns the combined String.
    def self.join_yaml_frontmatter(metadata, content)
      metadata.to_yaml + "---\n" + content.to_s
    end

    # Rewrite the file at filepath with new_metadata merged into its
    # frontmatter. A file without frontmatter (or with an empty frontmatter
    # block) is treated as having empty metadata, so the frontmatter is
    # created and the existing text is kept as the body.
    #
    # Returns the merged metadata Hash that was written.
    def self.update_file(filepath, new_metadata)
      file = File.read(filepath)

      # split_yaml_frontmatter hands back the bare String when there is no
      # frontmatter; normalise that case to "no metadata, whole file is body".
      parts = split_yaml_frontmatter(file)
      metadata, content = parts.is_a?(Array) ? parts : [{}, file]
      # An empty frontmatter block parses to a blank value rather than a Hash.
      metadata ||= {}

      metadata = update_yaml_frontmatter(metadata, new_metadata)
      File.write(filepath, join_yaml_frontmatter(metadata, content))
      metadata
    end

    # Read and parse the YAML file at filepath. When the file does not exist,
    # its parent directory and an empty file are created first, so the call
    # does not fail on a missing path.
    #
    # Returns whatever SafeYAML.load yields for the file's text.
    def self.read_yaml(filepath)
      unless File.exist?(filepath)
        FileUtils.mkdir_p(Pathname.new(filepath).parent)
        FileUtils.touch(filepath)
      end

      SafeYAML.load(File.read(filepath))
    end

    # Collect the metadata for a Markdown file with YAML frontmatter.
    #
    # filepath - path of the Markdown file.
    # options  - Hash; :keys overrides the list of frontmatter keys to keep.
    #            The whole Hash is also passed on to Bergamasco::Pandoc.convert
    #            and Bergamasco::Summarize.summary_from_html.
    #
    # "summary" and "references" are always replaced by values derived from
    # the rendered HTML of the body, even when the frontmatter supplies them.
    #
    # Returns the metadata Hash, or nil when the file is missing, has no
    # frontmatter block, or its frontmatter is blank or not a Hash.
    def self.read_yaml_for_doi_metadata(filepath, options={})
      return nil unless File.exist?(filepath)

      file = File.read(filepath)
      match = YAML_FRONT_MATTER_REGEXP.match(file)
      return nil unless match

      yaml = SafeYAML.load(match[1])
      return nil unless yaml.is_a?(Hash) && yaml.present?

      keys = options[:keys] || DEFAULT_DOI_METADATA_KEYS
      metadata = yaml.extract!(*keys).compact

      html = Bergamasco::Pandoc.convert(match.post_match, options)
      metadata["summary"] = Bergamasco::Summarize.summary_from_html(html, options)
      metadata["references"] = extract_references(html)
      metadata
    end

    # Serialize content with #to_yaml and write it to filepath, replacing any
    # existing file contents.
    def self.write_yaml(filepath, content)
      File.write(filepath, content.to_yaml)
    end

    # Extract reference identifiers from HTML.
    # Expects a references list generated by pandoc: a <div id="refs">
    # whose child <div> elements each carry an id attribute.
    #
    # Returns an Array with each id minus its first four characters
    # (presumably the "ref-" prefix pandoc adds; confirm against real output).
    def self.extract_references(html)
      doc = Nokogiri::HTML(html)
      doc.xpath('//div[@id="refs"]/div/@id').map { |ref| ref.value[4..-1] }
    end
  end
end