# frozen_string_literal: true require "relaton_bib/localized_string" module RelatonBib # Formatted string class FormattedString < LocalizedString FORMATS = %w[text/plain text/html application/docbook+xml application/tei+xml text/x-asciidoc text/markdown application/x-metanorma+xml].freeze # @return [String] attr_reader :format # @param content [String, Array] # @param language [String, nil] language code Iso639 # @param script [String, nil] script code Iso15924 # @param format [String] the content type def initialize(content:, language: nil, script: nil, format: "text/plain") # if format && !FORMATS.include?(format) # raise ArgumentError, %{Format "#{format}" is invalid.} # end @format = format super(content, language, script) end # @param builder [Nokogiri::XML::Builder] def to_xml(builder) builder.parent["format"] = format if format super end # # Encode content. # # @param [String] cnt content # # @return [String] encoded content # def encode(cnt) # rubocop:disable Metrics/MethodLength return escp(cnt) unless format == "text/html" parts = cnt.scan(%r{ <(?\w+)(?[^>]*)> | # tag open \w+)> | # tag close (?) | # comment (?.+?)(?=<|$) # content }x) scan_xml parts end # # Scan XML and escape HTML entities. # # @param [Array>] parts XML parts # # @return [String] output string # def scan_xml(parts) # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity,Metrics/MethodLength return "" unless parts.any? out = "" while parts.any? && (parts.first[3] || parts.first[4]) _, _, _, cmt, cnt = parts.shift out += "#{cmt}#{escp(cnt)}" end unless out.empty? out += scan_xml(parts) if parts.any? && parts.first[0] return out end tago, attrs, tagc, = parts.shift out = if tago && attrs && attrs[-1] == "/" "<#{tago}#{attrs}>" elsif tago inr = scan_xml parts _, _, tagc, = parts.shift if tago == tagc "<#{tago}#{attrs}>#{inr}" else "#{escp("<#{tago}#{attrs}>")}#{inr}#{escp("")}" end end out += scan_xml(parts) if parts.any? && (parts.first[0] || parts.first[3] || parts.first[4]) out end # @return [Hash] def to_hash hash = super return hash unless format hash = { "content" => hash } unless hash.is_a? Hash hash["format"] = format hash end # @param prefix [String] # @param count [Integer] number of elements # @return [String] def to_asciibib(prefix = "", count = 1, has_attrs = false) has_attrs ||= !(format.nil? || format.empty?) pref = prefix.empty? ? prefix : "#{prefix}." # out = count > 1 ? "#{prefix}::\n" : "" out = super out += "#{pref}format:: #{format}\n" if format out end # # Remove HTML tags except , , , , , ,
,

. # Replace with , with . # # @param [String] str content # # @return [String] cleaned content # def cleanup(str) return str unless format == "text/html" str.gsub(/(?<=<)\w+:(?=\w+>)/, "").gsub(/(?<=<\/)\w+:(?=\w+>)/, "") .gsub(//, "").gsub(/<\/i>/, "") .gsub(//, "").gsub(/<\/b>/, "") .gsub(/<(?!\/?(em|strong|stem|sup|sub|tt|br\s?\/|p))[^\s!]\/?.*?>/, " ") .gsub(/\s+([.,:;!?<])/, "\\1").strip end end end