require "nokogiri"

module Goethe
  # Text helpers: turn plain text into simple HTML, and strip HTML or
  # Markdown markup out of a string.
  class Utils
    # Anything that looks like a tag: "<", a run of non-">" characters, ">".
    TAG_PATTERN = /<[^>]*>/

    # Entity replacements applied to the angle brackets of a matched tag.
    ANGLE_BRACKET_ENTITIES = { "<" => "&lt;", ">" => "&gt;" }.freeze

    # http / https / ftp URLs (case-insensitive) made of a conservative set
    # of characters. Quotes and angle brackets are not part of the set, so a
    # match can be placed inside a double-quoted attribute as-is.
    URL_PATTERN = /((https|http|ftp):\/\/)([a-zA-Z0-9.\-_%&=\/\#:\?]+)/i

    # Unix and Windows line endings.
    LINE_BREAK_PATTERN = /\r\n|\n/

    private_constant :TAG_PATTERN, :ANGLE_BRACKET_ENTITIES,
                     :URL_PATTERN, :LINE_BREAK_PATTERN

    class << self
      #
      # Finds links in the text and converts them to `<a>` tags, and converts
      # line breaks to `<br />` tags. Tags already present in the text have
      # their angle brackets escaped first, so they are shown literally
      # instead of being interpreted as markup.
      #
      # The argument is never modified; a new string is returned.
      #
      # @param [String] str
      #
      # @return [String] the converted text, or "" when +str+ is nil
      #
      def auto_link(str)
        return "" if str.nil?

        # 1. Neutralise existing tags by escaping their angle brackets.
        html = str.gsub(TAG_PATTERN) do |tag|
          tag.gsub(/[<>]/, ANGLE_BRACKET_ENTITIES)
        end

        # 2. Wrap every URL in an anchor. The whole match is protocol + rest,
        #    so it is used both as the href and as the link text.
        # NOTE(review): only `href` is emitted; confirm whether further
        # attributes (e.g. target / rel) are expected by callers.
        html = html.gsub(URL_PATTERN) do |url|
          %Q{<a href="#{url}">#{url}</a>}
        end

        # 3. Line breaks become explicit break tags.
        # NOTE(review): `<br />` form chosen here; confirm against consumers.
        html.gsub(LINE_BREAK_PATTERN, "<br />")
      end

      #
      # Replaces every HTML tag in the text, by default with a single space.
      #
      # Two passes are made, using the patterns stored under
      # Goethe::Regex[:HTML_TAGS] and Goethe::Regex[:ADDITIONAL_HTML_TAG]
      # (both defined outside this file).
      #
      # @param [String] str
      # @param [String] replacement - the replacement text, a space by default
      #
      # @return [String] the stripped text, or "" when +str+ is nil
      #
      def remove_html_tags(str, replacement: " ")
        return "" if str.nil?

        str.gsub(Goethe::Regex[:HTML_TAGS], replacement)
           .gsub(Goethe::Regex[:ADDITIONAL_HTML_TAG], replacement)
      end

      #
      # Removes all Markdown control characters from the text.
      #
      # The patterns come from Goethe::Regex[:MARKDOWN] (defined outside this
      # file) and are applied in a fixed order, each step working on the
      # output of the previous one.
      #
      # @param [String] str
      #
      # @return [String] the stripped text, or "" when +str+ is nil
      #
      def remove_markdown_symbols(str)
        return "" if str.nil?

        markdown = Goethe::Regex[:MARKDOWN]

        # HEADERS
        text = str.gsub(markdown[:HEADERS], "")

        # BLOCKQUOTES
        text = text.gsub(markdown[:BLOCKQUOTES], "")

        # RULERS - removed outright.
        # NOTE(review): a block returning capture 2 used to be attached to
        # this call, but String#gsub ignores its block when a replacement
        # argument is given, so plain removal is the behaviour kept here.
        # Confirm that removal (not "keep capture 2") is the intent.
        text = text.gsub(markdown[:HRULERS], "")

        # LISTS
        text = text.gsub(markdown[:LISTS], "")

        # EMPHASIS - keep the second capture group of the pattern.
        text = text.gsub(markdown[:EMPHASIS]) { Regexp.last_match(2) }

        # IMAGES
        text = text.gsub(markdown[:IMAGES], "")

        # LINKS - keep the first capture group of the pattern.
        text = text.gsub(markdown[:LINKS]) { Regexp.last_match(1) }

        # COPYRIGHT
        text = text.gsub(markdown[:COPYRIGHT], "")

        # QUICK_LINKS - keep the first capture group, padded with a space on
        # each side.
        text.gsub(markdown[:QUICK_LINKS]) { " #{Regexp.last_match(1)} " }
      end
    end
  end
end