#!/usr/bin/env ruby

require "pathname"
require "json"
require "asciidoctor"
require "open3"
require "cgi"
require "uri/generic"

# Runs grep over the .adoc files below a search root and associates every
# matching line with the closest preceding section heading of its file.
class GrepDocTree
  # One matching source line (line) and its line number within the file
  # (line_no, coerced to Integer; raises if it is not numeric).
  Line_info = Struct.new(:line, :line_no) do
    def initialize(line, line_no)
      self.line = line
      self.line_no = Integer(line_no)
    end
  end

  # grep_opts is a hash with the following keys:
  # :searchassetstop  the directory to search recursively
  # :search_phrase    the pattern to search for
  # :ignorecase       truthy for a case insensitive search
  # :useregexp        truthy to interpret the phrase as a regexp, otherwise
  #                   it is matched as a fixed string
  def initialize(grep_opts)
    # Kept as an argv array so that nothing is ever interpreted by a shell.
    @grep_opts = ["-nHr", "--include=*.adoc"]
    @grep_opts << "-i" if grep_opts[:ignorecase]
    @grep_opts << "-F" unless grep_opts[:useregexp]

    @search_root = grep_opts[:searchassetstop]
    @input = grep_opts[:search_phrase]

    @output = ""
    @error = ""
    @status = 0
    @match_index = {}
  end

  # Runs grep and rebuilds the internal match index from its output.
  #
  # The search phrase is handed to grep as a separate argv element (after
  # -e), so shell meta characters in it are never executed and a phrase
  # starting with '-' is not mistaken for an option.
  def grep
    # This console code sequence will only show the matching word in bold
    grep_env = { "GREP_COLORS" => "ms=01:mc=:sl=:cx=:fn=:ln=:bn=:se=" }

    @output, @error, @status = Open3.capture3(
      grep_env,
      "grep", *@grep_opts, "--color=always", "-e", @input.to_s, "--", @search_root.to_s
    )

    # Replace invalid byte sequences instead of aborting the whole search.
    @output.force_encoding(Encoding::UTF_8)
    @output.scrub!
    # Turn the 'bold on' / 'bold off' escape sequences into '##' markers.
    @output.gsub!(/\x1b\[01m\x1b\[K/, "##")
    @output.gsub!(/\x1b\[m\x1b\[K/, "##")

    grep2hash @search_root
  end

  # Returns an array with one entry (see construct_user_info for the format)
  # for each file that has at least one match and is listed in heading_db.
  #
  # The heading_db has the following JSON format
  # {
  #   file_infos : [{
  #     filepath : filepath_1,
  #     title : Title,
  #     sections : [{
  #       id : section_id_1,
  #       title : section_title_1,
  #       line_no : line_no
  #     },
  #     ...
  #     ]
  #   },
  #   {
  #     filepath : filepath_2,
  #     ...
  #   }]
  # }
  def match_with_headings(heading_db)
    matches = []

    # for each file with at least one match
    @match_index.each do |file_path, match_infos|
      # assume that max one file with the specified path exists
      file_info = heading_db["file_infos"].find do |fi|
        fi["filepath"] == file_path.to_s
      end
      next if file_info.nil?

      matches << construct_user_info(file_info, match_infos)
    end
    matches
  end

  # Produce a hash with all info needed for the user to navigate to the
  # matching html section for all matches to the file in the supplied file
  # info hash.
  #
  # format of the resulting hash:
  # {
  #   filepath : Filepath,
  #   title : Title,
  #   matches : {
  #       section_id :
  #       {
  #         section_title : Section Title,
  #         location : Location,
  #         lines : [line_1, line_2, ...]
  #       }
  #   }
  # }
  #
  # A match located before the first section of the file is stored under
  # the key nil, with a nil section_title.
  def construct_user_info(file_info, match_infos)
    matches = {}

    match_infos.each do |match_info|
      section = closest_preceding_section(file_info["sections"], match_info.line_no)
      section_id = section["id"]

      matches[section_id] ||= {
        "section_title" => section["title"],
        "location" => "#{Pathname.new(file_info["filepath"]).sub_ext(".html")}##{section_id}",
        "lines" => []
      }
      matches[section_id]["lines"] << match_info.line
    end

    {
      "filepath" => file_info["filepath"],
      "title" => file_info["title"],
      "matches" => matches
    }
  end

  # Returns the current match index as text; one '<file>::' line per file,
  # each followed by its '<line_no> : <line>' entries.
  def formatted_output
    # assume we have an updated index
    adoc_str = +""
    @match_index.each do |file_path, line_infos|
      adoc_str << "#{file_path}::\n"
      line_infos.each do |line_info|
        adoc_str << "#{line_info.line_no} : #{line_info.line}\n"
      end
    end
    adoc_str
  end

  private

  # Returns the section with the highest line_no that is still <= line_no,
  # or an empty hash if no section starts at or before that line. The
  # sections need not be sorted.
  def closest_preceding_section(sections, line_no)
    best_line = 0
    chosen = {}
    (sections || []).each do |section|
      candidate = Integer(section["line_no"])
      next unless candidate <= line_no && candidate > best_line

      best_line = candidate
      chosen = section
    end
    chosen
  end

  # converts the 'raw' matches from grep into a hash.
  # i.e. from:
  # <filename>:<line_no>:<line>
  # <filename>:<line_no>:<line>
  # ...
  #
  # to
  # {file_path : [line_info1, line_info2, ...], ...}
  #
  # where file_path is a Pathname relative to base_dir.
  def grep2hash(base_dir)
    @match_index = {}
    root = Pathname.new(base_dir)
    @output.split("\n").each do |line|
      tokens = line.split(":", 3)
      # skip anything that is not of the form <filename>:<line_no>:<line>
      next if tokens.length < 3

      # remove all lines starting with :<attrib>:
      content = tokens[2].gsub(/^:[[:graph:]]+:.*$/, "")
      next if content.empty?

      # remove everything above the repo root from the filepath
      file_path = Pathname.new(tokens[0]).relative_path_from(root)
      (@match_index[file_path] ||= []) << Line_info.new(content, tokens[1])
    end
  end
end

# Performs a search of the doc tree and renders the result as an asciidoc
# document.
class SearchDocTree
  # input_data is a hash; this class reads the keys :searchassetstop
  # (a Pathname to the directory holding heading_index.json) and :referer
  # (the uri of the page that issued the search). The hash is also handed
  # unchanged to GrepDocTree.
  def initialize(input_data)
    @input_data = input_data
  end

  # Runs the search and returns the result as an asciidoc formatted string.
  def search
    # read the heading_db from file
    jsonpath = @input_data[:searchassetstop].join("heading_index.json")
    src_index = JSON.parse(File.read(jsonpath.to_s))

    # search the doc tree for regex
    gt = GrepDocTree.new @input_data
    gt.grep

    matches = gt.match_with_headings src_index

    format_search_adoc matches, get_uri_top
  end

  private

  # Returns the referer uri with everything from the last '/' removed.
  # Requires that :referer is a string containing at least one '/'.
  def get_uri_top
    referer = @input_data[:referer]
    referer[0, referer.rindex('/')]
  end

  # Strips characters from a matching line before it is put in the result.
  def wash_line(line)
    # remove every ':' (the pattern matches any run of one or more colons),
    # then remove any run of '=' or '|' at the start of a line
    line.gsub(/::*/, "").gsub(/^[=|]+/, "")
  end

  # index is an array of file_info, see construct_user_info
  # for format per file
  #
  # The generated asciidoc has the following layout per file:
  #
  # == Title
  #
  # uri_top/location[section_title]::
  #
  # [subs="quotes"]
  # ----
  # -- line_1
  # -- line_2
  # ----
  def format_search_adoc(index, uri_top)
    str = +""
    index.each do |file_info|
      str << "== #{file_info["title"]}\n\n"
      file_info["matches"].each do |_section_id, info|
        str << "#{uri_top}/#{info["location"]}[#{info["section_title"]}]::\n\n"
        str << "[subs=\"quotes\"]\n"
        str << "----\n"
        info["lines"].each do |line|
          str << "-- #{wash_line(line)}\n"
        end
        str << "----\n"
      end
      str << "\n"
    end

    <<~ADOC
      = Search Result

      #{str}
    ADOC
  end
end

# Starts a WEBrick instance that serves the files below web_root and blocks
# until the server is shut down (an INT signal, e.g. Ctrl-C, triggers the
# shutdown).
#
# web_root:: the directory to use as document root
# port:: the tcp port to listen to, defaults to 8000
#
# The server log is written to the file 'webrick.log'.
def init_web_server(web_root, port = 8000)
  # loaded lazily since it is only needed when running the local test server
  require 'webrick'

  root = File.expand_path web_root
  puts "Trying to start a WEBrick instance at port #{port} serving files from #{web_root}..."

  server = WEBrick::HTTPServer.new(
      :Port => port,
      :DocumentRoot => root,
      :Logger => WEBrick::Log.new("webrick.log", WEBrick::Log::DEBUG)
  )

  puts "WEBrick instance now listening to localhost:#{port}"

  trap 'INT' do
    server.shutdown
  end

  server.start
end

# Debug aid: prints the cgi header followed by a dump of request related
# values (the public methods of the cgi object, referer, host, the
# 'topdir' and 'reltop' form fields and the process environment).
def hello_world
  require "pp"

  br = "<br>"

  # init a new cgi 'connection'
  cgi = CGI.new
  print cgi.header
  print "#{br}Useful cgi parameters and variables.#{br}"
  print cgi.public_methods(false).sort
  print br * 2

  # each value is printed on its own so that everything before a failing
  # lookup still reaches the client
  print "referer: #{cgi.referer}#{br}"
  print "path: #{URI(cgi.referer).path}#{br}"
  print "host: #{cgi.host}#{br}"
  print "client_sent_topdir: #{cgi["topdir"]}#{br}"
  print br
  print "client_sent_reldir: #{cgi["reltop"]}#{br}"
  print br

  print "ENV: "
  pp ENV
  print br
end


# assume that the file tree looks like this when rendering
# a git branch:
#
# root_dir
# |- index.html (the generated index of rendered git branches and tags)
# |- branch_1_top_dir
# |     |- index.html (the generated index of this branch)
# |     |- file_1.html
# |     |- dir_1
# |     |   |- file2.html
# |- branch_2_top_dir
# |- branch_x_...
# |- web_assets (only if a custom stylesheet is used...)
# |- search_assets
# |     |- branch_1_top_dir
# |           |- heading_index.json
# |           |- file1.adoc
# |           |- dir_1
# |           |   |- file2.adoc
# |           |- ...
# |     |- branch_2_top_dir
# |           | ...

# assume that the file tree looks like this when not
# rendering a git branch:
#
# root_dir
# |- index.html  (the generated index of all rendered files)
# |- file_1.html
# |- dir_1
# |   |- file2.html
# |...
# |- web_assets (only if a custom stylesheet is used...)
# |- search_assets
# |     |- heading_index.json
# |     |- file1.adoc
# |     |- dir_1
# |     |   |- file2.adoc
# |     |- ...

# Reads the search form data from the cgi object, searches the doc tree
# and prints the result to stdout as an html page.
#
# cgi:: an object responding to [], has_key? and referer (e.g. a CGI
#       instance)
# debug_mode:: when true, the input data and the asciidoctor attributes
#              are included in the generated page
#
# The following form fields are read:
# searchphrase, ignorecase, useregexp, searchassetstop, webassetstop, css
#
# Raises ScriptError if the search assets directory does not exist.
def cgi_main(cgi, debug_mode = false)
  # retrieve the form data supplied by user
  #
  # NOTE(review): searchassetstop is supplied by the client and is used
  # as-is as the directory to search - consider restricting it to a known
  # root directory.
  input_data = {
      search_phrase: cgi["searchphrase"],
      ignorecase: cgi.has_key?("ignorecase"),
      useregexp: cgi.has_key?("useregexp"),
      searchassetstop: Pathname.new(
          cgi.has_key?("searchassetstop") ? cgi["searchassetstop"] : ""),
      # nil means 'not supplied'; Pathname.new(nil) would raise a TypeError
      webassetstop:
          cgi.has_key?("webassetstop") ? Pathname.new(cgi["webassetstop"]) : nil,
      client_css:
          cgi.has_key?("css") ? cgi["css"] : nil,
      referer: cgi.referer
  }

  # an empty path (field not supplied) does not exist as a directory either
  unless Dir.exist?(input_data[:searchassetstop])
    raise ScriptError, "Could not find search_assets dir (#{input_data[:searchassetstop]}) !"
  end

  adoc_attributes = {
      "data-uri" => 1,
  }

  # Set attributes so that the generated result page uses the same
  # css as the other docs
  if input_data[:client_css] && input_data[:webassetstop]
    adoc_attributes.merge!(
        {
            "linkcss" => 1,
            "stylesdir" => "#{input_data[:webassetstop]}/css",
            "stylesheet" => input_data[:client_css],
            "copycss!" => 1
        }
    )
  end

  converter_options = {
      backend: "html5",
      # need this to let asciidoctor include the default css if user
      # has not specified any css
      safe: Asciidoctor::SafeMode::SAFE,
      header_footer: true,
      attributes: adoc_attributes
  }

  # search the docs and render html
  sdt = SearchDocTree.new(input_data)
  docstr = sdt.search

  if debug_mode
    # print some useful data for debugging
    docstr = <<~EOF

      == Input data

      #{input_data.to_s}

      == Adoc attributes

       #{adoc_attributes.to_s}

       #{docstr}
    EOF
  end

  # send the result back to the client
  print Asciidoctor.convert(docstr, converter_options)
end


# Usage:
#   to start a local web server for development work
# ruby giblish-search.cgi <web_root>
#
#   to run as a cgi script via a previously setup web server
# giblish-search.cgi
#
# (note that you might need to rename the script to eg
# giblish-search.cgi or similar depending on your web server
# setup)
#
if __FILE__ == $PROGRAM_NAME

  STDOUT.sync = true
  if ARGV.empty?
    # 'Normal' cgi usage, as called from a web server

    # init a new cgi 'connection' and print headers
    cgi = CGI.new
    print cgi.header
    begin
      cgi_main(cgi, false)
    rescue StandardError, ScriptError => e
      # ScriptError is listed explicitly since cgi_main raises it and it is
      # not a StandardError. SystemExit and signals are deliberately not
      # rescued.
      #
      # The message can contain data sent by the client, so it is escaped
      # before it is written to the html response.
      print "<pre>"
      print CGI.escapeHTML(e.message)
      print "\n"
      print CGI.escapeHTML(Array(e.backtrace).join("\n"))
      print "</pre>"
      exit 1
    end
    exit 0
  end

  if ARGV.length == 1
    # Run a simple web server to test this locally..
    # and then create the html docs using:
    # giblish -c -m -w <web_root> -r <resource_dir> -s <style_name> -g <git_branch> <src_root> <web_root>
    init_web_server ARGV[0]
    exit 0
  end
end