require 'stepmod/utils/stepmod_definition_converter'
require 'stepmod/utils/bibdata'
require 'stepmod/utils/concept'

ReverseAdoc.config.unknown_tags = :bypass

module Stepmod
  module Utils
    class TermsExtractor
      # Document stages (matched against Bibdata#doctype) whose terms are
      # extracted; documents at any other stage are skipped.
      # Frozen so the shared list cannot be mutated by accident.
      # TODO: we may want a command line option to override this in the future
      ACCEPTED_STAGES = %w(IS DIS FDIS TS).freeze

      # Read-only state of the extractor:
      #
      # stepmod_path::        Pathname, the resolved (realpath) form of stepmod_dir.
      # stepmod_dir::         the directory exactly as passed to the constructor.
      # general_concepts::    concepts from <definition> elements of parts that
      #                       are not in the special category, plus those of
      #                       special parts that yielded fewer than 3 concepts.
      # resource_concepts::   concepts from resource schema descriptions of
      #                       parts that are not in the special category.
      # parsed_bibliography:: the bibdata of the owning document, appended once
      #                       per parsed concept (so it may contain repeats).
      # encountered_terms::   Hash of definition id => true, used to report
      #                       duplicated term ids.
      # cvs_mode::            set by #call: the result of File.which("cvs") when
      #                       the STEPmod directory contains a CVS entry,
      #                       otherwise nil.
      # part_concepts::       [bibdata, concepts] pairs for special-category parts.
      # part_resources::      [bibdata, concepts] pairs for special-category
      #                       resource documents.
      # part_modules::        [bibdata, arm_concepts_by_schema, mim_concepts_by_schema]
      #                       triples for module documents.
      # stdout::              the IO-like object that #log writes to.
      attr_reader :stepmod_path,
        :stepmod_dir,
        :general_concepts,
        :resource_concepts,
        :parsed_bibliography,
        :encountered_terms,
        :cvs_mode,
        :part_concepts,
        :part_resources,
        :part_modules,
        :stdout

      # Convenience entry point: builds an extractor for +stepmod_dir+ and
      # runs it in one step.
      #
      # @param stepmod_dir the STEPmod root directory
      # @param stdout the object log lines are written to (defaults to STDOUT)
      # @return whatever the instance-level #call returns
      def self.call(stepmod_dir, stdout = STDOUT)
        extractor = new(stepmod_dir, stdout)
        extractor.call
      end

      # Stores the STEPmod location and the log sink, and starts every result
      # collection empty.
      #
      # @param stepmod_dir the STEPmod root directory; it is resolved with
      #   Pathname#realpath, so it must exist
      # @param stdout the object log lines are written to
      def initialize(stepmod_dir, stdout)
        # Where to read from and where to log to.
        @stepmod_dir = stepmod_dir
        @stepmod_path = Pathname.new(stepmod_dir).realpath
        @stdout = stdout

        # Result accumulators, filled while processing.
        @encountered_terms = {}
        @parsed_bibliography = []
        @general_concepts, @resource_concepts = [], []
        @part_concepts, @part_resources, @part_modules = [], [], []
      end

      # Writes one log line, prefixed with the tool name, to #stdout.
      #
      # @param message the text to log
      def log(message)
        line = "[stepmod-utils] #{message}"
        stdout.puts(line)
      end

      # Tells whether the part number of +bibdata+ belongs to the "special"
      # category: parts 41-47, 51 and 56-112. #process_term_files groups the
      # concepts of such parts per part instead of adding them to the general
      # collections.
      #
      # The upper range used to be written as `when [56..112]`, which compares
      # the integer with an Array holding a Range and therefore never matched.
      #
      # @param bibdata an object responding to #part; the value is converted
      #   with #to_i
      # @return [Boolean]
      def term_special_category(bibdata)
        case bibdata.part.to_i
        when 41..47, 51, 56..112
          true
        else
          false
        end
      end

      # Runs the extraction over the whole STEPmod tree.
      #
      # Detects whether the tree is a CVS checkout, collects the document XML
      # files that repository_index.xml lists and that exist on disk, and
      # hands them to #process_term_files.
      #
      # @return [Array] general_concepts, resource_concepts,
      #   parsed_bibliography, part_concepts, part_resources and
      #   part_modules, in that order
      def call
        # If we are using the stepmod CVS repository, provide the revision number per file.
        # `exist?` is used throughout because the `exists?` aliases of Dir and
        # File were removed in Ruby 3.2.
        @cvs_mode = if Dir.exist?(stepmod_path.join('CVS'))
          require 'ptools'
          # ptools provides File.which
          File.which("cvs")
        end

        log "INFO: STEPmod directory set to #{stepmod_dir}."

        if cvs_mode
          log "INFO: STEPmod directory is a CVS repository and will detect revisions."
          log "INFO: [CVS] Detecting file revisions can be slow, please be patient!"
        else
          log "INFO: STEPmod directory is not a CVS repository, skipping revision detection."
        end

        log "INFO: Detecting paths..."

        repo_index = Nokogiri::XML(File.read(stepmod_path.join('repository_index.xml'))).root

        # repository_index.xml element name => [directory, document file name]
        document_locations = {
          'module' => ['modules', 'module.xml'],
          'resource_doc' => ['resource_docs', 'resource.xml'],
          'business_object_model' => ['business_object_models', 'business_object_model.xml'],
          'application_protocol' => ['application_protocols', 'application_protocol.xml']
        }

        files = []
        document_locations.each do |element, (directory, filename)|
          repo_index.xpath("//#{element}").each do |node|
            path = Pathname.new("#{stepmod_dir}/#{directory}/#{node['name']}/#{filename}")
            # Entries without a document on disk are silently ignored.
            files << path if File.exist?(path)
          end
        end

        # Process each document once, in a stable order.
        files.uniq!
        files.sort!
        process_term_files(files)

        [
          general_concepts,
          resource_concepts,
          parsed_bibliography,
          part_concepts,
          part_resources,
          part_modules
        ]
      end

      private

      # Parses each document in +files+ and stores the extracted concepts in
      # the instance collections.
      #
      # For every file this:
      # * builds its Bibdata and skips the file when that raises, when the
      #   doctype is not in ACCEPTED_STAGES, or when `part` is blank;
      # * converts every <definition> element into a Concept;
      # * for resource.xml / module.xml documents, also converts the
      #   <ext_description> elements of the linked schema description files;
      # * appends the per-part results to part_concepts, part_resources and
      #   part_modules.
      #
      # @param files [Array<Pathname>] document XML paths
      def process_term_files(files)
        parsed_schema_names = {}
        files.each do |file_path|
          file_path = file_path.realpath
          fpath = file_path.relative_path_from(stepmod_path)

          log "INFO: Processing XML file #{fpath}"
          current_document = Nokogiri::XML(File.read(file_path)).root

          bibdata = nil
          begin
            bibdata = Stepmod::Utils::Bibdata.new(document: current_document)
          rescue
            log "WARNING: Unknown file #{fpath}, skipped"
            next
          end

          unless ACCEPTED_STAGES.include? bibdata.doctype
            log "INFO: skipped #{bibdata.docid} as it is not one of (#{ACCEPTED_STAGES.join(", ")})."
            next
          end

          if bibdata.part.to_s.empty?
            log "FATAL: missing `part` attribute: #{fpath}"
            log "INFO: skipped #{bibdata.docid} as it is missing `part` attribute."
            next
          end

          revision_string = cvs_revision_string(fpath)

          # The category depends only on the document, so decide it once.
          special_part = term_special_category(bibdata)

          # read definitions
          current_part_concepts = []
          current_document.xpath('//definition').each_with_index do |definition, index|
            term_id = definition['id']
            unless term_id.nil?
              if encountered_terms[term_id]
                log "FATAL: Duplicated term with id: #{term_id}, #{fpath}"
              end
              encountered_terms[term_id] = true
            end

            # Assume that definition is located in clause 3 of the ISO document
            # in order. We really don't have a good reference here.
            ref_clause = "3.#{index + 1}"

            # NOTE(review): fpath is a Pathname, so `+` here is Pathname#+ (a
            # path join that puts a "/" between the path and revision_string),
            # not string concatenation. Confirm which one Concept expects
            # before changing it.
            concept = Stepmod::Utils::Concept.parse(
              definition,
              reference_anchor: bibdata.anchor,
              reference_clause: ref_clause,
              file_path: fpath + revision_string
            )
            next unless concept

            if special_part
              current_part_concepts << concept
            else
              general_concepts << concept
            end

            parsed_bibliography << bibdata
          end

          current_part_resources = []
          current_part_modules_arm = {}
          current_part_modules_mim = {}

          log "INFO: FILE PATH IS #{file_path}"
          case file_path.to_s
          when /resource.xml$/
            log "INFO: Processing resource.xml for #{file_path}"
            # Assumption: every schema is only linked by a single resource_docs document.
            current_document.xpath('//schema').each do |schema_node|
              schema_name = schema_node['name']
              if parsed_schema_names[schema_name]
                log "ERROR: We have encountered this schema before: #{schema_name} from path #{parsed_schema_names[schema_name]}, now at #{file_path}"
                next
              end
              parsed_schema_names[schema_name] = file_path

              Dir["#{stepmod_path}/resources/#{schema_name}/descriptions.xml"].each do |description_xml_path|
                log "INFO: Processing resources schema #{description_xml_path}"
                description_document = Nokogiri::XML(File.read(description_xml_path)).root
                description_document.xpath('//ext_description').each do |ext_description|
                  concept = Stepmod::Utils::Concept.parse(
                    ext_description,
                    reference_anchor: bibdata.anchor,
                    reference_clause: nil,
                    file_path: Pathname.new(description_xml_path).relative_path_from(stepmod_path)
                  )
                  next unless concept

                  if special_part
                    current_part_resources << concept
                  else
                    resource_concepts << concept
                  end

                  parsed_bibliography << bibdata
                end
              end
            end

          when /module.xml$/
            log "INFO: Processing module.xml for #{file_path}"
            # Assumption: every schema is only linked by a single module document.
            schema_name = current_document.xpath('//module').first['name']
            if parsed_schema_names[schema_name]
              log "ERROR: We have encountered this schema before: #{schema_name} from path #{parsed_schema_names[schema_name]}, now at #{file_path}"
              # Abandons the whole file, including the bookkeeping below.
              next
            end
            parsed_schema_names[schema_name] = file_path

            current_part_modules_arm = module_description_concepts(schema_name, 'arm', bibdata)
            current_part_modules_mim = module_description_concepts(schema_name, 'mim', bibdata)
          end

          log "INFO: Completed processing XML file #{fpath}"
          if current_part_concepts.empty?
            log "INFO: Skipping #{fpath} (#{bibdata.docid}) because it contains no concepts."
          elsif current_part_concepts.length < 3
            log "INFO: Skipping #{fpath} (#{bibdata.docid}) because it only has #{current_part_concepts.length} terms."

            # Too few terms for a part of their own: keep them with the general ones.
            general_concepts.concat(current_part_concepts)
          else
            part_concepts << [bibdata, current_part_concepts]
          end
          part_resources << [bibdata, current_part_resources] unless current_part_resources.empty?
          unless current_part_modules_arm.empty? && current_part_modules_mim.empty?
            part_modules << [bibdata, current_part_modules_arm, current_part_modules_mim]
          end
        end
      end

      # Builds the revision comment that accompanies a definition's file path.
      #
      # Outside CVS mode, or when `cvs status` prints nothing or its output
      # cannot be parsed, the "revision not detected" text is returned.
      #
      # @param fpath [Pathname] document path relative to stepmod_path
      # @return [String]
      def cvs_revision_string(fpath)
        not_detected = "\n// CVS: revision not detected"
        return not_detected unless cvs_mode

        # Run `cvs status` to find out version
        log "INFO: Detecting CVS revision..."
        status = Dir.chdir(stepmod_path) do
          # argv form: the path is handed to cvs as-is, without a shell.
          IO.popen(['cvs', 'status', fpath.to_s], &:read)
        end
        return not_detected if status.empty?

        status_lines = status.split(/\n/)
        working_rev = status_lines.grep(/Working revision:/).first.to_s[/revision:\s+(.+)$/, 1]
        repo_rev = status_lines.grep(/Repository revision:/).first.to_s[/revision:\t(.+)\t/, 1]
        unless working_rev && repo_rev
          # A missing or differently shaped revision line must not abort the run.
          log "WARNING: Unable to parse CVS status of #{fpath}"
          return not_detected
        end

        log "INFO: CVS working rev (#{working_rev}), repo rev (#{repo_rev})"
        "\n// CVS working rev: (#{working_rev}), repo rev (#{repo_rev})\n" +
          "// CVS: revision #{working_rev == repo_rev ? 'up to date' : 'differs'}"
      end

      # Parses modules/<schema_name>/<kind>_descriptions.xml, when it exists,
      # and groups the resulting concepts by the part of each
      # ext_description's `linkend` that precedes the first ".".
      # Appends +bibdata+ to parsed_bibliography once per parsed concept.
      #
      # @param schema_name [String] module name
      # @param kind [String] "arm" or "mim"
      # @param bibdata the bibdata of the module document
      # @return [Hash{String => Array}] concepts keyed by linkend schema
      def module_description_concepts(schema_name, kind, bibdata)
        concepts_by_schema = {}
        description_xml_path = "#{stepmod_path}/modules/#{schema_name}/#{kind}_descriptions.xml"
        log "INFO: Processing modules schema #{description_xml_path}"

        # `File.exist?`: the `exists?` alias was removed in Ruby 3.2.
        return concepts_by_schema unless File.exist?(description_xml_path)

        description_document = Nokogiri::XML(File.read(description_xml_path)).root
        description_document.xpath('//ext_description').each do |ext_description|
          linkend_schema = ext_description['linkend'].split('.').first
          concept = Stepmod::Utils::Concept.parse(
            ext_description,
            reference_anchor: bibdata.anchor,
            reference_clause: nil,
            file_path: Pathname.new(description_xml_path).relative_path_from(stepmod_path)
          )
          next unless concept

          (concepts_by_schema[linkend_schema] ||= []) << concept
          parsed_bibliography << bibdata
        end

        concepts_by_schema
      end
    end
  end
end