# encoding: UTF-8

module NdrImport
  # This mixin adds (multiline) non-tabular file functionality to unified importers.
  # It provides a file reader method and method to capture the rawtext value
  # appropriately. These methods can be overridden or aliased as required.
  #
  # The YAML mapping must define the start_line_pattern which identifies the start
  # of a multiline record (or "row") and can optionally define an end_line_pattern.
  module NonTabularFileHelper
    require 'i18n'
    require 'ndr_support/regexp_range' # TODO: unneeded?
    require 'ndr_support/utf8_encoding'
    require 'ndr_import/non_tabular/column_mapping'
    require 'ndr_import/non_tabular/record'
    require 'ndr_import/non_tabular/line'
    require 'ndr_import/non_tabular/mapping'

    include UTF8Encoding

    attr_reader :non_tabular_lines

    protected

    # Loads the non-tabular text file identified by +filename+ and converts it into
    # tabulated data. The lines are read with SafeFile, passed through
    # ensure_utf8_object!, stored via the non_tabular_lines= writer and screened for
    # unwanted lines; the value returned is whatever read_non_tabular_array produces
    # (an array of tabulated rows, each row being an array of cells).
    def read_non_tabular_file
      raw_lines = SafeFile.readlines(filename)
      self.non_tabular_lines = ensure_utf8_object!(raw_lines)
      remove_unwanted_lines
      read_non_tabular_array
    end

    # Prototyping helper: tabulates non-tabular data supplied directly as a string
    # rather than read from a file. The text is passed through ensure_utf8_object!
    # and split on "\n" before following the same steps as read_non_tabular_file.
    def read_non_tabular_string(text)
      utf8_text = ensure_utf8_object!(text)
      self.non_tabular_lines = utf8_text.split("\n")
      remove_unwanted_lines
      read_non_tabular_array
    end

    # This method flages unwanted lines, typically page headers and footers as removed
    # preventing them from being captured in the non tabular record. Especially useful
    # when there page headers and footers that are out of step with the start and end
    # of each record and could therefore appear anywhere in an individual record if kept.
    def remove_unwanted_lines
      return unless row_mapping.remove_lines.is_a?(Hash)
      @non_tabular_lines.each_with_index do |_line, i|
        row_mapping.remove_lines.each do |_key, lines_to_remove|
          comparable_lines = @non_tabular_lines[i, lines_to_remove.length]
          next unless lines_equal(comparable_lines, lines_to_remove)
          # All lines are equal, so flag them as removed
          comparable_lines.each { |line| line.removed = true }
        end
      end
    end

    # Turns the previously stored source lines into tabulated rows and returns them.
    #
    # Resets the row accumulator, the "in a record" flag (seeded from the row
    # mapping's start_in_a_record) and the current record, then walks the lines and
    # finally deals with any record left open at the end of the data.
    def read_non_tabular_array
      @tabular_array = []
      @in_a_record = row_mapping.start_in_a_record
      @non_tabular_record = NdrImport::NonTabular::Record.new

      partition_and_process_non_tabular_lines
      process_end_of_record

      tabulated_rows = @tabular_array
      # From here on the mapping instance variable holds only the column mappings,
      # which lets the standard mapper work unaltered.
      @mappings = raw_column_mappings
      tabulated_rows
    end

    # Walks the source lines in order and partitions them into records:
    #
    # * a line matching the start_line_pattern is handed to start_record;
    # * otherwise, a line matching the end_line_pattern triggers end_record;
    # * any other line is appended to the current record, but only while a record
    #   is in progress (lines outside a record are not collected).
    #
    # NOTE: the end line itself is consumed and does not form part of the
    # collected record.
    def partition_and_process_non_tabular_lines
      non_tabular_lines.each do |current_line|
        if current_line =~ row_mapping.start_line_pattern
          start_record(current_line)
          next
        end

        if current_line =~ row_mapping.end_line_pattern
          end_record
          next
        end

        # Neither a start nor an end line
        @non_tabular_record << current_line if @in_a_record
      end
    end

    # Handles a line that matched the start_line_pattern.
    #
    # If a record is already in progress then either:
    # * an end_line_pattern is defined, so a new start before the previous end is an
    #   error and NdrImport::MappingError is raised; or
    # * no end_line_pattern is defined, so the new start implicitly ends the previous
    #   record, which is tabulated and added to the results.
    #
    # A fresh record is then begun, containing the start line itself only when the
    # row mapping's capture_start_line is set.
    def start_record(line)
      expects_end_line = row_mapping.end_line_pattern

      if @in_a_record
        if expects_end_line
          raise NdrImport::MappingError,
                I18n.t('mapping.errors.start_pattern_before_end')
        end

        # No endline mapping
        @tabular_array << @non_tabular_record.tabulate(column_mappings)
      end

      @non_tabular_record = NdrImport::NonTabular::Record.new
      @non_tabular_record << line if row_mapping.capture_start_line
      @in_a_record = true
    end

    # Closes the current record: tabulate it (only if we are in a record), flag that
    # we are no longer in a record and replace the current record with a new one.
    def end_record
      finished_record = @non_tabular_record
      if @in_a_record
        @tabular_array << finished_record.tabulate(column_mappings)
      end

      @in_a_record = false
      @non_tabular_record = NdrImport::NonTabular::Record.new
    end

    # Deals with a record left open when the lines run out (i.e. the last record is
    # terminated by the EOF rather than by an end or start line).
    #
    # Nothing happens when the current record is empty. Otherwise, if the row mapping
    # says the data may end in a record, the record is tabulated (provided we are
    # still in a record); if not, its lines are flagged as not being part of a record.
    def process_end_of_record
      return if @non_tabular_record.empty?
      return @non_tabular_record.not_a_record! unless row_mapping.end_in_a_record

      @tabular_array << @non_tabular_record.tabulate(column_mappings) if @in_a_record
    end

    # Writer for the source lines. Each raw line is wrapped in an
    # NdrImport::NonTabular::Line, constructed with the line and its zero-based
    # position in +lines+.
    def non_tabular_lines=(lines)
      @non_tabular_lines = lines.each_with_index.map do |text, position|
        NdrImport::NonTabular::Line.new(text, position)
      end
    end

    # Returns the NdrImport::NonTabular::Mapping built from @mappings. The object is
    # created on first use and the same instance is handed back on later calls.
    def row_mapping
      return @row_mapping if @row_mapping

      @row_mapping = NdrImport::NonTabular::Mapping.new(@mappings)
    end

    # Returns the column mappings, each raw mapping wrapped in an
    # NdrImport::NonTabular::ColumnMapping. The array is built on first use and
    # reused on later calls.
    def column_mappings
      return @column_mappings if @column_mappings

      @column_mappings = raw_column_mappings.map do |raw_mapping|
        NdrImport::NonTabular::ColumnMapping.new(raw_mapping)
      end
    end

    # Returns the raw (unwrapped) column mappings.
    #
    # @mappings normally holds the full mapping Hash, in which case the value of its
    # 'columns' key is returned (or an empty array when that value is missing or nil).
    # read_non_tabular_array later replaces @mappings with that column array, so an
    # Array is returned as-is; indexing it with 'columns' would raise a TypeError,
    # e.g. if data were read a second time with the same instance.
    def raw_column_mappings
      return @mappings if @mappings.is_a?(Array)

      @mappings['columns'] || []
    end

    # Compares two arrays element by element and returns true only if they have the
    # same length and every pair matches.
    #
    # +lines+ must contain NdrImport::NonTabular::Line or string elements (each is
    # converted with to_s before comparing). +other_lines+ can be a mix of strings,
    # compared for equality, and regular expressions, matched against the line text.
    #
    # Comparison stops at the first pair that does not match.
    def lines_equal(lines, other_lines)
      return false unless lines.length == other_lines.length

      lines.zip(other_lines).all? do |line, expected|
        text = line.to_s
        # A regexp match returns an index (0 is truthy in Ruby) or nil
        expected.is_a?(Regexp) ? text =~ expected : text == expected
      end
    end
  end
end