require 'parslet'
require 'marc/spec/parsing/closed_int_range'
require 'marc/spec/parsing/closed_lc_alpha_range'

module MARC
  module Spec
    module Parsing
      # rubocop:disable Style/BlockDelimiters
      # noinspection RubyResolve
      class Parser < Parslet::Parser
        # Parslet grammar for MARCspec expressions. Each rule is preceded by
        # the ABNF production it implements (where one exists); rules whose
        # names start with an underscore, and those marked "extracted", are
        # local helpers without a one-to-one counterpart in the ABNF.
        #
        # All alternations are ordered choices: the first alternative that
        # matches wins, so the order of operands is significant throughout.

        # ------------------------------------------------------------
        # DSL extensions

        # Makes ClosedIntRange usable inside rule bodies like a built-in term.
        #
        # @return [ClosedIntRange] a newly constructed atom on every call
        def closed_int_range
          ClosedIntRange.new
        end

        # Makes ClosedLcAlphaRange usable inside rule bodies like a built-in term.
        #
        # @return [ClosedLcAlphaRange] a newly constructed atom on every call
        def closed_lc_alpha_range
          ClosedLcAlphaRange.new
        end

        # ------------------------------------------------------------
        # Parsing rules

        # alphaupper        = %x41-5A
        #                     ; A-Z
        rule(:alpha_upper) { match['A-Z'] }

        # alphalower        = %x61-7A
        #                     ; a-z
        rule(:alpha_lower) { match['a-z'] }

        # DIGIT             =  %x30-39
        #                     ; 0-9
        rule(:digit) { match['0-9'] }

        # VCHAR             =  %x21-7E
        #                     ; visible (printing) characters
        rule(:vchar) { match['\u0021-\u007e'] }

        # positiveDigit     = %x31-39
        #                     ;  "1" / "2" / "3" / "4" / "5" / "6" / "7" / "8" / "9"
        rule(:positive_digit) { match['1-9'] }

        # positiveInteger   = "0" / positiveDigit [1*DIGIT]
        #
        # NOTE: the name is inherited from the ABNF and is a misnomer, since
        #       a lone "0" is accepted as well.
        rule(:positive_integer) do
          zero = str('0')
          nonzero = positive_digit >> digit.repeat
          zero | nonzero
        end

        # fieldTag          = 3(alphalower / DIGIT / ".") / 3(alphaupper / DIGIT / ".")
        #
        # Exactly three characters; letters must be all lower case or all
        # upper case, never mixed.
        rule(:field_tag) do
          lowercase_tag = (alpha_lower | digit | str('.')).repeat(3, 3)
          uppercase_tag = (alpha_upper | digit | str('.')).repeat(3, 3)
          lowercase_tag | uppercase_tag
        end

        # position          = positiveInteger / "#"
        rule(:position) { positive_integer | str('#') }

        # Helper split out of range (see below): a range starting with "#".
        #
        # NOTE: #-n means from (last index - n) to end of string. The "#" is
        #       matched but ignored, so it contributes no text to :from / :to.
        rule(:left_open_range) do
          open_start = str('#').ignore.as(:from)
          end_point = (positive_integer | str('#').ignore).as(:to)
          open_start >> str('-') >> end_point
        end

        # Helper split out of range (see below): a range ending with "#".
        #
        # NOTE: n-# means from position n to end of string. The "#" is
        #       matched but ignored, so it contributes no text to :to.
        rule(:right_open_range) do
          start_point = positive_integer.as(:from)
          open_end = str('#').ignore.as(:to)
          start_point >> str('-') >> open_end
        end

        # range             = position "-" position
        #
        # NOTE: n-# means from position n to end of string;
        #       #-n means from (last index - n) to end of string
        #
        # A literal transcription of the ABNF would be
        #   position.as(:from) >> str('-') >> position.as(:to)
        # but the rule is instead split into the two open-ended forms plus
        # the closed integer range atom.
        rule(:range) { left_open_range | right_open_range | closed_int_range }

        # positionOrRange   = range / position
        rule(:position_or_range) { range | position.as(:pos) }

        # characterSpec     = "/" positionOrRange
        rule(:character_spec) { str('/') >> position_or_range.as(:character_spec) }

        # index             = "[" positionOrRange "]"
        rule(:index) do
          bracketed = str('[') >> position_or_range >> str(']')
          bracketed.as(:index)
        end

        # fieldSpec         = fieldTag [index] [characterSpec]
        rule(:field_spec) do
          tag = field_tag.as(:tag)
          chars = character_spec.as(:selector).maybe
          tag >> index.maybe >> chars
        end

        # abrFieldSpec      = index [characterSpec] / characterSpec
        rule(:abr_field_spec) do
          with_index = index >> character_spec.as(:selector).maybe
          chars_only = character_spec.as(:selector)
          with_index | chars_only
        end

        # subfieldChar      = %x21-3F / %x5B-7B / %x7D-7E
        #                     ; ! " # $ % & ' ( ) * + , - . / 0-9 : ; < = > ? [ \ ] ^ _ ` a-z { } ~
        # NOTE: deliberately wider than alphanumerics; see
        #       https://github.com/MARCspec/MARCspec/issues/31
        rule(:subfield_char) do
          low_block = match['\u0021-\u003f']
          mid_block = match['\u005b-\u007b']
          high_block = match['\u007d-\u007e']
          low_block | mid_block | high_block
        end

        # subfieldCode      = "$" subfieldChar
        #
        # The "$" is matched but ignored, so only the code character is kept.
        rule(:subfield_code) { str('$').ignore >> subfield_char }

        # UNDOCUMENTED in the ABNF -- see spec/suite/valid/validSubfieldRange.json
        # and https://github.com/MARCspec/MARCspec-Test-Suite/issues/1
        rule(:subfield_range) { closed_lc_alpha_range | closed_int_range }

        # subfieldCodeRange = "$" ( (alphalower "-" alphalower) / (DIGIT "-" DIGIT) )
        #                     ; [a-z]-[a-z] / [0-9]-[0-9]
        #
        # NOTE: the docs don't require a valid range (start <= end), but the
        #       tests do enforce it
        rule(:subfield_code_range) { str('$').ignore >> subfield_range }

        # abrSubfieldSpec   = (subfieldCode / subfieldCodeRange) [index] [characterSpec]
        #
        # The code range is attempted before the single code.
        rule(:abr_subfield_spec) do
          code = (subfield_code_range | subfield_code).as(:code)
          sf_chars = character_spec.as(:sf_chars).maybe
          (code >> index.maybe >> sf_chars).as(:selector)
        end

        # subfieldSpec      = fieldTag [index] abrSubfieldSpec
        rule(:subfield_spec) { field_tag.as(:tag) >> index.maybe >> abr_subfield_spec }

        # UNDOCUMENTED in the ABNF -- see spec/suite/valid/validIndicators.json
        # and https://github.com/MARCspec/MARCspec-Test-Suite/issues/1
        rule(:indicators) { str('1') | str('2') }

        # abrIndicatorSpec  = [index] "^" ("1" / "2")
        rule(:abr_indicator_spec) do
          indicator = indicators.as(:ind).as(:selector)
          index.maybe >> str('^') >> indicator
        end

        # indicatorSpec     = fieldTag abrIndicatorSpec
        rule(:indicator_spec) { field_tag.as(:tag) >> abr_indicator_spec }

        # Helper for comparisonString: any VCHAR except the ones that have to
        # be escaped. A literal "\" is part of this set; its role as an escape
        # prefix is handled by vchar_cs_esc.
        rule(:vchar_cs_plain) { match['\u0021-\u007e&&[^!$=?{|}~]'] }

        # Helper for comparisonString: the VCHARs that have to be escaped.
        rule(:vchar_cs_special) { match['!$=?{|}~'] }

        # Helper for comparisonString: a backslash followed by one of the
        # characters that have to be escaped.
        rule(:vchar_cs_esc) { str('\\') >> vchar_cs_special }

        # Body of a comparisonString, without its leading "\". Kept separate
        # to simplify the generated tests, which don't take the leading "\"
        # into account.
        rule(:comparison_string) do
          # in the first position the escape is apparently optional, so a
          # bare special character is accepted there
          first_char = vchar_cs_special | vchar_cs_esc | vchar_cs_plain
          remainder = (vchar_cs_esc | vchar_cs_plain).repeat
          first_char >> remainder
        end

        # comparisonString  = "\" *VCHAR
        #
        # NOTE: the generated tests cover only the body of the string, not
        #       the leading "\", so the complete rule gets its own name. A
        #       leading "\s" is matched and kept in the output; otherwise the
        #       leading "\" is matched and ignored.
        rule(:_comparison_string) do
          prefix = str('\\s') | str('\\').ignore
          (prefix >> comparison_string).as(:comparison_string)
        end

        # operator          = "=" / "!=" / "~" / "!~" / "!" / "?"
        #                     ; equal / unequal / includes / not includes / not exists / exists
        #
        # The literals are folded left to right into a single ordered choice,
        # so "!=" and "!~" are attempted before the bare "!".
        rule(:operator) do
          %w[= != ~ !~ ! ?].map { |symbol| str(symbol) }.reduce(:|)
        end

        # abbreviation      = abrFieldSpec / abrSubfieldSpec / abrIndicatorSpec
        #
        # Alternatives are attempted in a different order than the ABNF lists them.
        rule(:abbreviation) { abr_subfield_spec | abr_indicator_spec | abr_field_spec }

        # subTerm           = fieldSpec / subfieldSpec / indicatorSpec / comparisonString / abbreviation
        #
        # Alternatives are attempted in a different order than the ABNF lists them.
        rule(:sub_term) { subfield_spec | indicator_spec | field_spec | _comparison_string | abbreviation }

        # subTermSet        = [ [subTerm] operator ] subTerm
        rule(:sub_term_set) do
          left_and_operator = sub_term.as(:left).maybe >> operator.as(:operator)
          left_and_operator.maybe >> sub_term.as(:right)
        end

        # Helper for subSpec: two or more subTermSets separated by "|".
        rule(:_chained_sub_term_sets) do
          further_sets = (str('|') >> sub_term_set).repeat(1)
          (sub_term_set >> further_sets).as(:any_condition)
        end

        # subSpec           = "{" subTermSet *( "|" subTermSet ) "}"
        #
        # NOTE: the generated tests really exercise subSpec* (see sub_spec),
        #       so the single subSpec gets its own name.
        rule(:_sub_spec) do
          body = _chained_sub_term_sets | sub_term_set
          str('{') >> body >> str('}')
        end

        # Helper for sub_spec: two or more consecutive subSpecs.
        rule(:_repeated_sub_specs) { _sub_spec.repeat(2).as(:all_conditions) }

        # One or more subSpecs; the repetition is folded into this rule to
        # satisfy the generated tests.
        rule(:sub_spec) { _repeated_sub_specs | _sub_spec }

        # Helper for MARCspec, rewritten for clarity:
        #  (subfieldSpec *subSpec *(abrSubfieldSpec *subSpec))
        #  -> (fieldTag [index] *(abrSubfieldSpec *subSpec))
        #
        # This rule requires at least two abrSubfieldSpec entries; a single
        # subfield is covered by the other branch of _varfield_marc_spec.
        rule(:_multiple_subfield_spec) do
          field = field_tag.as(:tag) >> index.maybe
          subquery = abr_subfield_spec >> sub_spec.as(:condition).maybe
          field >> subquery.repeat(2).as(:subqueries)
        end

        # Helper for MARCspec:
        #   (subfieldSpec *subSpec *(abrSubfieldSpec *subSpec))
        # rewritten for ease of parsing as:
        #   (fieldTag [index] *(abrSubfieldSpec *subSpec))
        #
        # The multi-subfield form is attempted first.
        rule(:_varfield_marc_spec) do
          single_subfield = subfield_spec >> sub_spec.as(:condition).maybe
          _multiple_subfield_spec | single_subfield
        end

        # Helper for MARCspec:
        # indicatorSpec *subSpec
        rule(:_indicator_marc_spec) { indicator_spec >> sub_spec.as(:condition).maybe }

        # Helper for MARCspec:
        # fieldSpec *subSpec
        rule(:_fixedfield_marc_spec) { field_spec >> sub_spec.as(:condition).maybe }

        # MARCspec          = fieldSpec *subSpec / (subfieldSpec *subSpec *(abrSubfieldSpec *subSpec)) / indicatorSpec *subSpec
        #
        # Alternatives are attempted in a different order than the ABNF lists them.
        rule(:marc_spec) { _varfield_marc_spec | _indicator_marc_spec | _fixedfield_marc_spec }

        root(:marc_spec)
      end

      # rubocop:enable Style/BlockDelimiters
    end
  end
end