# frozen_string_literal: true

require "countries" unless defined?(ISO3166::Country)

module Lite
  module Address
    # Parses a free-form address string into a Lite::Address::Format.
    #
    # Four lookups are available, both as instance methods and as
    # class-level shortcuts (see LOOKUPS): +any+, +formal+, +informal+
    # and +intersectional+. Each returns +nil+ when the address does not
    # match the corresponding pattern.
    class Parser
      # Names of the public lookups; each one is also exposed as a class method.
      LOOKUPS = %i[any formal informal intersectional].freeze
      # Parsed parts whose words are capitalized before the result is built.
      CAPITALIZATION_PARTS = %w[street street_type street2 street_type2 city unit_prefix].freeze
      # Key suffixes used to visit street/street_type, street1/street_type1, ...
      STREET_POSITIONS = ["", "1", "2"].freeze

      attr_reader :address, :country_code

      # @param address [#to_s] raw address text; one wrapping "(" / ")" is dropped
      # @param country_code [#to_s] country code, upcased before use
      def initialize(address, country_code: "US")
        @address = sanitize_address(address)
        @country_code = sanitize_country_code(country_code)
      end

      class << self
        # Defines Parser.any / .formal / .informal / .intersectional.
        #
        # +args[:country_code]+ (default "US") selects the country; every other
        # entry is forwarded to the instance method of the same name. The
        # caller's hash is left untouched.
        LOOKUPS.each do |method_name|
          define_method(method_name) do |address, args = {}|
            country_code = args[:country_code] || "US"
            new(address, country_code:).public_send(method_name, args.except(:country_code))
          end
        end
      end

      # Uses the intersectional lookup when the address matches the corner
      # pattern; otherwise tries the formal lookup and then the informal one.
      #
      # @param args [Hash] options forwarded to the chosen lookup
      # @return [Lite::Address::Format, nil]
      def any(args = {})
        return intersectional(args) if regexp.corner.match(address)

        formal(args) || informal(args)
      end

      # Parses the address with the formal address pattern.
      #
      # @param args [Hash] options, see #generate_address
      # @return [Lite::Address::Format, nil] nil when the pattern does not match
      def formal(args = {})
        return unless (match = regexp.formal_address.match(address))

        map = match_map(match)
        generate_address(map, args)
      end

      # Parses the address with the informal address pattern.
      #
      # @param args [Hash] options, see #generate_address
      # @return [Lite::Address::Format, nil] nil when the pattern does not match
      def informal(args = {})
        return unless (match = regexp.informal_address.match(address))

        map = match_map(match)
        generate_address(map, args)
      end

      # Parses the address with the intersectional address pattern, filling in
      # the street/street_type parts for both streets.
      #
      # @param args [Hash] options, see #generate_address
      # @return [Lite::Address::Format, nil] nil when the pattern does not match
      def intersectional(args = {})
        return unless (match = regexp.intersectional_address.match(address))

        map = match_map(match)
        intersectional_submatch(match, map, "street")
        intersectional_submatch(match, map, "street_type")
        intersectional_rematch(match, map, "street_type")
        generate_address(map, args)
      end

      protected

      # Memoized country object for +country_code+.
      def country
        @country ||= ISO3166::Country.new(country_code)
      end

      # Memoized lookup-table provider built from the country.
      def list
        @list ||= Lite::Address::List.new(country)
      end

      # Memoized pattern provider built from the list.
      def regexp
        @regexp ||= Lite::Address::Regexp.new(list)
      end

      private

      # Drops one leading "(" and one trailing ")" from the input.
      # +to_s+ lets a nil address parse to "no match" instead of raising.
      def sanitize_address(value)
        value.to_s.delete_prefix("(").delete_suffix(")")
      end

      def sanitize_country_code(value)
        value.to_s.upcase
      end

      # Builds a hash of capture name => captured text, skipping captures
      # that did not participate in the match.
      def match_map(match)
        match.names.each_with_object({}) do |name, hash|
          hash[name] = match[name] if match[name]
        end
      end

      # Part name => lookup table used by #address_normalize_values.
      def normalization_map
        @normalization_map ||= {
          "prefix" => list.cardinal_types,
          "prefix1" => list.cardinal_types,
          "prefix2" => list.cardinal_types,
          "suffix" => list.cardinal_types,
          "suffix1" => list.cardinal_types,
          "suffix2" => list.cardinal_types,
          "street_type" => list.street_types,
          "street_type1" => list.street_types,
          "street_type2" => list.street_types,
          "state" => list.subdivision_names
        }
      end
      # rubocop:enable Metrics/AbcSize

      # Copies the first two non-nil captures registered under +part+ in the
      # intersectional pattern into map[part] and map["#{part}2"].
      def intersectional_submatch(match, map, part)
        parts = regexp.intersectional_address.named_captures
        parts = parts[part].filter_map { |i| match[i.to_i] }
        map[part] = parts[0] if parts[0]
        map["#{part}2"] = parts[1] if parts[1]
      end

      # When only one value was captured for +part+ (or both are equal), strips
      # a trailing "s" plus any non-word characters and, if the remainder fully
      # matches the +part+ pattern, uses it for both streets.
      def intersectional_rematch(_match, map, part)
        return unless map[part] && (!map["#{part}2"] || (map[part] == map["#{part}2"]))

        type = map[part].dup
        # No /o flag here: it would freeze the interpolated pattern for the whole
        # process, ignoring both +part+ and this parser's country on later calls.
        return unless type.gsub!(/s\W*$/i, "") && /\A#{regexp.public_send(part)}\z/i.match?(type)

        map[part] = map["#{part}2"] = type
      end

      # Trims every value in place and removes characters outside the allowed
      # set; only "number" is allowed to keep periods.
      def address_strip_chars(map)
        map.each do |key, string|
          string.strip!

          if key == "number"
            string.gsub!(%r{[^\w\s\-\#&/.]}, "")
          else
            string.gsub!(%r{[^\w\s\-\#&/]}, "")
          end
        end
      end

      # When a street is present without a street type, tries to take the type
      # from the street itself and marks the street type as redundant.
      def address_redundantize_street_type(map)
        map["redundant_street_type"] = false
        return unless map["street"] && !map["street_type"]

        match = regexp.street.match(map["street"])
        map["street_type"] = match["street_type"] if match
        map["redundant_street_type"] = true
      end

      # Replaces the unit prefix with the abbreviation of each matching pattern.
      def address_abbreviate_unit_prefixes(map)
        list.unit_abbr_regexps.each do |abbr, regex|
          regex.match(map["unit_prefix"]) do |_match|
            map["unit_prefix"] = abbr
          end
        end
      end

      # Replaces each part listed in #normalization_map with its table entry
      # (looked up by the downcased value) when one exists.
      def address_normalize_values(map)
        normalization_map.each do |key, hash|
          next unless (map_key = map[key])

          mapping = hash[map_key.downcase]
          map[key] = mapping if mapping
        end
      end

      # Deletes a street type whose pattern already matches the street text.
      def address_avoid_redundant_street_type(map)
        STREET_POSITIONS.each do |suffix|
          street = map["street#{suffix}"]
          street_type = map["street_type#{suffix}"]
          next if !street || !street_type

          type_regexp = list.street_type_regexps[street_type.downcase]
          # A street type without a registered pattern is left alone.
          next unless type_regexp&.match(street)

          map.delete("street_type#{suffix}")
        end
      end

      # Expands a leading cardinal code in the city using list.cardinal_codes.
      def address_expand_cardinals(map)
        return unless map["city"]

        # No /o flag: the pattern depends on this parser's country.
        map["city"].gsub!(/^(#{regexp.cardinal_code})\s+(?=\S)/) do |prefix|
          # The block receives the matched text (code plus trailing whitespace),
          # so look up the whole code rather than only its first character.
          expanded = list.cardinal_codes[prefix.rstrip.upcase]
          expanded ? "#{expanded} " : prefix
        end
      end

      def address_fix_dirty_ordinals(map)
        # Sometimes parcel data will have addresses like "1 1ST ST" as "1 1 ST ST"
        return unless map["street"]

        map["street"].gsub!(/\A\d+\s+(?:st|nd|rd|th)\z/i) do |ordinal|
          ordinal.gsub(/\s+/, "")
        end
      end

      # Capitalizes each whitespace-separated word of the listed parts.
      def address_capitalize_parts(map)
        CAPITALIZATION_PARTS.each do |k|
          map[k] = map[k].split.map(&:capitalize).join(" ") if map[k]
        end
      end

      # Runs the clean-up steps over +map+ (mutating it) and builds the result.
      #
      # @param map [Hash{String => String}] parsed parts from #match_map
      # @param args [Hash] :avoid_redundant_street_type enables
      #   #address_avoid_redundant_street_type
      # @return [Lite::Address::Format]
      def generate_address(map, args = {})
        address_strip_chars(map)
        address_redundantize_street_type(map)
        address_abbreviate_unit_prefixes(map)
        address_normalize_values(map)
        address_avoid_redundant_street_type(map) if args[:avoid_redundant_street_type]
        address_expand_cardinals(map)
        address_fix_dirty_ordinals(map)
        address_capitalize_parts(map)

        map[:country] = country
        Lite::Address::Format.new(list:, **map)
      end
    end
  end
end