lib/indirizzo/address.rb in Indirizzo-0.1.0 vs lib/indirizzo/address.rb in Indirizzo-0.1.1

- old
+ new

@@ -1,16 +1,17 @@ require 'indirizzo/constants' +require 'awesome_print' module Indirizzo # Defines the matching of parsed address tokens. Match = { # FIXME: shouldn't have to anchor :number and :zip at start/end :number => /^(\d+\W|[a-z]+)?(\d+)([a-z]?)\b/io, :street => /(?:\b(?:\d+\w*|[a-z'-]+)\s*)+/io, - :city => /(?:\b[a-z'-]+\s*)+/io, - :state => Regexp.new(State.regexp.source + "\s*$", Regexp::IGNORECASE), - :zip => /(\d{5})(?:-\d{4})?\s*$/o, + :city => /(?:\b[a-z][a-z'-]+\s*)+/io, + :state => State.regexp, + :zip => /\b(\d{5})(?:-(\d{4}))?\b/o, :at => /\s(at|@|and|&)\s/io, :po_box => /\b[P|p]*(OST|ost)*\.*\s*[O|o|0]*(ffice|FFICE)*\.*\s*[B|b][O|o|0][X|x]\b/ } # The Address class takes a US street address or place name and @@ -21,10 +22,11 @@ attr_accessor :prenum, :number, :sufnum attr_accessor :street attr_accessor :city attr_accessor :state attr_accessor :zip, :plus4 + attr_accessor :country # Takes an address or place name string as its sole argument. def initialize (text) raise ArgumentError, "no text provided" unless text and !text.empty? if text.class == Hash @@ -120,84 +122,88 @@ strings << string end strings end - def parse_zip(regex_match, text) - idx = text.rindex(regex_match) - text[idx...idx+regex_match.length] = "" - text.sub! /\s*,?\s*$/o, "" - @zip, @plus4 = @zip.map {|s|s.strip} - text - end - def parse_state(regex_match, text) idx = text.rindex(regex_match) - text[idx...idx+regex_match.length] = "" - text.sub! /\s*,?\s*$/o, "" @full_state = @state[0].strip # special case: New York @state = State[@full_state] + @city = "Washington" if @state == "DC" && text[idx...idx+regex_match.length] =~ /washington\s+d\.?c\.?/i text end - def parse_number(regex_match, text) - # FIXME: What if this string appears twice? - idx = text.index(regex_match) - text[idx...idx+regex_match.length] = "" - text.sub! /^\s*,?\s*/o, "" - @prenum, @number, @sufnum = @number.map {|s| s and s.strip} - text - end - def parse text = @text.clone.downcase - @zip = text.scan(Match[:zip])[-1] + @zip = text.scan(Match[:zip]).last if @zip - text = parse_zip($&, text) + last_match = $& + zip_index = text.rindex(last_match) + zip_end_index = zip_index + last_match.length - 1 + @zip, @plus4 = @zip.map {|s| s and s.strip } else @zip = @plus4 = "" + zip_index = text.length + zip_end_index = -1 end - @state = text.scan(Match[:state])[-1] + @country = @text[zip_end_index+1..-1].sub(/^\s*,\s*/, '').strip + @country = nil if @country == text + + @state = text.scan(Match[:state]).last if @state - text = parse_state($&, text) + last_match = $& + state_index = text.rindex(last_match) + text = parse_state(last_match, text) else @full_state = "" @state = "" end - @number = text.scan(Match[:number])[0] + @number = text.scan(Match[:number]).first # FIXME: 230 Fish And Game Rd, Hudson NY 12534 if @number # and not intersection? - text = parse_number($&, text) + last_match = $& + number_index = text.index(last_match) + number_end_index = number_index + last_match.length - 1 + @prenum, @number, @sufnum = @number.map {|s| s and s.strip} else + number_end_index = -1 @prenum = @number = @sufnum = "" end # FIXME: special case: Name_Abbr gets a bit aggressive # about replacing St with Saint. exceptional case: # Sault Ste. Marie # FIXME: PO Box should geocode to ZIP - @street = text.scan(Match[:street]) + street_search_end_index = [state_index,zip_index,text.length].reject(&:nil?).min-1 + @street = text[number_end_index+1..street_search_end_index].scan(Match[:street]).map { |s| s and s.strip } + @street = expand_streets(@street) # SPECIAL CASE: 1600 Pennsylvania 20050 @street << @full_state if @street.empty? and @state.downcase != @full_state.downcase - @city = text.scan(Match[:city]) - if !@city.empty? - @city = [@city[-1].strip] - add = @city.map {|item| item.gsub(Name_Abbr.regexp) {|m| Name_Abbr[m]}} - @city |= add - @city.map! {|s| s.downcase} - @city.uniq! - else - @city = [] + street_end_index = @street.map { |s| text.rindex(s) }.reject(&:nil?).min||0 + + if @city.nil? || @city.empty? + @city = text[street_end_index..street_search_end_index+1].scan(Match[:city]) + if !@city.empty? + #@city = [@city[-1].strip] + @city = [@city.last.strip] + add = @city.map {|item| item.gsub(Name_Abbr.regexp) {|m| Name_Abbr[m]}} + @city |= add + @city.map! {|s| s.downcase} + @city.uniq! + else + @city = [] + end + + # SPECIAL CASE: no city, but a state with the same name. e.g. "New York" + @city << @full_state if @state.downcase != @full_state.downcase end - # SPECIAL CASE: no city, but a state with the same name. e.g. "New York" - @city << @full_state if @state.downcase != @full_state.downcase end def expand_streets(street) if !street.empty? && !street[0].nil? street.map! {|s|s.strip}