Sha256: fe4ff476442d10f1b491fdc294e5c53784c7cc3059bfd8e0700d2aa0e5b701a7

Contents?: true

Size: 1.89 KB

Versions: 6

Compression:

Stored size: 1.89 KB

Contents

module PicturehouseUk
  # @api private
  module Internal
    # Parses a chunk of HTML to derive address
    class AddressParser
      # CSS for address lines
      ADDRESS_LINES_CSS = '.cinemaListBox'

      # regular expression for postal code
      POSTCODE_REGEX = /[A-Z]{1,2}\d{1,2}[A-Z]?\s\d{1,2}[A-Z]{1,2}/

      # @param [String] node the HTML to parse into an address
      # @return [PicturehouseUk::Internal::AddressParser]
      def initialize(html)
        @html = html
      end

      # @return [Hash] contains :street_address, :extended_address, :locality,
      # :postal_code, :country
      # @note Uses the address naming from http://microformats.org/wiki/adr
      def address
        {
          street_address: array[0],
          extended_address: array.length > 3 ? array[1] : nil,
          locality: array[-2],
          postal_code: array[-1],
          country: 'United Kingdom'
        }
      end

      private

      def array
        @array ||= begin
          if standard?
            lines[0..postal_code_index(lines)]
          else
            # this is a horrendous hack for Hackney Picturehouse
            doc.css('p').to_s.split('Box Office')[0].split('<br> ')[1..-1]
          end
        end
      end

      def doc
        @doc ||= Nokogiri::HTML(@html)
      end

      def matched_lines
        @matched_lines ||= begin
          matched = doc.css(ADDRESS_LINES_CSS).map { |n| n.children[0].to_s }
          matched.reject { |e| e == '' }
        end
      end

      def postal_code_index(array)
        array.index { |element| element.match(POSTCODE_REGEX) }
      end

      def lines
        if matched_lines.length > 0 && matched_lines[0].match(/\d+\Z/) # komedia
          ["#{matched_lines[0]} #{matched_lines[1]}"] + matched_lines[2..-1]
        else
          matched_lines
        end
      end

      def standard?
        lines && lines.length > 0
      end
    end
  end
end

Version data entries

6 entries across 6 versions & 1 rubygems

Version Path
picturehouse_uk-2.0.5 lib/picturehouse_uk/internal/address_parser.rb
picturehouse_uk-2.0.4 lib/picturehouse_uk/internal/address_parser.rb
picturehouse_uk-2.0.3 lib/picturehouse_uk/internal/address_parser.rb
picturehouse_uk-2.0.2 lib/picturehouse_uk/internal/address_parser.rb
picturehouse_uk-2.0.1 lib/picturehouse_uk/internal/address_parser.rb
picturehouse_uk-2.0.0 lib/picturehouse_uk/internal/address_parser.rb