Sha256: 7a34f370b9da876673c739f9633104a2f8742ee86ee086ffe9a1509792ddf501

Contents?: true

Size: 1.56 KB

Versions: 1

Compression:

Stored size: 1.56 KB

Contents

module PicturehouseUk
  # @api private
  module Internal
    # Sanitize and standardize film titles.
    #
    # Strips certificate markers, 2D/3D suffixes and similar noise from a
    # raw listing title, and rewrites event-cinema prefixes (e.g. "NT Live:")
    # to a single canonical form.
    class TitleSanitizer
      # Patterns whose matches are deleted from the title.
      REMOVE = [
        /\s\[(AS LIVE: )?[ACPGU1258]+\]/, # regular certificate
        /\s+[23][dD]/,                    # 2d or 3d from title
        /\s\[NO CERT\]/,                  # no certificate
        /\s\[\]/,                         # blank certificate
        /ourscreen\: /,                   # ourscreen
        /\s\(Re(\: \d{0,4})?\)/i,         # Re-release
        /\s\[CERT TBC\]/                  # certificate TBC
      ].freeze

      # Patterns mapped to the canonical prefix that replaces them. The first
      # capture group (the rest of the title) is re-attached after the prefix,
      # separated by a single space.
      REPLACE = {
        /Met\.? Encore: (.*)/ => 'Met Opera:',
        /Met\.? Opera: (.*)/  => 'Met Opera:',
        /NT Encore: (.*)/     => 'National Theatre:',
        /NT Live: (.*)/       => 'National Theatre:',
        /ROH\.? Live: (.*)/   => 'Royal Opera House:',
        /RSC\.? Live: (.*)/   => 'Royal Shakespeare Company:',
        /RSC\.? Encore: (.*)/ => 'Royal Shakespeare Company:'
      }.freeze

      # @param [String] title a film title
      def initialize(title)
        @title = title
      end

      # Sanitized and standardized title. The result is memoized; the string
      # passed to the constructor is never modified.
      # @return [String] title
      def sanitized
        @sanitized ||= begin
          # Non-destructive gsub: the caller's string may be frozen or shared.
          stripped = REMOVE.reduce(@title) do |text, pattern|
            text.gsub(pattern, '')
          end
          renamed = REPLACE.reduce(stripped) do |text, (pattern, prefix)|
            text.gsub(pattern) { "#{prefix} #{Regexp.last_match(1)}" }
          end
          # Collapse any runs of spaces left behind by the removals.
          renamed.squeeze(' ').strip
        end
      end
    end
  end
end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
picturehouse_uk-2.0.0 lib/picturehouse_uk/internal/title_sanitizer.rb