require 'mechanize/file'
require 'mechanize/file_saver'
require 'mechanize/page'

##
# Mechanize allows different parsers for different content types.  Mechanize
# uses PluggableParser to determine which parser to use for any content type.
# To use your own parser or to change the default parsers, register them with
# this class through Mechanize#pluggable_parser.
#
# The default parser for unregistered content types is Mechanize::File.
#
# The module Mechanize::Parser provides basic functionality for any content
# type, so you may use it in custom parsers you write.  For small files you
# wish to perform in-memory operations on, you should subclass
# Mechanize::File.  For large files you should subclass Mechanize::Download as
# the content is only loaded into memory in small chunks.
#
# When writing your own pluggable parser, be sure to provide a method #body
# that returns a String containing the response body for compatibility with
# Mechanize#get_file.
#
# == Example
#
# To create your own parser, just create a class that takes four parameters in
# the constructor.  Here is an example of registering a parser that handles
# CSV files:
#
#   require 'csv'
#
#   class CSVParser < Mechanize::File
#     attr_reader :csv
#
#     def initialize uri = nil, response = nil, body = nil, code = nil
#       super uri, response, body, code
#       @csv = CSV.parse body
#     end
#   end
#
#   agent = Mechanize.new
#   agent.pluggable_parser.csv = CSVParser
#   agent.get('http://example.com/test.csv')  # => CSVParser
#
# Now any response with a content type of 'text/csv' will initialize a
# CSVParser and return that object to the caller.
#
# To register a parser for a content type that Mechanize does not know about,
# use the hash syntax:
#
#   agent.pluggable_parser['text/something'] = SomeClass
#
# To set the default parser, use #default:
#
#   agent.pluggable_parser.default = Mechanize::Download
#
# Now all unknown content types will be saved to disk and not loaded into
# memory.

class Mechanize::PluggableParser

  ##
  # Short names for the content types this class refers to by symbol

  CONTENT_TYPES = {
    :html  => 'text/html',
    :wap   => 'application/vnd.wap.xhtml+xml',
    :xhtml => 'application/xhtml+xml',
    :pdf   => 'application/pdf',
    :csv   => 'text/csv',
    :xml   => 'text/xml',
  }

  ##
  # The parser class returned by #parser when no registered parser matches

  attr_accessor :default

  ##
  # Creates a registry that maps HTML, XHTML and WAP content to
  # Mechanize::Page and the 'image' media type to Mechanize::Image.  The
  # default parser is Mechanize::File.

  def initialize
    @parsers = {
      CONTENT_TYPES[:html]  => Mechanize::Page,
      CONTENT_TYPES[:xhtml] => Mechanize::Page,
      CONTENT_TYPES[:wap]   => Mechanize::Page,
      'image'               => Mechanize::Image
    }

    @default = Mechanize::File
  end

  ##
  # Returns the parser registered for the given +content_type+
  #
  # An exact match on +content_type+ is tried first.  Otherwise the content
  # type is parsed with MIME::Type and looked up, in order, by its full
  # string form, its simplified form, its simplified form with any "x-"
  # prefix removed, and its media type (such as 'image').
  #
  # The #default parser is returned when +content_type+ is nil or false, when
  # MIME::Type rejects it as invalid, or when no lookup finds a parser.

  def parser content_type
    return default unless content_type

    exact = @parsers[content_type]

    return exact if exact

    mime_type = MIME::Type.new content_type

    # nil when the installed mime-types cannot strip the prefix on request;
    # guarded below so a nil key is never looked up.
    x_free = simplified_without_x_prefix mime_type

    @parsers[mime_type.to_s] ||
      @parsers[mime_type.simplified] ||
      (x_free && @parsers[x_free]) ||
      @parsers[mime_type.media_type] ||
      default
  rescue invalid_content_type_error
    default
  end

  def register_parser content_type, klass # :nodoc:
    @parsers[content_type] = klass
  end

  ##
  # Registers +klass+ as the parser for text/html and application/xhtml+xml
  # content

  def html=(klass)
    register_parser(CONTENT_TYPES[:html], klass)
    register_parser(CONTENT_TYPES[:xhtml], klass)
  end

  ##
  # Registers +klass+ as the parser for application/xhtml+xml content

  def xhtml=(klass)
    register_parser(CONTENT_TYPES[:xhtml], klass)
  end

  ##
  # Registers +klass+ as the parser for application/pdf content

  def pdf=(klass)
    register_parser(CONTENT_TYPES[:pdf], klass)
  end

  ##
  # Registers +klass+ as the parser for text/csv content

  def csv=(klass)
    register_parser(CONTENT_TYPES[:csv], klass)
  end

  ##
  # Registers +klass+ as the parser for text/xml content

  def xml=(klass)
    register_parser(CONTENT_TYPES[:xml], klass)
  end

  ##
  # Retrieves the parser registered under exactly +content_type+, or nil if
  # there is none.  Unlike #parser, no MIME type simplification is attempted
  # and the #default parser is never returned.

  def [](content_type)
    @parsers[content_type]
  end

  ##
  # Sets the parser for +content_type+ content to +klass+
  #
  # The +content_type+ may be a full MIME type, a simplified MIME type
  # ('text/x-csv' simplifies to 'text/csv'), or a media type like 'image'.

  def []= content_type, klass
    register_parser content_type, klass
  end

  private

  ##
  # Returns the exception class MIME::Type raises for a content type it
  # cannot parse.
  #
  # mime-types 2.1 and later name it MIME::Type::InvalidContentType, while
  # earlier releases name it MIME::InvalidContentType.  Rescuing a constant
  # that does not exist raises NameError from the rescue clause itself, so
  # the name is resolved against whichever release is loaded.
  #
  # Resolution happens at call time rather than at class-definition time so
  # that loading this class does not require MIME to be loaded already.

  def invalid_content_type_error
    if defined?(MIME::Type::InvalidContentType)
      MIME::Type::InvalidContentType
    else
      MIME::InvalidContentType
    end
  end

  ##
  # Returns the simplified form of +mime_type+ with any "x-" prefix removed
  # ('text/x-csv' becomes 'text/csv').
  #
  # mime-types 3.x keeps the prefix in MIME::Type#simplified unless
  # +remove_x_prefix+ is requested.  Older releases strip the prefix
  # themselves and do not accept the option, in which case the
  # ArgumentError is absorbed and nil is returned.

  def simplified_without_x_prefix mime_type
    MIME::Type.simplified mime_type.to_s, :remove_x_prefix => true
  rescue ArgumentError
    nil
  end

end