# TODO: trim down these require statements to only include upper level
require 'mspire'
require 'builder'
require 'core_ext/enumerable'

require 'mspire/mzml/reader'

require 'mspire/mzml/scan_settings'

module Mspire
  # Reading an mzml file:
  #
  #     Mspire::Mzml.open("somefile.mzML") do |mzml|
  #       mzml.each do |spectrum|
  #         scan = spectrum.scan
  #         spectrum.mzs                  # array of m/zs
  #         spectrum.intensities          # array of intensities
  #         spectrum.peaks do |mz,intensity|
  #           puts "mz: #{mz} intensity: #{intensity}" 
  #         end
  #       end
  #     end
  #
  # Note that the mzml object supports random spectrum access (even if the
  # mzml was not indexed):
  #
  #     mzml[22]  # retrieve spectrum at index 22
  #
  # Writing an mzml file from scratch:
  #
  #     spec1 = Mspire::Mzml::Spectrum.new('scan=1') do |spec|
  #       spec.describe_many! ['MS:1000127', ['MS:1000511', 1]]
  #       spec.data_arrays = [[1,2,3], [4,5,6]]
  #       spec.scan_list = Mspire::Mzml::ScanList.new do |sl|
  #         scan = Mspire::Mzml::Scan.new do |scan|
  #           # retention time of 40 seconds
  #           scan.describe! ['MS:1000016', 40.0, 'UO:0000010']
  #         end
  #         sl << scan
  #       end
  #     end
  #
  #     mzml = Mspire::Mzml.new do |mzml|
  #       mzml.id = 'the_little_example'
  #       mzml.cvs = Mspire::Mzml::CV::DEFAULT_CVS
  #       mzml.file_description = Mspire::Mzml::FileDescription.new  do |fd|
  #         fd.file_content = Mspire::Mzml::FileContent.new
  #         fd.source_files << Mspire::Mzml::SourceFile.new
  #       end
  #       default_instrument_config = Mspire::Mzml::InstrumentConfiguration.new("IC",[])
  #       default_instrument_config.describe! 'MS:1000031'
  #       mzml.instrument_configurations << default_instrument_config
  #       software = Mspire::Mzml::Software.new
  #       mzml.software_list << software
  #       default_data_processing = Mspire::Mzml::DataProcessing.new("did_nothing")
  #       mzml.data_processing_list << default_data_processing
  #       mzml.run = Mspire::Mzml::Run.new("little_run", default_instrument_config) do |run|
  #         spectrum_list = Mspire::Mzml::SpectrumList.new(default_data_processing)
  #         spectrum_list.push(spec1)
  #         run.spectrum_list = spectrum_list
  #       end
  #     end
  class Mzml
    include Enumerable  # each_spectrum

    class << self
      # read-only right now
      def open(filename, &block)
        File.open(filename) do |io|
          block.call(self.new(io))
        end
      end

      def foreach(filename, &block)
        block or return enum_for(__method__, filename)
        open(filename) do |mzml|
          mzml.each(&block)
        end
      end
    end

    module Default
      NAMESPACE = {
        :xmlns => "http://psi.hupo.org/ms/mzml",
        "xmlns:xsi" => "http://www.w3.org/2001/XMLSchema-instance", 
        "xmlns:xsd" => "http://www.w3.org/2001/XMLSchema", 
      }

      VERSION = '1.1.0'
    end

    ###############################################
    # ATTRIBUTES
    ###############################################

    # (optional) an id for accessing from external files
    attr_accessor :id
   
    # (required) the Mzml document version
    attr_accessor :version

    # (optional) e.g. a PRIDE accession number
    attr_accessor :accession

    ###############################################
    # SUBELEMENTS
    ###############################################

    # (required) an array of Mspire::Mzml::CV objects
    attr_accessor :cvs

    # (required) an Mspire::Mzml::FileDescription
    attr_accessor :file_description

    # (optional) an array of CV::ReferenceableParamGroup objects
    attr_accessor :referenceable_param_groups

    # (optional) an array of Mspire::Mzml::Sample objects
    attr_accessor :samples

    # (required) an array of Mspire::Mzml::Software objects 
    attr_accessor :software_list

    # (optional) an array of Mspire::Mzml::ScanSettings objects
    attr_accessor :scan_settings_list

    # (required) an array of Mspire::Mzml::InstrumentConfiguration objects
    attr_accessor :instrument_configurations

    # (required) an array of Mspire::Mzml::DataProcessing objects
    attr_accessor :data_processing_list

    # (required) an Mspire::Mzml::Run object
    attr_accessor :run

    # the io object of the mzml file
    attr_accessor :io

    # Mspire::Mzml::IndexList object associated with the file (only expected when reading
    # mzml files at the moment)
    attr_accessor :index_list

    # xml file encoding
    attr_accessor :encoding


    # arg must be an IO object for automatic index and header parsing to
    # occur.  If arg is a hash, then attributes are set.  In addition (or
    # alternatively) a block called that yields self to setup the object.
    #
    # io must respond_to?(:size), giving the size of the io object in bytes
    # which allows seeking.  get_index_list is called to get or create the
    # index list.
    def initialize(arg=nil, &block)
      %w(cvs software_list instrument_configurations samples data_processing_list).each {|guy| self.send( guy + '=', [] ) }

      case arg
      when IO
        set_from_xml_io!(arg)
      when Hash
        arg.each {|k,v| self.send("#{k}=", v) }
      end
      block.call(self) if block
    end

    module Convenience
      def each_chromatogram(&block)
        @run.chromatogram_list.each(&block)
      end

      def each_spectrum(&block)
        @run.spectrum_list.each(&block)
      end

      alias_method :each, :each_spectrum

      # @param [Object] arg an index number (Integer) or id string (String)
      # @return [Mspire::Mzml::Spectrum] a spectrum object
      def spectrum(arg)
        run.spectrum_list[arg]
      end
      alias_method :'[]', :spectrum

      # @param [Object] arg an index number (Integer) or id string (String)
      # @return [Mspire::Mzml::Chromatogram] a spectrum object
      def chromatogram(arg)
        run.chromatogram_list[arg]
      end

      def num_chromatograms
        run.chromatogram_list.size
      end

      # returns the number of spectra
      def length
        run.spectrum_list.size
      end
      alias_method :size, :length

      # @param [Integer] scan_num the scan number 
      # @return [Mspire::Spectrum] a spectrum object, or nil if not found
      # @raise [ScanNumbersNotUnique] if scan numbers are not unique
      # @raise [ScanNumbersNotFound] if spectra exist but scan numbers were not
      #   found
      def spectrum_from_scan_num(scan_num)
        @scan_to_index ||= @index_list[0].create_scan_index
        raise ScanNumbersNotUnique if @scan_to_index == false
        raise ScanNumbersNotFound if @scan_to_index == nil
        spectrum(@scan_to_index[scan_num])
      end
    end
    include Convenience
    
    # Because mzml files are often very large, we try to avoid storing the
    # entire object tree in memory before writing.
    # 
    # takes a filename and uses builder to write to it
    # if no filename is given, returns a string
    def to_xml(filename=nil)
      # TODO: support indexed mzml files
      io = filename ? File.open(filename, 'w') : StringIO.new
      xml = Builder::XmlMarkup.new(:target => io, :indent => 2)
      xml.instruct!

      mzml_atts = Default::NAMESPACE.dup
      mzml_atts[:version] = @version || Default::VERSION
      mzml_atts[:accession] = @accession if @accession
      mzml_atts[:id] = @id if @id

      xml.mzML(mzml_atts) do |mzml_n|
        # the 'if' statements capture whether or not the list is required or not
        raise "#{self.class}#cvs must have > 0 Mspire::Mzml::CV objects" unless @cvs.size > 0 
        Mspire::Mzml::CV.list_xml(@cvs, mzml_n)
        @file_description.to_xml(mzml_n)
        if @referenceable_param_groups
          Mspire::Mzml::ReferenceableParamGroup.list_xml(@referenceable_param_groups, mzml_n)
        end
        if @samples && @samples.size > 0
          Mspire::Mzml::Sample.list_xml(@samples, mzml_n)
        end
        Mspire::Mzml::Software.list_xml(@software_list, mzml_n)
        if @scan_settings_list && @scan_settings_list.size > 0
          Mspire::Mzml::ScanSettings.list_xml(@scan_settings_list, mzml_n)
        end
        icl = Mspire::Mzml::InstrumentConfiguration.list_xml(@instrument_configurations, mzml_n)
        Mspire::Mzml::DataProcessing.list_xml(@data_processing_list, mzml_n)
        @run.to_xml(mzml_n)
      end
      
      if filename
        io.close 
        self
      else
        io.string
      end
    end

    class ScanNumbersNotUnique < Exception
    end
    class ScanNumbersNotFound < Exception
    end
  end
end