#!/usr/bin/env ruby
# encoding: utf-8
# Version = '20140411-084254'

require 'csv'
require 'fileutils'
require 'yaml'
require 'drb/drb'
gem 'rails', '<=3.2.15'
require 'rails/all'

module SushiFabric
  class Application < Rails::Application
    # default parameters
    default_root = Rails.root||Dir.pwd
    config.workflow_manager = 'druby://localhost:12345'
    config.gstore_dir = File.join(default_root, 'public/gstore/projects')
    config.sushi_app_dir = default_root
    config.scratch_dir = '/tmp/scratch'
  end

  # load custmized parameters if there is
  mode = ENV['RAILS_ENV']||'development'
  config_file = File.join('./config/environments', mode)
  if File.exist?(config_file + '.rb')
    require config_file
  else
    FileUtils.mkdir_p File.dirname(config_file)
    open(config_file+'.rb', "w") do |out|
      out.print <<-EOF
module SushiFabric
  class Application < Rails::Application
    # default parameters
    config.workflow_manager = 'druby://localhost:12345'
    config.gstore_dir = File.join(#{default_root}, 'public/gstore/projects')
    config.sushi_app_dir = #{default_root}
    config.scratch_dir = '/tmp/scratch'
  end
end
      EOF
    end
  end

  config = SushiFabric::Application.config
  WORKFLOW_MANAGER = config.workflow_manager
  GSTORE_DIR = config.gstore_dir
  SUSHI_APP_DIR = config.sushi_app_dir
  SCRATCH_DIR = config.scratch_dir
  
  unless File.exist?(GSTORE_DIR)
    FileUtils.mkdir_p GSTORE_DIR
  end

  # check if there is a sqlite3 database of Ruby on Rails
  if defined?(::Project)
    NO_ROR = false
  elsif File.exist?(File.join(SUSHI_APP_DIR, "app/models"))
    NO_ROR = false
    ActiveRecord::Base.establish_connection(
                :adapter  => 'sqlite3',
                :database => "#{SUSHI_APP_DIR}/db/development.sqlite3" 
            )
    require "#{SUSHI_APP_DIR}/app/models/project"
    require "#{SUSHI_APP_DIR}/app/models/data_set"
    require "#{SUSHI_APP_DIR}/app/models/sample"
    require "#{SUSHI_APP_DIR}/app/models/job"
  else
    NO_ROR = true
  end

class ::Array
  def to_h
    Hash[*flatten]
  end
end
class ::Hash
  attr_reader :defaults
  alias :set :[]=
  alias :get :[]
  def []=(k1,k2,v=nil)
    if v
      @desc ||= {}
      @desc.set([k1,k2].join('_'),v)
    else
      @defaults ||= {}
      if !@defaults[k1] and k2
        if k2.instance_of?(Array)
          @defaults.set(k1,k2.first)
        elsif k2.instance_of?(Hash) and k2.first
          @defaults.set(k1,k2.first.last)
        else
          @defaults.set(k1,k2)
        end
      end
      set(k1,k2)
    end
  end
  def default_value(k,v=nil)
    if v
      @defaults[k] = v
    else
      @defaults[k]
    end
  end
  def data_type(k)
    @defaults[k].class
  end
  def data_types
    Hash[@defaults.map{|k,v| [k, v.class]}]
  end
  def [](k1, k2=nil)
    if k2
      if @desc
        @desc.get([k1,k2].join('_'))
      else
        nil
      end
    else
      get(k1)
    end
  end
end
class ::String
  def tag?(tag)
    scan(/\[(.*)\]/).flatten.join =~ /#{tag}/
  end
end
def save_data_set(data_set_arr, headers, rows)
  data_set_hash = Hash[*data_set_arr]
  if project = Project.find_by_number(data_set_hash['ProjectNumber'].to_i)
    data_set = DataSet.new
    data_set.name = data_set_hash['DataSetName']
    data_set.project = project
    if parent_id = data_set_hash['ParentID'] and parent_data_set = DataSet.find_by_id(parent_id.to_i)
      data_set.data_set = parent_data_set
    end
    if comment = data_set_hash['Comment'] and !comment.to_s.empty?
      data_set.comment = comment
    end

    sample_hash = {}
    rows.each do |row|
      headers.each_with_index do |header, i|
       sample_hash[header]=row[i]
      end
      sample = Sample.new
      sample.key_value = sample_hash.to_s
      sample.save unless sample.saved?
      data_set.samples << sample
    end

    data_set.md5 = data_set.md5hexdigest
    unless data_set.saved?
      project.data_sets << data_set
      parent_data_set.data_sets << data_set if parent_data_set
      data_set.save
    end
    data_set.id
  end
end

class SushiApp
  attr_reader :params
  attr_reader :job_ids
  attr_reader :next_dataset_id
  attr_reader :required_columns
  attr_reader :required_params
  attr_reader :dataset_hash
  attr_reader :analysis_category
  attr_reader :description
  attr_reader :name
  attr_accessor :dataset_tsv_file
  attr_accessor :parameterset_tsv_file
  attr_accessor :dataset_sushi_id
  attr_accessor :data_set
  attr_accessor :project
  attr_accessor :user
  attr_accessor :next_dataset_name
  attr_accessor :next_dataset_comment
  def initialize
    @gstore_dir = GSTORE_DIR
    @project = nil
    @name = nil
    @params = {}
    @params['cores'] = nil
    @params['ram'] = nil
    @params['scratch'] = nil
    @params['node'] = ''
    @params['process_mode'] = 'SAMPLE'
    @job_ids = []
    @required_columns = []
    @workflow_manager = DRbObject.new_with_uri(WORKFLOW_MANAGER)
  end
  def set_input_dataset
    if @dataset_tsv_file
      dataset_tsv = CSV.readlines(@dataset_tsv_file, {:headers=>true, :col_sep=>"\t"})
      @dataset_hash = []
      @dataset = []
      dataset_tsv.each do |row|
        @dataset_hash << row.to_hash
        @dataset << row.to_hash
      end
    elsif @dataset_sushi_id
      @dataset_hash = []
      @dataset = []
      if dataset = DataSet.find_by_id(@dataset_sushi_id.to_i)
        dataset.samples.each do |sample|
          @dataset_hash << sample.to_hash
          @dataset << sample.to_hash
        end
      end
    end
    @dataset_hash
  end
  def get_columns_with_tag(tag)
    #@factor_cols = @dataset_hash.first.keys.select{|header| header =~ /\[#{tag}\]/}.map{|header| header.gsub(/\[.+\]/,'').strip}
    @dataset_hash.map{|row| 
      Hash[*row.select{|k,v| k=~/\[#{tag}\]/}.map{|k,v| [k.gsub(/\[.+\]/,'').strip,v]}.flatten]
    }
  end
  def set_default_parameters
    # this should be overwritten in a subclass
  end
  def dataset_has_column?(colname)
    flag = false
    if @dataset_hash
      @dataset_hash.map{|sample| 
        sample.each do |key, value|
          if key =~ /#{colname}/
            flag = true
          end
        end
        break
      }
    end
    flag
  end

  def set_output_files
    @dataset = {}
    next_dataset.keys.select{|header| header.tag?('File')}.each do |header|
      @output_files ||= []
      @output_files << header
    end
    @output_files = @output_files.uniq
  end
  def check_required_columns
    if @dataset_hash and @required_columns and (@required_columns-@dataset_hash.map{|row| row.keys}.flatten.uniq.map{|colname| colname.gsub(/\[.+\]/,'').strip}).empty?
      true
    else
      false
    end
  end
  def check_application_parameters
    if @required_params and (@required_params - @params.keys).empty?
      @output_params = @params.clone
    end
  end
  def set_user_parameters
    # this should be done in an instance of applicaiton subclass
    if @parameterset_tsv_file
      parameterset_tsv = CSV.readlines(@parameterset_tsv_file, :col_sep=>"\t")
      headers = []
      parameterset_tsv.each do |row|
        header, value = row
        headers << header
        @params[header] = if @params.data_type(header) == String or value == nil
                            value
                          else
                            eval(value)
                          end
      end
      (@params.keys - headers).each do |key|
        @params[key] = @params.default_value(key)
      end
    end
    @params
  end
  def set_dir_paths
    ## sushi figures out where to put the resulting dataset
    unless @name and @project
      raise "should set #name and #project"
    end
    @name.gsub!(/\s/,'_')
    result_dir_base = if @next_dataset_name
                        [@next_dataset_name, Time.now.strftime("%Y-%m-%d--%H-%M-%S")].join("_")
                      else
                        [@analysis_category, @name, @dataset_sushi_id.to_s, Time.now.strftime("%Y-%m-%d--%H-%M-%S")].join("_")
                      end
    @result_dir = File.join(@project, result_dir_base)
    @scratch_result_dir = File.join(SCRATCH_DIR, result_dir_base)
    @job_script_dir = File.join(@scratch_result_dir, 'scripts')
    @gstore_result_dir = File.join(@gstore_dir, @result_dir)
    @gstore_script_dir = File.join(@gstore_result_dir, 'scripts')
    @gstore_project_dir = File.join(@gstore_dir, @project)
    set_file_paths
  end
  def prepare_result_dir
    FileUtils.mkdir_p(@scratch_result_dir)
    FileUtils.mkdir_p(@job_script_dir)
  end
  def job_header
    @scratch_dir = if @params['process_mode'] == 'SAMPLE'
                     @scratch_result_dir + "_" + @dataset['Name']
                   else
                     @scratch_result_dir
                   end
    @out.print <<-EOF
#!/bin/bash
set -e
set -o pipefail

#### SET THE STAGE
SCRATCH_DIR=#{@scratch_dir}
GSTORE_DIR=#{@gstore_dir}
mkdir $SCRATCH_DIR || exit 1
cd $SCRATCH_DIR || exit 1
echo "Job runs on `hostname`"
echo "at $SCRATCH_DIR"

    EOF
  end
  def job_footer
    @out.print "#### JOB IS DONE WE PUT THINGS IN PLACE AND CLEAN AUP\n"
    if @output_files
      @output_files.map{|header| next_dataset[header]}.each do |file|
        # in actual case, to save under /srv/gstore/
        src_file = File.basename(file)
        dest_dir = File.dirname(File.join(@gstore_dir, file))
        @out.print copy_commands(src_file, dest_dir).join("\n"), "\n"
      end
    end
    @out.print <<-EOF
cd ~
rm -rf #{@scratch_dir} || exit 1
    EOF

  end
  def job_main
    @out.print "#### NOW THE ACTUAL JOBS STARTS\n"
    @out.print commands, "\n\n"
  end
  def next_dataset
    # this should be overwritten in a subclass
  end
  def commands
    # this should be overwritten in a subclass
  end
  def submit_command(job_script)
    gsub_options = []
    gsub_options << "-c #{@params['cores']}" unless @params['cores'].to_s.empty?
    gsub_options << "-n #{@params['node']}" unless @params['node'].to_s.empty?
    gsub_options << "-r #{@params['ram']}" unless @params['ram'].to_s.empty?
    gsub_options << "-s #{@params['scratch']}" unless @params['scratch'].to_s.empty?
    gsub_options << "-u #{@user}" if @user
    command = "wfm_monitoring --server #{WORKFLOW_MANAGER} --project #{@project.gsub(/p/,'')} --logdir #{@gstore_script_dir} #{job_script} #{gsub_options.join(' ')}"
  end
  def submit(job_script)
    command = submit_command(job_script)
    puts "submit: #{command}"
    job_id = `#{command}`
    job_id = job_id.to_i
    unless job_id.to_i > 1
      raise 'failed in job submitting'
    end
    job_id
  end
  def preprocess
    # this should be overwritten in a subclass
  end
  def set_file_paths
    @parameter_file = 'parameters.tsv'
    @input_dataset_file = 'input_dataset.tsv'
    @next_dataset_file = 'dataset.tsv'
    @input_dataset_tsv_path = File.join(@gstore_result_dir, @input_dataset_file)
    @parameters_tsv_path = File.join(@gstore_result_dir, @input_dataset_file)
    @next_dataset_tsv_path = File.join(@gstore_result_dir, @next_dataset_file)
  end
  def save_parameters_as_tsv
    file_path = File.join(@scratch_result_dir, @parameter_file)
    CSV.open(file_path, 'w', :col_sep=>"\t") do |out|
      @output_params.each do |key, value|
        out << [key, value]
      end
    end
    file_path
  end
  def save_input_dataset_as_tsv
    file_path = File.join(@scratch_result_dir, @input_dataset_file)
    CSV.open(file_path, 'w', :col_sep=>"\t") do |out|
      headers = @dataset_hash.map{|row| row.keys}.flatten.uniq
      out << headers
      @dataset_hash.each do |row|
        out << headers.map{|header| row[header]}
      end
    end
    file_path
  end
  def save_next_dataset_as_tsv
    headers = @result_dataset.map{|row| row.keys}.flatten.uniq
    file_path = File.join(@scratch_result_dir, @next_dataset_file)
    CSV.open(file_path, 'w', :col_sep=>"\t") do |out|
      out << headers
      @result_dataset.each do |row_hash|
        out << headers.map{|header| row_hash[header]}
      end
    end
    file_path
  end
  def copy_commands(org_dir, dest_parent_dir)
    @workflow_manager.copy_commands(org_dir, dest_parent_dir)
  end
  def copy_dataset_parameter_jobscripts
    org = @scratch_result_dir
    dest = @gstore_project_dir
    copy_commands(org, dest).each do |command|
      puts command
      unless system command
        raise "fails in copying next_dataset files from /scratch to /gstore"
      end
    end
    sleep 1
    command = "rm -rf #{@scratch_result_dir}"
    `#{command}`
  end
  def make_job_script
    @out = open(@job_script, 'w')
    job_header
    job_main
    job_footer
    @out.close
  end
  def sample_mode
    @dataset_hash.each do |row|
      @dataset = Hash[*row.map{|key,value| [key.gsub(/\[.+\]/,'').strip, value]}.flatten]
      ## WRITE THE JOB SCRIPT
      sample_name = @dataset['Name']||@dataset.first
      @job_script = if @dataset_sushi_id and dataset = DataSet.find_by_id(@dataset_sushi_id.to_i)
                      File.join(@job_script_dir, @analysis_category + '_' + sample_name) + '_' + dataset.name.gsub(/\s+/,'_') + '.sh'
                    else 
                      File.join(@job_script_dir, @analysis_category + '_' + sample_name) + '.sh'
                    end
      make_job_script
      @job_scripts << @job_script
      @result_dataset << next_dataset
    end
  end
  def dataset_mode
    @job_script = if @dataset_sushi_id and dataset = DataSet.find_by_id(@dataset_sushi_id.to_i)
                    File.join(@job_script_dir, @analysis_category + '_' + dataset.name.gsub(/\s+/,'_') + '.sh')
                  else 
                    File.join(@job_script_dir, @analysis_category + '_' + 'job_script.sh')
                  end
    make_job_script
    @job_scripts << @job_script
    @result_dataset << next_dataset
  end
  def save_data_set(data_set_arr, headers, rows)
    data_set_hash = Hash[*data_set_arr]
    if project = Project.find_by_number(data_set_hash['ProjectNumber'].to_i)
      data_set = DataSet.new
      data_set.name = data_set_hash['DataSetName']
      data_set.project = project
      if parent_id = data_set_hash['ParentID'] and parent_data_set = DataSet.find_by_id(parent_id.to_i)
        data_set.data_set = parent_data_set
      end
      if comment = data_set_hash['Comment'] and !comment.to_s.empty?
        data_set.comment = comment
      end

      sample_hash = {}
      rows.each do |row|
        headers.each_with_index do |header, i|
         sample_hash[header]=row[i]
        end
        sample = Sample.new
        sample.key_value = sample_hash.to_s
        sample.save unless sample.saved?
        data_set.samples << sample
      end

      data_set.md5 = data_set.md5hexdigest
      unless data_set.saved?
        project.data_sets << data_set
        parent_data_set.data_sets << data_set if parent_data_set
        data_set.save
      end
      data_set.id
    end
  end
  def run
    test_run

    ## the user presses RUN
    prepare_result_dir

    ## copy application data to gstore 
    save_parameters_as_tsv
    save_input_dataset_as_tsv


    ## sushi writes creates the job scripts and builds the result data set that is to be generated
    @result_dataset = []
    @job_scripts = []
    if @params['process_mode'] == 'SAMPLE'
      sample_mode
    elsif @params['process_mode'] == 'DATASET'
      dataset_mode
    else 
      #stop
      warn "the process mode (#{@params['process_mode']}) is not defined"
      raise "stop job submitting"
    end

    # job submittion
    @job_scripts.each_with_index do |job_script, i|
      job_id = submit(job_script)
      @job_ids << job_id
      print "Submit job #{File.basename(job_script)} job_id=#{job_id}"
    end

		puts
    print 'job scripts: '
    p @job_scripts
    print 'result dataset: '
    p @result_dataset

    # copy application data to gstore 
    next_dataset_tsv_path = save_next_dataset_as_tsv

    if !@job_ids.empty? and @dataset_sushi_id and dataset = DataSet.find_by_id(@dataset_sushi_id.to_i)
      data_set_arr = []
      headers = []
      rows = []
      next_dataset_name = if name = @next_dataset_name
                            name.to_s
                          else
                            "#{@analysis_category}_#{@name.gsub(/\s/,'').gsub(/_/,'')}_#{dataset.id}"
                          end
      data_set_arr = {'DataSetName'=>next_dataset_name, 'ProjectNumber'=>@project.gsub(/p/,''), 'ParentID'=>@dataset_sushi_id, 'Comment'=>@next_dataset_comment.to_s}
      csv = CSV.readlines(next_dataset_tsv_path, :col_sep=>"\t")
      csv.each do |row|
        if headers.empty?
          headers = row
        else
          rows << row
        end
      end
      unless NO_ROR
        @next_dataset_id = save_data_set(data_set_arr.to_a.flatten, headers, rows)

        # save job and dataset relation in Sushi DB
        job_ids.each do |job_id|
          new_job = Job.new
          new_job.submit_job_id = job_id.to_i
          new_job.next_dataset_id = @next_dataset_id
          new_job.save
          new_job.data_set.jobs << new_job
          new_job.data_set.save
        end

      end
    end
    t = Thread.new do
      copy_dataset_parameter_jobscripts
    end
    if Thread.main == Thread.current # in case of running script directly on terminal
      warn "copy dataset, parameter, jobscripts..."
      t.join
      warn "copy finish."
    end
  end
  def test_run
    set_dir_paths
    set_input_dataset
    preprocess
    set_output_files
    set_user_parameters

    failures = 0
    print 'check project name: '
    unless @project
      puts "\e[31mFAILURE\e[0m: project number is required but not found. you should set it in usecase."
      puts "\tex.)"
      puts "\tapp = #{self.class}.new"
      puts "\tapp.project = 'p1001'"
      failures += 1
    else
      puts "\e[32mPASSED\e[0m:\n\t@project=#{@project}"
    end

    print 'check user name: '
    unless @user
      puts "\e[31mWARNING\e[0m: user number is ought to be added but not found. you should set it in usecase. Default will be 'sushi lover'"
      puts "\tex.)"
      puts "\tapp = #{self.class}.new"
      puts "\tapp.user = 'masa'"
    else
      puts "\e[32mPASSED\e[0m:\n\t@user=#{@user}"
    end

    print 'check application name: '
    if @name.to_s.empty?
      puts "\e[31mFAILURE\e[0m: application name is required but not found. you should set it in application class."
      puts "\tex.)"
      puts "\tclass #{self.class}"
      puts "\t def initialize"
      puts "\t  @name = '#{self.class}'"
      puts "\t end"
      puts "\tend"
      failures += 1
    else
      puts "\e[32mPASSED\e[0m:\n\t@name=#{@name}"
    end

    print 'check analysis_category: '
    if @analysis_category.to_s.empty?
      puts "\e[31mFAILURE\e[0m: analysis_category is required but not found. you should set it in application class."
      puts "\tex.)"
      puts "\tclass #{self.class}"
      puts "\t def initialize"
      puts "\t  @analysis_category = 'Mapping'"
      puts "\t end"
      puts "\tend"
      failures += 1
    else
      puts "\e[32mPASSED\e[0m:\n\t@analysis_category=#{@analysis_category}"
    end

    print 'check dataset: '
    if !@dataset_hash or @dataset_hash.empty?
      puts "\e[31mFAILURE\e[0m: dataset is not found. you should set it by using #{self.class}#dataset_sushi_id or #{self.class}#dataset_tsv_file properties"
      puts "\tex.)"
      puts "\tusecase = #{self.class}.new"
      puts "\tusecase.dataset_tsv_file = \"dataset.tsv\""
      failures += 1
    else
      puts "\e[32mPASSED\e[0m:\n\t@dataset_hash.length = #{@dataset_hash.length}"
    end

    print 'check required columns: '
    unless check_required_columns
      puts "\e[31mFAILURE\e[0m: required_column(s) is not found in dataset. you should set it in application class."
      puts "\tex.)"
      puts "\tdef initialize"
      puts "\t  @required_columns = ['Name', 'Read1']"
      puts
      failures += 1
    else
      puts "\e[32mPASSED\e[0m:"
    end
    puts "\trequired columns: #{@required_columns}"
    puts "\tdataset  columns: #{@dataset_hash.map{|row| row.keys}.flatten.uniq}" if @dataset_hash

    print 'check required parameters: '
    unless check_application_parameters
      puts "\e[31mFAILURE\e[0m: required_param(s) is not set yet. you should set it in usecase"
      puts "\tmissing params: #{@required_params-@params.keys}" if @required_params
      puts "\tex.)"
      puts "\tusecase = #{self.class}.new"
      if @required_params
        puts "\tusecase.params['#{(@required_params-@params.keys)[0]}'] = parameter"
      else
        puts "\tusecase.params['parameter name'] = default_parameter"
      end
      puts
      failures += 1
    else
      puts "\e[32mPASSED\e[0m:"
    end
    puts "\tparameters: #{@params.keys}"
    puts "\trequired  : #{@required_params}"

    print 'check next dataset: '
    @dataset={}
    unless self.next_dataset
      puts "\e[31mFAILURE\e[0m: next dataset is not set yet. you should overwrite SushiApp#next_dataset method in #{self.class}"
      puts "\tnote: the return value should be Hash (key: column title, value: value in a tsv table)"
      failures += 1
    else
      puts "\e[32mPASSED\e[0m:"
    end

    print 'check output files: '
    if !@output_files or @output_files.empty?
      puts "\e[31mWARNING\e[0m: no output files. you will not get any output files after the job running. you can set @output_files (array) in #{self.class}"
      puts "\tnote: usually it should be define in initialize method"
      puts "\t      the elements of @output_files should be chosen from #{self.class}#next_dataset.keys"
      puts "\t      #{self.class}#next_dataset.keys: #{self.next_dataset.keys}" if self.next_dataset
    else
      puts "\e[32mPASSED\e[0m:"
    end

    print 'check commands: '
    if @params['process_mode'] == 'SAMPLE'
      @dataset_hash.each do |row|
        @dataset = Hash[*row.map{|key,value| [key.gsub(/\[.+\]/,'').strip, value]}.flatten]
        unless com = commands
          puts "\e[31mFAILURE\e[0m: any commands is not defined yet. you should overwrite SushiApp#commands method in #{self.class}"
          puts "\tnote: the return value should be String (this will be in the main body of submitted job script)"
          failures += 1
        else
          puts "\e[32mPASSED\e[0m:"
          puts "generated command will be:"
          puts "\t"+com.split(/\n/).join("\n\t")+"\n"
        end
      end
    elsif @params['process_mode'] == 'DATASET'
      unless com = commands
        puts "\e[31mFAILURE\e[0m: any commands is not defined yet. you should overwrite SushiApp#commands method in #{self.class}"
        puts "\tnote: the return value should be String (this will be in the main body of submitted job script)"
        failures += 1
      else
        puts "\e[32mPASSED\e[0m:"
        puts "generated command will be:"
        puts "\t"+com.split(/\n/).join("\n\t")+"\n"
      end
    end

    print 'check workflow manager: '
    begin
      hello = `wfm_hello #{WORKFLOW_MANAGER}`
    rescue
    end
    unless hello =~ /hello/
      puts "\e[31mFAILURE\e[0m: workflow_manager does not reply. check if workflow_manager is working"
      failures += 1
    else
      puts "\e[32mPASSED\e[0m:"
    end

    if failures > 0
      puts
      puts "\e[31mFailures (#{failures})\e[0m: All failures should be solved"
      raise "test run fails"
    else
      puts "All checks \e[32mPASSED\e[0m"
    end
  end
end


end