#!/usr/bin/env ruby

# Plots a Gantt-style timeline of a finished job and its recursive
# dependencies, using the :started and :done timestamps stored in each job's
# info. The plot is rendered through R (ggplot2) into a PNG file.

require 'rbbt/workflow'
require 'rbbt-util'
require 'fileutils'
require 'rbbt/util/simpleopt'
require 'rbbt/workflow/step'
require 'rbbt/workflow/provenance'
require 'rbbt/util/misc'
require 'rbbt/util/R'

# Show the full command chain in the process title; the command words are
# joined with a space so that it reads "rbbt workflow trace".
$0 = "rbbt #{$previous_commands*" "} #{ File.basename(__FILE__) }" if $previous_commands

# NOTE(review): the summary and the -i / -if options below look inherited from
# the provenance command; this script parses them but never uses them.
options = SOPT.setup <<EOF

Examine the provenance of a job result

$ rbbt workflow trace <job-result> <file.png>

-h--help Help
-i--inputs* List of inputs to print
-if--info_fields* List of info fields to print
EOF

SOPT.usage if options[:help]

$inputs = (options[:inputs] || "").split(",")
$info_fields = (options[:info_fields] || "").split(",")

# NOTE(review): the default job path is a developer-specific location; callers
# are expected to pass the job result explicitly.
file = ARGV.shift || "/home/mvazque2/.rbbt/var/jobs/Sample/haplotype/NA12878"
plot = ARGV.shift || "/tmp/plot.png"

$seen = []

# Loads the step behind +file+.
#
# The path is expanded, and a trailing ".info" or ".files" suffix is removed
# so that the job's companion paths resolve to the job itself. The pattern is
# anchored to the end of the path so that names merely containing ".info" or
# ".files" are left intact. The resolved path is recorded in $seen.
#
# @param file [String] path to a job result, its .info file or its .files dir
# @return the value returned by Workflow.load_step for the resolved path
def get_step(file)
  file = File.expand_path(file).sub(/\.(info|files)\z/, '')
  $seen << file
  Workflow.load_step file
end

step = get_step file
$main_mtime = Open.exist?(step.path) ? Open.mtime(step.path) : nil

data = TSV.setup({}, "Job~Workflow,Task,Start,End#:type=:list")

# Keep only jobs that recorded both timestamps (a missing :started would break
# the sort and the offset arithmetic below) and that report themselves done.
jobs = (step.rec_dependencies + [step])
  .select { |job| job.info[:done] && job.info[:started] }
  .sort_by { |job| job.info[:started] }
  .select { |job| job.done? }

# The earliest start is the origin of the time axis.
min_start = jobs.map { |job| job.info[:started] }.min

jobs.each do |job|
  info = job.info
  key = [job.task_name.to_s, job.clean_name] * "."
  data[key] = [job.workflow.to_s, job.task_name, info[:started], info[:done]]
end

# Offsets relative to the first job start; these are the x coordinates used
# by the plot.
data.add_field "Start.second" do |k,value|
  value["Start"] - min_start
end

data.add_field "End.second" do |k,value|
  value["End"] - min_start
end

Log.severity = 0

#data.R_interactive <<-EOF
#  rbbt.require('timevis')
#  data$id = rownames(data)
#  data$content = data$Task
#  data$start = data$Start
#  data$end = data$End
#
#  timevis(data)
#EOF

data.R <<-EOF, [:svg]
  rbbt.require('tidyverse')
  rbbt.require('ggplot2')

  names(data) <- make.names(names(data))
  data$id = rownames(data)
  data$content = data$Task
  data$start = data$Start
  data$end = data$End
  data$Project = data$Workflow

  tasks = data

  #theme_gantt <- function(base_size=11, base_family="Source Sans Pro Light") {
  theme_gantt <- function(base_size=11, base_family="Sans Serif") {
    ret <- theme_bw(base_size, base_family) %+replace%
      theme(panel.background = element_rect(fill="#ffffff", colour=NA),
            axis.title.x=element_text(vjust=-0.2), axis.title.y=element_text(vjust=1.5),
            title=element_text(vjust=1.2, family="Source Sans Pro Semibold"),
            panel.border = element_blank(), axis.line=element_blank(),
            panel.grid.minor=element_blank(),
            panel.grid.major.y = element_blank(),
            panel.grid.major.x = element_line(size=0.5, colour="grey80"),
            axis.ticks=element_blank(),
            legend.position="bottom",
            axis.title=element_text(size=rel(0.8), family="Source Sans Pro Semibold"),
            strip.text=element_text(size=rel(1), family="Source Sans Pro Semibold"),
            strip.background=element_rect(fill="#ffffff", colour=NA),
            panel.spacing.y=unit(1.5, "lines"),
            legend.key = element_blank())

    ret
  }

  tasks.long <- tasks %>%
    gather(date.type, task.date, -c(Project, Task, id, Start.second, End.second)) %>%
    arrange(date.type, task.date) %>%
    mutate(id = factor(id, levels=rev(unique(id)), ordered=TRUE))

  x.breaks <- seq(length(tasks$Task) + 0.5 - 3, 0, by=-3)

  timeline <- ggplot(tasks.long, aes(y=id, yend=id, x=Start.second, xend=End.second, colour=Task)) +
    geom_segment() +
    geom_vline(xintercept=x.breaks, colour="grey80", linetype="dotted") +
    guides(colour=guide_legend(title=NULL)) +
    labs(x=NULL, y=NULL) +
    theme_gantt() + theme(axis.text.x=element_text(angle=45, hjust=1))

  rbbt.png_plot('#{plot}', 'timeline', width=2000)
EOF