external_code/plot_heatmap.R in pets-0.2.3 vs external_code/plot_heatmap.R in pets-0.2.4

- old
+ new

@@ -139,27 +139,32 @@ colnames(data_raw) <- c("SetA","SetB","Value") data_raw$SetA <- as.character(data_raw$SetA) data_raw$SetB <- as.character(data_raw$SetB) if(opt$same_sets){ all_elements <- sort(unique(unlist(data_raw[,c("SetA","SetB")]))) - data <- matrix(0, length(all_elements), length(all_elements), dimnames = list(all_elements, all_elements)) - str(data_raw) - str(data) + data <- matrix(NA, length(all_elements), length(all_elements), dimnames = list(all_elements, all_elements)) data[as.matrix(data_raw[,c("SetA","SetB")])] <- data_raw$Value data[as.matrix(data_raw[,c("SetB","SetA")])] <- data_raw$Value + # save(data, file = "test2.RData") } else { rowSet <- unique(data_raw$SetA) colSet <- unique(data_raw$SetB) data <- matrix(0, ncol = length(colSet), nrow = length(rowSet), dimnames = list(rowSet, colSet)) data[as.matrix(data_raw[,c("SetA","SetB")])] <- data_raw$Value } }else{ # Load matrix if(!is.null(opt$npy)){ - axis_labels <- read.table(opt$npy, header=FALSE, stringsAsFactors=FALSE) data <- npyLoad(opt$data_file) - colnames(data) <- axis_labels$V1 + axis_files <- unlist(strsplit(opt$npy, ',')) + axis_labels <- read.table(axis_files[1], header=FALSE, stringsAsFactors=FALSE, sep="\t") rownames(data) <- axis_labels$V1 + if(length(axis_files) == 2){ + x_axis_labels <- read.table(axis_files[2], header=FALSE, stringsAsFactors=FALSE, sep="\t") + colnames(data) <- x_axis_labels$V1 + }else{ + colnames(data) <- axis_labels$V1 + } }else{ data <- as.matrix(read.table(opt$data_file, sep="\t", header=opt$header, stringsAsFactors=FALSE, row.names= 1, check.names = FALSE)) } } @@ -185,36 +190,36 @@ ######### CLUSTERING if(opt$same_sets){ hr <- fastcluster::hclust(as.dist(matrix_transf), method="ward.D2") groups <- cluster_obj_to_groups(matrix_transf, hr, opt$tree_cut_method, minProportionCluster = opt$minProportionCluster) - sim_between_groups <- calc_sim_between_groups(data, groups) - distance_between_groups <- 1 - sim_between_groups - groups_clustered <- fastcluster::hclust(as.dist(distance_between_groups), method="ward.D2") - - # Plot dendrogram to check performance - # png(file=file.path(opt$output, 'dendrogram_groups.png', sep='')) - # plot(dendrogram_groups) - # dev.off() - ######### EXPORT write.table(groups, file=paste0(opt$output, '_clusters.txt'), sep="\t", quote=FALSE, col.names=FALSE, row.names= TRUE) if (opt$save_raw_clust){ dendrogram_groups <- as.dendrogram(hr) - } else { + } else { # TODO Pedro: I have no idea about the reason for the following code + sim_between_groups <- calc_sim_between_groups(data, groups) + distance_between_groups <- 1 - sim_between_groups + groups_clustered <- fastcluster::hclust(as.dist(distance_between_groups), method="ward.D2") dendrogram_groups <- as.dendrogram(groups_clustered) } + # Plot dendrogram to check performance + # png(file=file.path(opt$output, 'dendrogram_groups.png', sep='')) + # plot(dendrogram_groups) + # dev.off() save(dendrogram_groups, file=paste0(opt$output, '_dendrogram_groups.RData', sep='')) }else{ # Calc similitudes of rows mdistRows = toDistances(matrix_transf) mdistCols = toDistances(matrix_transf, FALSE) + rownames(mdistRows) <- rownames(matrix_transf) # The square matrixes obtained have lost + colnames(mdistRows) <- rownames(matrix_transf) # row and col names. We retrieve them + rownames(mdistCols) <- colnames(matrix_transf) # from original matrix. In previous case, + colnames(mdistCols) <- colnames(matrix_transf) # the matrix_transf is used directly by hclust # Obtaing clustering - quantValue_row <- quantile(mdistRows, c(.2), na.rm = TRUE) hr_row <- fastcluster::hclust(as.dist(mdistRows), method="ward.D2") - groups_row <- cluster_obj_to_groups(mdistRows, hr, opt$tree_cut_method) - + groups_row <- cluster_obj_to_groups(mdistRows, hr_row, opt$tree_cut_method, minProportionCluster = opt$minProportionCluster) quantValue_col <- quantile(mdistCols, c(.2), na.rm = TRUE) hr_col <- fastcluster::hclust(as.dist(mdistCols), method="ward.D2") groups_col <- cutree(hr_col, h = quantValue_col) ######### EXPORT write.table(groups_row, file=paste(opt$output, '_clusters_rows.txt', sep=''), sep="\t", quote=FALSE, col.names=FALSE) @@ -225,21 +230,20 @@ if(opt$pdf){ pdf(paste0(opt$output, '_heatmap.pdf'), width = 1000, height = 1000, units = "px", res=175, pointsize = 8) }else{ png(paste0(opt$output, '_heatmap.png'), width = 1000, height = 1000, units = "px", res=175, pointsize = 8) } - if(opt$same_sets){ - group_colours <- colorRampPalette(brewer.pal(8, "Set1"))(length(unique(groups))) - # print("cluster number:") - # print(length(unique(groups))) - # print(length(unique(group_colours))) - group_colours_arranged <- group_colours[groups] - heatmap.2(data, Rowv=as.dendrogram(hr), Colv=as.dendrogram(hr), trace="none", col=brewer.pal(11,"RdBu"), dendrogram = c("row"), labRow = FALSE, labCol = FALSE, - xlab = opt$collabel, ylab = opt$rowlabel, RowSideColors=group_colours_arranged) - }else{ - heatmap.2(data, Rowv=as.dendrogram(hr_row), Colv=as.dendrogram(hr_col), trace="none", col=brewer.pal(11,"RdBu"), labRow = FALSE, labCol = FALSE, - xlab = opt$collabel, ylab = opt$rowlabel) +if(opt$same_sets){ + group_colours <- colorRampPalette(brewer.pal(8, "Set1"))(length(unique(groups))) + group_colours_arranged <- c(rep('#000000', length(groups[groups == 0])), group_colours[groups]) + heatmap.2(data, Rowv=as.dendrogram(hr), Colv=as.dendrogram(hr), trace="none", col=brewer.pal(11,"RdBu"), dendrogram = c("row"), labRow = FALSE, labCol = FALSE, + xlab = opt$collabel, ylab = opt$rowlabel, RowSideColors=group_colours_arranged) +}else{ + group_colours <- colorRampPalette(brewer.pal(8, "Set1"))(length(unique(groups_row))) + group_colours_arranged <- c(rep('#000000', length(groups_row[groups_row == 0])), group_colours[groups_row]) + heatmap.2(data, Rowv=as.dendrogram(hr_row), Colv=as.dendrogram(hr_col), trace="none", col=brewer.pal(11,"RdBu"), dendrogram = c("row"), labRow = FALSE, labCol = FALSE, + xlab = opt$collabel, ylab = opt$rowlabel, RowSideColors=group_colours_arranged) - } +} dev.off() # save.image("test.RData")