external_code/plot_heatmap.R in pets-0.2.3 vs external_code/plot_heatmap.R in pets-0.2.4
- old
+ new
@@ -139,27 +139,32 @@
colnames(data_raw) <- c("SetA","SetB","Value")
data_raw$SetA <- as.character(data_raw$SetA)
data_raw$SetB <- as.character(data_raw$SetB)
if(opt$same_sets){
all_elements <- sort(unique(unlist(data_raw[,c("SetA","SetB")])))
- data <- matrix(0, length(all_elements), length(all_elements), dimnames = list(all_elements, all_elements))
- str(data_raw)
- str(data)
+ data <- matrix(NA, length(all_elements), length(all_elements), dimnames = list(all_elements, all_elements))
data[as.matrix(data_raw[,c("SetA","SetB")])] <- data_raw$Value
data[as.matrix(data_raw[,c("SetB","SetA")])] <- data_raw$Value
+ # save(data, file = "test2.RData")
} else {
rowSet <- unique(data_raw$SetA)
colSet <- unique(data_raw$SetB)
data <- matrix(0, ncol = length(colSet), nrow = length(rowSet), dimnames = list(rowSet, colSet))
data[as.matrix(data_raw[,c("SetA","SetB")])] <- data_raw$Value
}
}else{ # Load matrix
if(!is.null(opt$npy)){
- axis_labels <- read.table(opt$npy, header=FALSE, stringsAsFactors=FALSE)
data <- npyLoad(opt$data_file)
- colnames(data) <- axis_labels$V1
+ axis_files <- unlist(strsplit(opt$npy, ','))
+ axis_labels <- read.table(axis_files[1], header=FALSE, stringsAsFactors=FALSE, sep="\t")
rownames(data) <- axis_labels$V1
+ if(length(axis_files) == 2){
+ x_axis_labels <- read.table(axis_files[2], header=FALSE, stringsAsFactors=FALSE, sep="\t")
+ colnames(data) <- x_axis_labels$V1
+ }else{
+ colnames(data) <- axis_labels$V1
+ }
}else{
data <- as.matrix(read.table(opt$data_file, sep="\t", header=opt$header, stringsAsFactors=FALSE, row.names= 1, check.names = FALSE))
}
}
@@ -185,36 +190,36 @@
######### CLUSTERING
if(opt$same_sets){
hr <- fastcluster::hclust(as.dist(matrix_transf), method="ward.D2")
groups <- cluster_obj_to_groups(matrix_transf, hr, opt$tree_cut_method, minProportionCluster = opt$minProportionCluster)
- sim_between_groups <- calc_sim_between_groups(data, groups)
- distance_between_groups <- 1 - sim_between_groups
- groups_clustered <- fastcluster::hclust(as.dist(distance_between_groups), method="ward.D2")
-
- # Plot dendrogram to check performance
- # png(file=file.path(opt$output, 'dendrogram_groups.png', sep=''))
- # plot(dendrogram_groups)
- # dev.off()
- ######### EXPORT
write.table(groups, file=paste0(opt$output, '_clusters.txt'), sep="\t", quote=FALSE, col.names=FALSE, row.names= TRUE)
if (opt$save_raw_clust){
dendrogram_groups <- as.dendrogram(hr)
- } else {
+ } else { # TODO Pedro: I have no idea about the reason for the following code
+ sim_between_groups <- calc_sim_between_groups(data, groups)
+ distance_between_groups <- 1 - sim_between_groups
+ groups_clustered <- fastcluster::hclust(as.dist(distance_between_groups), method="ward.D2")
dendrogram_groups <- as.dendrogram(groups_clustered)
}
+ # Plot dendrogram to check performance
+ # png(file=file.path(opt$output, 'dendrogram_groups.png', sep=''))
+ # plot(dendrogram_groups)
+ # dev.off()
save(dendrogram_groups, file=paste0(opt$output, '_dendrogram_groups.RData', sep=''))
}else{
# Calc similitudes of rows
mdistRows = toDistances(matrix_transf)
mdistCols = toDistances(matrix_transf, FALSE)
+ rownames(mdistRows) <- rownames(matrix_transf) # The square matrixes obtained have lost
+ colnames(mdistRows) <- rownames(matrix_transf) # row and col names. We retrieve them
+ rownames(mdistCols) <- colnames(matrix_transf) # from original matrix. In previous case,
+ colnames(mdistCols) <- colnames(matrix_transf) # the matrix_transf is used directly by hclust
# Obtaing clustering
- quantValue_row <- quantile(mdistRows, c(.2), na.rm = TRUE)
hr_row <- fastcluster::hclust(as.dist(mdistRows), method="ward.D2")
- groups_row <- cluster_obj_to_groups(mdistRows, hr, opt$tree_cut_method)
-
+ groups_row <- cluster_obj_to_groups(mdistRows, hr_row, opt$tree_cut_method, minProportionCluster = opt$minProportionCluster)
quantValue_col <- quantile(mdistCols, c(.2), na.rm = TRUE)
hr_col <- fastcluster::hclust(as.dist(mdistCols), method="ward.D2")
groups_col <- cutree(hr_col, h = quantValue_col)
######### EXPORT
write.table(groups_row, file=paste(opt$output, '_clusters_rows.txt', sep=''), sep="\t", quote=FALSE, col.names=FALSE)
@@ -225,21 +230,20 @@
if(opt$pdf){
pdf(paste0(opt$output, '_heatmap.pdf'), width = 1000, height = 1000, units = "px", res=175, pointsize = 8)
}else{
png(paste0(opt$output, '_heatmap.png'), width = 1000, height = 1000, units = "px", res=175, pointsize = 8)
}
- if(opt$same_sets){
- group_colours <- colorRampPalette(brewer.pal(8, "Set1"))(length(unique(groups)))
- # print("cluster number:")
- # print(length(unique(groups)))
- # print(length(unique(group_colours)))
- group_colours_arranged <- group_colours[groups]
- heatmap.2(data, Rowv=as.dendrogram(hr), Colv=as.dendrogram(hr), trace="none", col=brewer.pal(11,"RdBu"), dendrogram = c("row"), labRow = FALSE, labCol = FALSE,
- xlab = opt$collabel, ylab = opt$rowlabel, RowSideColors=group_colours_arranged)
- }else{
- heatmap.2(data, Rowv=as.dendrogram(hr_row), Colv=as.dendrogram(hr_col), trace="none", col=brewer.pal(11,"RdBu"), labRow = FALSE, labCol = FALSE,
- xlab = opt$collabel, ylab = opt$rowlabel)
+if(opt$same_sets){
+ group_colours <- colorRampPalette(brewer.pal(8, "Set1"))(length(unique(groups)))
+ group_colours_arranged <- c(rep('#000000', length(groups[groups == 0])), group_colours[groups])
+ heatmap.2(data, Rowv=as.dendrogram(hr), Colv=as.dendrogram(hr), trace="none", col=brewer.pal(11,"RdBu"), dendrogram = c("row"), labRow = FALSE, labCol = FALSE,
+ xlab = opt$collabel, ylab = opt$rowlabel, RowSideColors=group_colours_arranged)
+}else{
+ group_colours <- colorRampPalette(brewer.pal(8, "Set1"))(length(unique(groups_row)))
+ group_colours_arranged <- c(rep('#000000', length(groups_row[groups_row == 0])), group_colours[groups_row])
+ heatmap.2(data, Rowv=as.dendrogram(hr_row), Colv=as.dendrogram(hr_col), trace="none", col=brewer.pal(11,"RdBu"), dendrogram = c("row"), labRow = FALSE, labCol = FALSE,
+ xlab = opt$collabel, ylab = opt$rowlabel, RowSideColors=group_colours_arranged)
- }
+}
dev.off()
# save.image("test.RData")