Use pheatmap to draw heat maps in R

From BITS wiki
Jump to: navigation, search

pheatmap converts large tables (e.g. microarray data) into coloured, clustered heatmaps


[ Main_Page ]



pheatmap.png

Help on the pheatmap package can be found on the CRAN site (http://cran.r-project.org/web/packages/pheatmap/index.html). To use this code, copy and paste it into a blank 'R Markdown' document in RStudio.

Generate heat maps from tabular data with the R package "pheatmap"
========================================================
SP:BITS©2013
 
This is an example use of **pheatmap** with k-means clustering, followed by plotting each cluster as a separate heatmap.
The code below is deliberately redundant in order to exemplify different ways to use 'pheatmap'.
The data was purposely taken from a built-in R dataset to ease distribution, but similar results will be obtained with any other multi-column data frame.
 
Load libraries and read data in
 
```{r}
# Load the heatmap package and the colour palettes used by the chunks below.
library("pheatmap")
library("RColorBrewer")
# Both packages are distributed on CRAN; if they are not installed yet, run:
# install.packages(c("RColorBrewer", "pheatmap"))

# USArrests is provided by the 'datasets' package that ships with R; any other
# numeric, multi-column data frame can be substituted here.
# NOTE: the name 'data' masks utils::data() as a variable; it is kept because
# the following chunks refer to it.
data(USArrests)
data <- USArrests
head(data)

# save all results to current dir
basedir <- getwd()
```
 
Plot the full heatmap, with rows and columns clustered by Euclidean distance
 
```{r, fig.keep='first'}
# ```{r, fig.keep='none'} to avoid printing blank figure markup-output
# ```{r, fig.keep='first'} to print only the first figure

# Largest number of k-means clusters to build; it is read by the next chunk.
# Use clValid or other functions to better define this number
# (out of scope here!)
maxclust <- 5

# create the colour palette (nine-step "Blues" scale from RColorBrewer)
col.pal <- brewer.pal(9, "Blues")

# define metrics for clustering
drows1 <- "euclidean"
dcols1 <- "euclidean"

# prepare path and file name for output
filename <- "my.pheatmap.pdf"
outfile <- file.path(basedir, filename)

# Collect the pheatmap() arguments in a list so that the same settings can be
# reused for the on-screen and the file version through do.call().
# The first, unnamed element is the table to plot.
# type "?pheatmap" for more help
hm.parameters <- list(
  data,
  color = col.pal,
  cellwidth = 15, cellheight = 12, scale = "none",
  treeheight_row = 200,
  kmeans_k = NA, # NA: plot every row, no k-means aggregation
  show_rownames = TRUE, show_colnames = TRUE,
  main = "Full heatmap (avg, eucl, unsc)",
  clustering_method = "average",
  cluster_rows = TRUE, cluster_cols = TRUE,
  clustering_distance_rows = drows1,
  clustering_distance_cols = dcols1
)

# To draw the heat map on screen
do.call("pheatmap", hm.parameters)

# To draw to file: same arguments plus the output path
do.call("pheatmap", c(hm.parameters, list(filename = outfile)))
```
 
Plot each cluster as a separate heatmap
 
```{r, fig.keep='first'}
# For every cluster count k from 2 up to 'maxclust':
#   1. draw the k-means-aggregated heatmap (on screen and to PDF),
#   2. save the table with the cluster number of each row as TSV,
#   3. draw one detailed heatmap per cluster (on screen and to PDF).

# pheatmap() runs kmeans() internally, which starts from randomly chosen
# centres, so two calls with identical arguments may partition the rows
# differently. Re-seeding before each k-means call keeps the stored mapping
# (used for the TSV and the per-cluster plots) and the PDF in agreement.
# Side effect: this resets the session's random number generator.
kmeans.seed <- 1234

# seq_len(maxclust)[-1] is 2..maxclust, and empty when maxclust < 2
# (2:maxclust would count backwards in that case). A separate loop variable
# is used so that 'maxclust' itself is not overwritten.
for (k in seq_len(maxclust)[-1]) {
  filename <- paste0("heatmap.for.", k, "-clusters.pdf")
  outfile <- file.path(basedir, filename)

  main <- paste0("result for ", k, " clusters")
  hmx.parameters <- list(
    data,
    color = col.pal,
    cellwidth = 15, cellheight = 12, scale = "none",
    treeheight_row = 200,
    kmeans_k = k,
    show_rownames = TRUE, show_colnames = TRUE,
    main = main,
    clustering_method = "average",
    cluster_rows = TRUE, cluster_cols = TRUE,
    clustering_distance_rows = drows1,
    clustering_distance_cols = dcols1
  )

  # To store cluster mappings and draw on screen. This single call does both,
  # so no separate on-screen call is needed.
  set.seed(kmeans.seed)
  kmean.hm <- do.call("pheatmap", hmx.parameters)

  # To draw to file (same seed, hence the same k-means start as above)
  set.seed(kmeans.seed)
  do.call("pheatmap", c(hmx.parameters, list(filename = outfile)))

  # add cluster number to the table; the cluster vector is in row order
  clustnum <- kmean.hm[["kmeans"]][["cluster"]]
  clustered.data <- cbind(data, clustnum)
  # positions of the original value columns (everything except 'clustnum')
  value.cols <- seq_len(ncol(data))

  # inspect data
  cat(paste0("Data for max-clust= ", k, "\n"))
  print(head(clustered.data))

  filename <- paste0("Clustered.for.", k, "-clusters.tsv")
  outfile <- file.path(basedir, filename)

  write.table(clustered.data,
    file = outfile, quote = FALSE, sep = "\t",
    col.names = TRUE, row.names = TRUE
  )

  # plot a detailed heatmap for each of the k clusters
  for (clust in seq_len(k)) {
    # rows assigned to this cluster, without the cluster-number column
    cluster <- clustered.data[clustered.data$clustnum == clust,
      value.cols,
      drop = FALSE
    ]

    # nothing to draw for a cluster without members
    if (nrow(cluster) == 0) {
      next
    }

    # prepare output
    filename <- paste0("cluster.", clust, "_of_", k, ".pdf")
    outfile <- file.path(basedir, filename)

    main <- paste0("cluster #", clust, " of #", k)

    cluster.parameters <- list(
      cluster,
      color = col.pal,
      cellwidth = 15, cellheight = 12,
      scale = "none",
      treeheight_row = 200,
      kmeans_k = NA,
      show_rownames = TRUE, show_colnames = TRUE,
      main = main,
      clustering_method = "average",
      # hierarchical clustering needs at least two rows
      cluster_rows = nrow(cluster) > 1, cluster_cols = TRUE,
      clustering_distance_rows = drows1,
      clustering_distance_cols = dcols1
    )

    # To draw the heat map on screen
    do.call("pheatmap", cluster.parameters)

    # To draw to file
    do.call("pheatmap", c(cluster.parameters, list(filename = outfile)))
  }
  # next k value
}
```
# end