User Tools

Site Tools


mkatari-bioinformatics-august-2013-clustering

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision Previous revision
Next revision
Previous revision
mkatari-bioinformatics-august-2013-clustering [2014/12/15 12:01]
mkatari
mkatari-bioinformatics-august-2013-clustering [2015/06/17 13:26] (current)
mkatari
Line 4: Line 4:
 ====== Clustering rna-seq data ====== ====== Clustering rna-seq data ======
 continuation from [[mkatari-bioinformatics-august-2013-deseq|DESeq]] continuation from [[mkatari-bioinformatics-august-2013-deseq|DESeq]]
 +
 +[[https://drive.google.com/file/d/0B172nc4dAaaORnh3MkZqUE9PVjA/view?usp=sharing|resSig.txt]]
 +[[https://drive.google.com/file/d/0B172nc4dAaaONXFIX2YxeDRCbkE/view?usp=sharing|normalized.txt]]
 +
 +In case you didn't get DESeq to work download and load the files above
 +
 +<code>
 +resSig = read.table("resSig.txt", header=T)
 +normalized = read.table("normalized.txt", header=T, row.names=1)
 +
 +</code>
  
 Get the significant genes Get the significant genes
Line 12: Line 23:
 Get the normalized values for the significant genes Get the normalized values for the significant genes
 <code> <code>
-sigGenes.normalized = normalized[sigGenes,]+sigGenes.normalized = normalized[as.character(sigGenes),]
 </code> </code>
  
Line 35: Line 46:
  
 <code> <code>
-sigGenes.hclust.k2<-cutree(sigGenes.normalized.hclust, k=2, nstart=25)+sigGenes.hclust.k2<-cutree(sigGenes.normalized.hclust, k=2)
 </code> </code>
  
Line 90: Line 101:
  
 <code> <code>
-SigGenes.kmeans.2 = kmeans(t(scaledSigGenes), 2)+SigGenes.kmeans.2 = kmeans(scaledSigGenes, 2, nstart=25)
 </code> </code>
  
Line 107: Line 118:
      
   for (i in 2:20) {   for (i in 2:20) {
-     kmeans_tmp=kmeans(x, 2, nstart=25) +     kmeans_tmp=kmeans(x, i, nstart=25) 
-     kmeans_ss[i] = kmeans_tmp$betweenss/kmeans_tmp$totss     +     #alternate way of looking at proportion of ss that is provided by between groups. 
-    +     #kmeans_ss[i] = kmeans_tmp$betweenss/kmeans_tmp$totss    
 + 
 +     #using silhouette width to evaluate clusters. 
 +     kmeans_sil= (kmeans_tmp$betweenss-kmeans_tmp$withinss)/max(kmeans_tmp$betweenss, kmeans_tmp$withinss)  
 +     kmeans_ss[i] = mean(kmeans_sil) 
 + 
   }   }
   return(kmeans_ss)   return(kmeans_ss)
Line 125: Line 142:
  
  
 +The code below plots k-means clustering results. You simply have to provide the k-means output and the labels.
 +
 +<code>
 +plotClusterCenters<-function(kmeansres, 
 +                             myxlab="Treatment", 
 +                             myylab="Expression",
 +                             mymain="K-means Clusters") {
 +  
 +  mycolors=c("blue","red","green","orange","pink","black")
 +  centersdim = dim(kmeansres$centers)
 +  plot(kmeansres$centers[1,], 
 +       type="b", 
 +       col=mycolors[1], 
 +       xlab=myxlab,
 +       ylab=myylab,
 +       main=mymain,
 +       ylim=c(round(min(kmeansres$centers)),
 +                               round(max(kmeansres$centers))),
 +                               xaxt="n")
 +  
 +  axis(1, at=c(1:centersdim[2]), labels=names(kmeansres$centers[1,]))
 +  
 +  for (i in 2:centersdim[1]) {
 +    lines(kmeansres$centers[i,], type="b", col=mycolors[i])
 +  }
 +  
 +}
 +
 +
 +plotClusterCenters(SigGenes.kmeans.2)
 </code> </code>
  
Line 132: Line 179:
  
 <code> <code>
 +install.packages("gplots")
 library(gplots) library(gplots)
 </code> </code>
Line 151: Line 199:
 <code> <code>
 pdf("heatmap.pdf") pdf("heatmap.pdf")
-heatmap.2(sigGenes.normalized, +heatmap.2(as.matrix(sigGenes.normalized)
           col=redgreen(75),           col=redgreen(75),
           hclustfun=hclust2,           hclustfun=hclust2,
mkatari-bioinformatics-august-2013-clustering.1418644900.txt.gz · Last modified: 2014/12/15 12:01 by mkatari