Differences

This shows you the differences between two versions of the page.

--- mkatari-bioinformatics-august-2013-clustering [2014/12/15 12:01] – mkatari
+++ mkatari-bioinformatics-august-2013-clustering [2015/06/17 13:26] (current) – mkatari
@@ Line 4: / Line 4: @@
 ====== Clustering rna-seq data ======
 continuation from [[mkatari-bioinformatics-august-2013-deseq|DESeq]]
+[[https://drive.google.com/file/d/0B172nc4dAaaORnh3MkZqUE9PVjA/view?usp=sharing|resSig.txt]]
+[[https://drive.google.com/file/d/0B172nc4dAaaONXFIX2YxeDRCbkE/view?usp=sharing|normalized.txt]]
+In case you didn't get DESeq to work download and load the files above
+<code>
+resSig = read.table("resSig.txt", header=T)
+normalized = read.table("normalized.txt", header=T, row.names=1)
+</code>
 Get the significant genes
@@ Line 12: / Line 23: @@
 Get the normalized values for the significant genes
 <code>
-sigGenes.normalized = normalized[sigGenes,]
+sigGenes.normalized = normalized[as.character(sigGenes),]
 </code>
@@ Line 35: / Line 46: @@
 <code>
-sigGenes.hclust.k2<-cutree(sigGenes.normalized.hclust, k=2, nstart=25)
+sigGenes.hclust.k2<-cutree(sigGenes.normalized.hclust, k=2)
 </code>
@@ Line 90: / Line 101: @@
 <code>
-SigGenes.kmeans.2 = kmeans(t(scaledSigGenes), 2)
+SigGenes.kmeans.2 = kmeans(scaledSigGenes, 2, nstart=25)
 </code>
@@ Line 107: / Line 118: @@
   for (i in 2:20) {
-     kmeans_tmp=kmeans(x, 2, nstart=25)
+     kmeans_tmp=kmeans(x, i, nstart=25)
-     kmeans_ss[i] = kmeans_tmp$betweenss/kmeans_tmp$totss
+     #alternate way of looking at proportion of ss that is provided by between groups.
+     #kmeans_ss[i] = kmeans_tmp$betweenss/kmeans_tmp$totss
+     #using silhouette width to evaluate clusters.
+     kmeans_sil= (kmeans_tmp$betweenss-kmeans_tmp$withinss)/max(kmeans_tmp$betweenss, kmeans_tmp$withinss)
+     kmeans_ss[i] = mean(kmeans_sil)
   }
   return(kmeans_ss)
@@ Line 125: / Line 142: @@
+The code below plots k-means clustering results. You simply have to provide the k-means output and the labels.
+<code>
+plotClusterCenters<-function(kmeansres,
+                             myxlab="Treatment",
+                             myylab="Expression",
+                             mymain="K-means Clusters") {
+  mycolors=c("blue","red","green","orange","pink","black")
+  centersdim = dim(kmeansres$centers)
+  plot(kmeansres$centers[1,],
+       type="b",
+       col=mycolors[1],
+       xlab=myxlab,
+       ylab=myylab,
+       main=mymain,
+       ylim=c(round(min(kmeansres$centers)),
+                               round(max(kmeansres$centers))),
+                               xaxt="n")
+  axis(1, at=c(1:centersdim[2]), labels=names(kmeansres$centers[1,]))
+  for (i in 2:centersdim[1]) {
+    lines(kmeansres$centers[i,], type="b", col=mycolors[i])
+  }
+}
+plotClusterCenters(SigGenes.kmeans.2)
 </code>
@@ Line 132: / Line 179: @@
 <code>
+install.packages("gplots")
 library(gplots)
 </code>
@@ Line 151: / Line 199: @@
 <code>
 pdf("heatmap.pdf")
-heatmap.2(sigGenes.normalized,
+heatmap.2(as.matrix(sigGenes.normalized),
           col=redgreen(75),
           hclustfun=hclust2,