Differences

This shows you the differences between two versions of the page.

--- mkatari-bioinformatics-august-2013-clustering [2014/12/11 14:41] – mkatari
+++ mkatari-bioinformatics-august-2013-clustering [2014/12/11 15:24] – [K-means] mkatari
@@ Line 70: / Line 70: @@
 <code>
-sigGenesMean = rowMeans(sigGenes.normalized)
+# this function takes a numeric vector and rescales it to z-scores: (x - mean) / sd.
-sigGenesSD = apply(sigGenes.normalized, 1, sd)
+scaleData <- function(x) {
+  x = as.numeric(x)
+  meanx = mean(x)
+  sdx = sd(x)
+  y = (x-meanx)/sdx
+  return(y)
+}
+#we need to transpose the result because the apply function returns the genes as separate columns.
+scaledSigGenes = t(apply(sigGenes.normalized, 1, scaleData))
+colnames(scaledSigGenes)=colnames(sigGenes.normalized)
+#now to run k-means; in this case we are starting with 2 clusters.
+#just like for hierarchical clustering, we first have to transpose the data to compare genes.
+SigGenes.kmeans.2 = kmeans(t(scaledSigGenes), 2)
+#a plot of the groups
+plot(SigGenes.kmeans.2$centers[1,], SigGenes.kmeans.2$centers[2,])
+# a measure of how well the clustering has performed
+# it is the ratio of the between-cluster sum of squares to the total sum of squares
+# the higher the better.
+SigGenes.kmeans.2$betweenss/SigGenes.kmeans.2$totss
+#to get the genes in the different clusters
+SigGenes.kmeans.2.group1 = names(which(SigGenes.kmeans.2$cluster==1))
+SigGenes.kmeans.2.group2 = names(which(SigGenes.kmeans.2$cluster==2))
 </code>
 ====== Heatmap ======