Skip to content

googlegroupsformatter

bhive01 edited this page Nov 16, 2011 · 14 revisions
require(mclust)
require(ggplot2)
require(ellipse)

#if function, input is original dataset
orig.data<-iris[1:4]

data.mclust<-Mclust(orig.data)

BIC.data<-as.data.frame(data.mclust$BIC)
BIC.data$NumComp<-rownames(BIC.data)
melted.BIC<-melt(BIC.data, var.ids= "NumComp")

ggplot(melted.BIC, aes(x=as.numeric(NumComp), y=value, colour=variable, group=variable))+
scale_x_continuous("Number of Components")+
scale_y_continuous("BIC")+
scale_colour_hue("")+
geom_point()+
geom_line()+
theme_bw()


#plotting correlation matrix of original data colored by model classification
#stripped from plotmatrix()
mapping=aes(colour=as.factor(data.mclust$classification), shape=as.factor(data.mclust$classification))

grid <- expand.grid(x = 1:ncol(orig.data), y = 1:ncol(orig.data))
grid <- subset(grid, x != y)
all <- do.call("rbind", lapply(1:nrow(grid), function(i) {
							   xcol <- grid[i, "x"]
							   ycol <- grid[i, "y"]
							   data.frame(xvar = names(orig.data)[ycol], yvar = names(orig.data)[xcol], 
										  x = orig.data[, xcol], y = orig.data[, ycol], orig.data)
							   }))
all$xvar <- factor(all$xvar, levels = names(orig.data))
all$yvar <- factor(all$yvar, levels = names(orig.data))
densities <- do.call("rbind", lapply(1:ncol(orig.data), function(i) {
									 data.frame(xvar = names(orig.data)[i], yvar = names(orig.data)[i], 
												x = orig.data[, i])
									 }))
mapping <- defaults(mapping, aes_string(x = "x", y = "y"))
class(mapping) <- "uneval"
ggplot(all, mapping) + 
facet_grid(xvar ~ yvar, scales = "free") + 
geom_point(na.rm = TRUE) + 
stat_density(aes(x = x, y = ..scaled.. * diff(range(x)) + min(x)), data = densities, position = "identity", colour = "grey20", geom = "line") +
opts(legend.postion= "none")

#cant get rid of the damn legend
#it would be a bunch faster if it only plotted 1 v 2 and not also 2 v 1... cest la vie


#scatter plot of width by length
#color and shape by classification


#center of cluster by data.mclust$parameters$mean

get.ellipses <- function(coords, mclust.fit){
	centers <- mclust.fit$parameters$mean[coords, ]
	vars <- mclust.fit$parameters$variance$sigma[coords, coords, ]
	ldply(1:ncol(centers), function(cluster){
		  data.frame(ellipse(vars[,,cluster], centre = centers[, cluster],
							 level = 0.5), classification = cluster)
		  })
}

iris.el <- get.ellipses(c("Sepal.Length", "Sepal.Width"), data.mclust)
orig.data$classification <- data.mclust$classification

ggplot(orig.data, aes(Sepal.Length, Sepal.Width, colour = factor(classification))) +
geom_point(aes(shape = classification))+
geom_path(data = iris.el,
aes(group = classification, linetype = classification))

x4 <- c(330, 835, 317, 396, 486, 391)
y4 <- c(4098.11, 4099.14, 4098.16, 4098.35, 4099.30, 4096.73)
y4b <- c(0.47, 0.29, 0.46, 0.83, 0.72, 0.87)
y4min<-y4-y4b
y4max<-y4+y4b
dat.tgg817a <- data.frame(x4, y4, y4b,y4max, y4min)

t4 <- sum(y4/y4b)/(sum(1/y4b))

x5 <- c(721, 257, 204, 271)
y5 <- c(4096.33, 4095.51, 4095.29, 4095.19)
y5b <- c(0.10, 0.15, 0.18, 0.22)
y5min<-y5-y5b
y5max<-y5+y5b
dat.tgg817b <- data.frame(x5, y5, y5b, y5max, y5min)

t5 <- sum(y5/y5b)/(sum(1/y5b))

ggplot()+
geom_point(data=dat.tgg817a, aes(x=x4, y=y4))+
geom_errorbar(data=dat.tgg817a, aes(x=x4, y=y4, ymin= y4min, ymax= y4max), width = 3, colour='NavyBlue')+
geom_hline(aes(yintercept=t4), colour ='NavyBlue', size=2)+
geom_point(data=dat.tgg817b, aes(x=x5, y=y5), colour='DarkRed')+
geom_hline(aes(yintercept=t5), colour ='DarkRed', size=2)+
geom_errorbar(data=dat.tgg817b, aes(x=x5, y=y5, ymin= y5min, ymax= y5max), width = 3, colour='DarkRed')+
coord_cartesian(ylim=c(4094,4101), wise=TRUE)

Note: The ggplot2 wiki is no longer maintained, please use the ggplot2 website instead!

Clone this wiki locally