library(cluster) rally <- read.table("rally.csv",header=T,sep=",") source("psopts.r") n <- ncol(rally) X <- t(rally[,2:n]) n <- nrow(X) p <- ncol(X) X.pca <- princomp(X,covmat = cov.wt(X,center=F)) #X.pca <- princomp(X) print(summary(X.pca)) W <- -X.pca$loadings X.px <- X%*%W postscript(file="rally_var.eps") screeplot(X.pca,main="") dev.off() postscript(file="rally_pca.eps") plot(X.px[,1:2],type="n") text(X.px[,1:2],labels = rownames(X.px),cex=0.5) dev.off() list = c("ward","complete","average","single") for (method in list) { h <- hclust(dist(X),method=method) print(summary(h)) print(cutree(h,k=3)) meth <- substr(method,1,4) filename <- paste("rally_hcl_",meth,".eps",sep="") postscript(file=filename) plot(h,cex=0.6,lwd=0.6,ann=F) dev.off() } rn <- rownames(X) # Check that column numbers print(c(rn[70],rn[1],rn[3])) # are correct. tcm5y <- X[70,] sp500 <- X[1,] nasdaq <- X[3,] rn.px <- rownames(X.px) # Check that column numbers print(c(rn.px[70],rn.px[1],rn.px[3])) # are correct. tcm5y.px <- X.px[70,1:2] sp500.px <- X.px[1,1:2] nasdaq.px <- X.px[3,1:2] ctr <- rbind(tcm5y, sp500, nasdaq) ctr.px <- ctr%*%W postscript(file="rally_km0.eps") plot(X.px[,1:2], type="n") text(X.px[,1:2],labels = rownames(X),cex=0.5) points(ctr.px[,1:2],pch=19,cex=1.5,col=c("black","red","green")) dev.off() clust <- mat.or.vec(n,3) for (i in 1:n) { clust[i,] <- order(c(sum((X[i,]-tcm5y)^2), sum((X[i,]-sp500)^2), sum((X[i,]-nasdaq)^2))) } postscript(file="rally_km1.eps") plot(X.px[,1:2], type="n") text(X.px[,1:2], labels = rownames(X),cex=0.5,col=clust[,1]) points(ctr.px[,1:2],pch=19,cex=1.5,col=c("black","red","green")) dev.off() ctr <- rbind(tcm5y, sp500, nasdaq) ctr.px <- ctr%*%W km <- kmeans(X,ctr,20) print(summary(km)) ctr <- km$centers ctr.px <- ctr%*%W postscript(file="rally_km2.eps") plot(X.px[,1:2], type="n") text(X.px[,1:2], labels = rownames(X),cex=0.5,col=km$cluster) points(ctr.px[,1:2],pch=19,cex=1.5,col=c("black","red","green")) dev.off() postscript(file="rally_km.eps") plot(X.px[,1:2], type="n") text(X.px[,1:2], labels = rownames(X),cex=0.5,col=km$cluster) dev.off() postscript(file="rally_ret.eps") plot(x=X[,3],y=X[,9], type="n", xlab="October 21, 1987, returns", ylab="July, 24, 2002, returns" ) text(x=X[,3],y=X[,9], labels = rownames(X),cex=0.5,col=km$cluster) dev.off()