Zhengdeng Lei, PhD
Zhengdeng Lei, PhD
2007 - 2009 High Throughput Computational Analyst, Memorial Sloan-Kettering Cancer Center, New York
2003 - 2007 PhD, Bioinformatics, University of Illinois at Chicago
Tuesday, May 22, 2012
Batch Effect in AU
# Map a sequence of date labels to colour names for a heatmap side bar.
#
# Only neighbouring elements are compared: an element equal to its
# predecessor keeps the predecessor's colour, and every change of value
# advances one step through a 7-colour palette, wrapping back to "red"
# after "purple". The first element is always "red". Equal values that are
# not adjacent therefore do not necessarily share a colour.
#
# Args:
#   date.list: vector of date labels (anything comparable with `!=`,
#              e.g. character or factor). Must not contain NA.
# Returns:
#   Character vector of colour names with the same length as date.list
#   (character(0) for empty input).
date2col <- function(date.list)
{
  clr.template <- c("red", "orange", "yellow", "green", "cyan", "blue", "purple")
  num.dates <- length(date.list)

  # Nothing to colour. The previous 2:num.dates loop counted backwards for
  # inputs shorter than two elements and failed on the NA it indexed.
  if (num.dates == 0) {
    return(character(0))
  }

  # A missing label cannot be compared with its neighbour; fail with a clear
  # message instead of the obscure "missing value where TRUE/FALSE needed".
  if (anyNA(date.list)) {
    stop("date.list must not contain NA values", call. = FALSE)
  }

  # TRUE wherever an element differs from the one before it; the first
  # element has no predecessor and never counts as a change.
  changed <- c(FALSE, date.list[-1] != date.list[-num.dates])

  # The running count of changes selects the palette entry (0 -> "red").
  clr.template[cumsum(changed) %% length(clr.template) + 1]
}
# ---- Load sample annotation and expression data ----

# All files below are read from (and PDFs written to) this directory.
wk.dir <- "E:\\CEL\\GastricCancer\\AU\\PM_data_new\\Gastric_Affy_files\\Tumors"
setwd(wk.dir)

# Sample annotation table; the first column supplies the row names.
file.info.file <- "files.info.user.batch.txt"
file.info <- read.table(file = file.info.file, header = TRUE, row.names = 1)

# Top-level expression only: the table ordered by the raw EXP_DATE values.
# The result is not stored.
file.info[order(file.info$EXP_DATE), ]

# Re-order the samples by EXP_DATE parsed as month/day/year.
file.info <- file.info[
  order(as.POSIXct(strptime(file.info$EXP_DATE, "%m/%d/%Y"))),
]

# One colour per sample, changing whenever the (now sorted) date changes.
my.color <- date2col(file.info$EXP_DATE)

# Expression table; the first column supplies the row names.
data <- read.table(file = "AU_GC70.rma.txt", header = TRUE, row.names = 1)

# Keep rows 54614..54675 and the columns named after the rows of file.info,
# in file.info's (date-sorted) order.
data.ctrl <- data[54614:54675, rownames(file.info)]
# ---- Heatmaps of the control rows, columns kept in file.info order ----
library("gplots")

# Version 1: subtract each row's median, then clip to [-4, 4].
data <- sweep(data.ctrl, 1, apply(data.ctrl, 1, median)) #just median centered
data[data < -4] <- -4
data[data > 4] <- 4

# Draw once on the current device ...
hm <- heatmap.2(
  as.matrix(data),
  col = greenred(75),
  scale = "none",
  dendrogram = "none",
  Rowv = TRUE,
  Colv = FALSE,
  ColSideColors = my.color,
  key = TRUE,
  symkey = FALSE,
  density.info = "none",
  trace = "none",
  cexRow = 0.75,
  cexCol = 0.5
)

# ... and once more into a PDF file.
pdf(file = "Batch_in_CtrlGenes1.pdf", width = 10, height = 10)
hm <- heatmap.2(
  as.matrix(data),
  col = greenred(75),
  scale = "none",
  dendrogram = "none",
  Rowv = TRUE,
  Colv = FALSE,
  ColSideColors = my.color,
  key = TRUE,
  symkey = FALSE,
  density.info = "none",
  trace = "none",
  cexRow = 0.75,
  cexCol = 0.5
)
dev.off()

# Version 2: centre and scale each row, then clip to [-3, 3].
data <- t(scale(t(data.ctrl), scale = TRUE)) #standardized by row(gene)
data[data < -3] <- -3
data[data > 3] <- 3

# Draw once on the current device ...
hm <- heatmap.2(
  as.matrix(data),
  col = greenred(75),
  scale = "none",
  dendrogram = "none",
  Rowv = TRUE,
  Colv = FALSE,
  ColSideColors = my.color,
  key = TRUE,
  symkey = FALSE,
  density.info = "none",
  trace = "none",
  cexRow = 0.75,
  cexCol = 0.5
)

# ... and once more into a PDF file.
pdf(file = "Batch_in_CtrlGenes2.pdf", width = 10, height = 10)
hm <- heatmap.2(
  as.matrix(data),
  col = greenred(75),
  scale = "none",
  dendrogram = "none",
  Rowv = TRUE,
  Colv = FALSE,
  ColSideColors = my.color,
  key = TRUE,
  symkey = FALSE,
  density.info = "none",
  trace = "none",
  cexRow = 0.75,
  cexCol = 0.5
)
dev.off()
# ---- PCA of the control rows ----

# prcomp treats rows as observations, so transpose data.ctrl: its columns
# become the rows handed to prcomp.
genes <- t(data.ctrl)
pcs <- prcomp(genes)
summary(pcs) #select first N=10 PCs depending on Cumulative Proportion (e.g. >= 97.7%)
#pcs$x[,1:10]
#write.table(pcs$x[,1:15], file=ctrl.genes.pcs, sep = "\t")
#pcs<-prcomp(data[1:22215,])

library(scatterplot3d)

# Scores on the first three principal components.
PC1 <- pcs$x[, 1]
PC2 <- pcs$x[, 2]
PC3 <- pcs$x[, 3]

# Point colours come from the COLOR column of the annotation table.
# The script previously assigned my.color first and overwrote it on the next
# line; that dead store is removed and kept here as an alternative:
# group.colors <- my.color
group.colors <- file.info$COLOR

scatterplot3d(
  PC1, PC2, PC3,
  main = "PCA scatterplot before ComBat normalization",
  color = group.colors,
  pch = 16
)
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment