Zhengdeng Lei, PhD

Zhengdeng Lei, PhD

2009 - Present Research Fellow at Duke-NUS, Singapore
2007 - 2009 High Throughput Computational Analyst, Memorial Sloan-Kettering Cancer Center, New York
2003 - 2007 PhD, Bioinformatics, University of Illinois at Chicago

Tuesday, May 22, 2012

Batch Effect in AU


# Map a vector of run dates to a vector of batch colours.
#
# Consecutive equal entries share a colour. Every time the value changes from
# one element to the next, the next colour of the palette below is used,
# wrapping around to the first colour after the last one. The first element is
# always "red". The input is expected to be ordered already so that samples
# from the same date are adjacent; this function does not sort.
#
# Args:
#   date.list: vector of dates (character, factor, ...); compared as strings.
#
# Returns:
#   Character vector of colour names with the same length as date.list
#   (character(0) for empty input).
#
# Stops with an error if date.list contains missing values.
date2col <- function(date.list)
{
  clr.template <- c("red", "orange", "yellow", "green", "cyan", "blue", "purple")

  # Compare labels rather than factor codes or other internal representations.
  date.list <- as.character(date.list)
  num.dates <- length(date.list)

  if (num.dates == 0) {
    return(character(0))
  }
  if (any(is.na(date.list))) {
    stop("date.list must not contain missing values", call. = FALSE)
  }

  # TRUE wherever an element differs from its predecessor; the first element
  # never starts a *new* batch, so it keeps index 0 (-> "red").
  is.new.batch <- c(FALSE, date.list[-1] != date.list[-num.dates])
  c.index <- cumsum(is.new.batch)

  clr.template[c.index %% length(clr.template) + 1]
}


# All input and output files below are addressed relative to this directory.
wk.dir <- "E:\\CEL\\GastricCancer\\AU\\PM_data_new\\Gastric_Affy_files\\Tumors"
setwd(wk.dir)

# Annotation table: first column becomes the row names, and an EXP_DATE column
# holds the run date as month/day/year text.
file.info.file <- "files.info.user.batch.txt"
file.info <- read.table(file = file.info.file, header = TRUE, row.names = 1)

# Sort rows chronologically. The dates must be parsed first: ordering the raw
# m/d/Y strings would sort them lexicographically (e.g. "10/..." before "2/...").
exp.date <- as.Date(as.character(file.info$EXP_DATE), format = "%m/%d/%Y")
if (any(is.na(exp.date))) {
  # order() places NA last, so such rows would end up after all dated rows.
  warning("Some EXP_DATE values could not be parsed as m/d/Y; ",
          "those rows are sorted last", call. = FALSE)
}
file.info <- file.info[order(exp.date), ]

# One colour per row, changing whenever the (now sorted) run date changes.
my.color <- date2col(file.info$EXP_DATE)


# gplots supplies heatmap.2() and greenred() used for the heatmaps further down.
library("gplots")

# Expression table: header row gives the column names, first column the row names.
data <- read.table("AU_GC70.rma.txt", header = TRUE, row.names = 1)

# Keep rows 54614..54675 only (presumably the control probe sets -- confirm
# against the array annotation), with columns arranged in the row order of
# file.info so they line up with my.color.
ctrl.rows <- 54614:54675
ctrl.cols <- rownames(file.info)
data.ctrl <- data[ctrl.rows, ctrl.cols]



# Heatmap 1: each row of data.ctrl centred on its own median.
data <- sweep(data.ctrl, 1, apply(data.ctrl, 1, median))

# Clip to [-4, 4] so that a few extreme values do not dominate the colour scale.
data[data < -4] <- -4
data[data > 4] <- 4

# Single definition of the plot, used for both the screen and the PDF copy.
# Rows are reordered by clustering (Rowv = TRUE) but no dendrogram is drawn;
# columns keep their given order (Colv = FALSE) and are annotated with my.color.
plot.centered.heatmap <- function() {
  heatmap.2(as.matrix(data), col = greenred(75), scale = "none",
            dendrogram = "none", Rowv = TRUE, Colv = FALSE,
            ColSideColors = my.color, key = TRUE, symkey = FALSE,
            density.info = "none", trace = "none",
            cexRow = 0.75, cexCol = 0.5)
}

# On the current (interactive) device.
hm <- plot.centered.heatmap()

# Same plot written to a PDF; the device is closed even if plotting fails.
pdf(file = "Batch_in_CtrlGenes1.pdf", width = 10, height = 10)
hm <- tryCatch(plot.centered.heatmap(), finally = dev.off())


# Heatmap 2: each row of data.ctrl standardised (centred and divided by its
# standard deviation). scale() works column-wise, hence the double transpose.
data <- t(scale(t(data.ctrl), scale = TRUE))

# Clip to [-3, 3] so that a few extreme values do not dominate the colour scale.
data[data < -3] <- -3
data[data > 3] <- 3

# Single definition of the plot, used for both the screen and the PDF copy.
# Rows are reordered by clustering (Rowv = TRUE) but no dendrogram is drawn;
# columns keep their given order (Colv = FALSE) and are annotated with my.color.
plot.standardized.heatmap <- function() {
  heatmap.2(as.matrix(data), col = greenred(75), scale = "none",
            dendrogram = "none", Rowv = TRUE, Colv = FALSE,
            ColSideColors = my.color, key = TRUE, symkey = FALSE,
            density.info = "none", trace = "none",
            cexRow = 0.75, cexCol = 0.5)
}

# On the current (interactive) device.
hm <- plot.standardized.heatmap()

# Same plot written to a PDF; the device is closed even if plotting fails.
pdf(file = "Batch_in_CtrlGenes2.pdf", width = 10, height = 10)
hm <- tryCatch(plot.standardized.heatmap(), finally = dev.off())



# PCA on the transposed data.ctrl, so each column of data.ctrl becomes one
# observation (row) and each row of data.ctrl becomes one variable.
genes <- t(data.ctrl)
pcs <- prcomp(genes)
summary(pcs) #select first N=10 PCs depending on Cumulative Proportion (e.g. >= 97.7%)
#pcs$x[,1:10]
#write.table(pcs$x[,1:15], file=ctrl.genes.pcs, sep = "\t")
#pcs<-prcomp(data[1:22215,])

library(scatterplot3d)

# Keep these as plain variables named PC1..PC3: scatterplot3d() derives its
# default axis labels from the argument expressions.
PC1 <- pcs$x[, 1]
PC2 <- pcs$x[, 2]
PC3 <- pcs$x[, 3]

# Point colours come from the COLOR column of file.info.
# To colour by run date instead, use: group.colors <- my.color
group.colors <- file.info$COLOR

pca.title <- "PCA scatterplot before ComBat normalization"

# Same three components viewed with different axis assignments.
scatterplot3d(PC1, PC2, PC3, main = pca.title, color = group.colors, pch = 16)

scatterplot3d(PC3, PC2, PC1, main = pca.title, color = group.colors, pch = 16)

scatterplot3d(PC2, PC1, PC3, main = pca.title, color = group.colors, pch = 16)


No comments:

Post a Comment