Zhengdeng Lei, PhD

Zhengdeng Lei, PhD

2009 - Present Research Fellow at Duke-NUS, Singapore
2007 - 2009 High Throughput Computational Analyst, Memorial Sloan-Kettering Cancer Center, New York
2003 - 2007 PhD, Bioinformatics, University of Illinois at Chicago

Thursday, May 26, 2011

% Load an n-by-n consensus matrix from a tab-delimited text file and report
% its cophenetic correlation coefficient via getcoph.
%
% The file is read line by line: the first line is skipped, and on every
% following line the first tab-separated field is dropped (presumably a
% row label -- TODO confirm) and the next n fields are parsed as numbers.

% With iterative feature selection, converged after three runs (consensus clustering)
file = 'E:\Projects\8.ComBAT\ComBat399T\CC_IFS\Run2\K3_consensus_matrix2.txt'

% No iterative feature selection
%file = 'E:\Projects\8.ComBAT\ComBat399T\CC_IFS\K3_consensus_matrix0.txt'

% Number of rows/columns expected in the consensus matrix.
n=399
A = zeros(n, n);

fid = fopen(file, 'r');
if fid == -1
    % fopen returns -1 on failure; stop here rather than failing later in fgetl.
    error('Could not open consensus matrix file: %s', file);
end

% Skip first line
fgetl(fid);

for row = 1:n
    tline = fgetl(fid);
    if ~ischar(tline)
        % fgetl returns -1 at end of file: the file has fewer than n data rows.
        fclose(fid);
        error('Unexpected end of file at data row %d of %d in %s', row, n, file);
    end
    fields = regexp(tline, '\t', 'split');
    % Field 1 is skipped; fields 2..n+1 hold the n matrix entries of this row.
    A(row, :) = str2double(fields(1, 2:n+1));
end

% Release the file handle (the original never closed it).
fclose(fid);

% getcoph.m is expected to live in this folder.
cd('E:\MATLAB_lib')
v=getcoph(A)





%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

function  [coph] = getcoph(a)
% GETCOPH  Cophenetic correlation coefficient of a consensus matrix.
%
%   coph = getcoph(a)
%
%   a    - square consensus matrix. Its entries are treated as similarities
%          and converted to distances as 1 - a (values are assumed to lie
%          in [0, 1]; this is not checked here).
%   coph - value returned by cophenet for the average-linkage tree built
%          from those distances.
%
%   Only the strict upper triangle of a is used; the diagonal and the lower
%   triangle are ignored.

[m, k] = size(a);
if m ~= k
    error('getcoph:notSquare', ...
          'Consensus matrix must be square, got %d-by-%d.', m, k);
end

% Collect the strict upper triangle row by row:
%   a(1,2:end), a(2,3:end), ..., a(m-1,m)
% Column-major logical indexing into the transpose visits the elements in
% exactly that order, without growing a vector inside a loop.
at = a.';
uvec = at(tril(true(m), -1)).';

y = 1 - uvec;                % consensus are similarities, convert to distances
z = linkage(y, 'average');   % use average linkage
coph = cophenet(z, y);

end

Wednesday, May 25, 2011

Download youtube


Go to youtube URL for a video.
Copy and paste one of the following into your Chrome address bar:

22 1280x720
// Bookmarklet: navigate the current tab to the stream URL whose format code is 22.
// Steps, as written below:
//   1. isIE is `false` wherever /*@cc_on!@*/ is treated as a plain comment
//      (presumably it becomes `!false` under IE conditional compilation - confirm).
//   2. swfHTML is read from the element with id "movie_player": the value of its
//      second <param> child when isIE, otherwise its "flashvars" attribute.
//   3. The string is split on "&"; the pair whose key is "fmt_url_map" is unescaped
//      into `links`, which is split on "," into entries of the form "fmt|url".
//   4. For the entry with fmt == 22, the page is redirected to url + '&title=' + the
//      document title, with the first '#', '@', '*' and '|' each replaced by a space
//      (String.replace with a string pattern replaces only the first match).
// NOTE(review): every variable is an implicit global, and if no "fmt_url_map" key is
// found `links` is never assigned here, so `links.split` would throw.
javascript:isIE=/*@cc_on!@*/false;isIE ? swfHTML=document.getElementById('movie_player').getElementsByTagName('param')[1].value:swfHTML=document.getElementById("movie_player").getAttribute("flashvars"); 
w=swfHTML.split("&"); for(i=0;i<=w.length-1;i++) if(w[i].split("=")[0] == "fmt_url_map"){links=unescape(w[i].split("=")[1]);break;}abc = links.split(",");for(i=0;i<=abc.length-1;i++){fmt=abc[i].split("|")[0];if(fmt==22){url = abc[i].split("|")[1];window.location.href = url + '&title=' + (((document.title.replace('#',' ')).replace('@',' ')).replace('*',' ')).replace('|',' ');}}


35 854x480

// Bookmarklet: navigate the current tab to the stream URL whose format code is 35.
// Reads the flashvars string of the element with id "movie_player" (second <param>
// value when isIE, otherwise the "flashvars" attribute), finds the "fmt_url_map"
// pair, unescapes it, splits it on "," into "fmt|url" entries, and redirects to the
// url of the entry with fmt == 35, appending '&title=' plus the document title with
// the first '#', '@', '*' and '|' each replaced by a space.
// NOTE(review): every variable is an implicit global, and if no "fmt_url_map" key is
// found `links` is never assigned here, so `links.split` would throw.
javascript:isIE=/*@cc_on!@*/false;isIE ? swfHTML=document.getElementById('movie_player').getElementsByTagName('param')[1].value:swfHTML=document.getElementById("movie_player").getAttribute("flashvars");
w=swfHTML.split("&"); for(i=0;i<=w.length-1;i++) if(w[i].split("=")[0] == "fmt_url_map"){links=unescape(w[i].split("=")[1]);break;}abc = links.split(",");for(i=0;i<=abc.length-1;i++){fmt=abc[i].split("|")[0];if(fmt==35){url = abc[i].split("|")[1];window.location.href = url + '&title=' + (((document.title.replace('#',' ')).replace('@',' ')).replace('*',' ')).replace('|',' ');}}

34 640x360
18 640x360
// Bookmarklet: navigate the current tab to the stream URL whose format code is 18
// (the code below matches fmt == 18 only; it never tests for format 34).
// Reads the flashvars string of the element with id "movie_player" (second <param>
// value when isIE, otherwise the "flashvars" attribute), finds the "fmt_url_map"
// pair, unescapes it, splits it on "," into "fmt|url" entries, and redirects to the
// url of the matching entry, appending '&title=' plus the document title with the
// first '#', '@', '*' and '|' each replaced by a space.
// NOTE(review): every variable is an implicit global, and if no "fmt_url_map" key is
// found `links` is never assigned here, so `links.split` would throw.
javascript:isIE=/*@cc_on!@*/false;isIE ? swfHTML=document.getElementById('movie_player').getElementsByTagName('param')[1].value:swfHTML=document.getElementById("movie_player").getAttribute("flashvars"); 
w=swfHTML.split("&"); for(i=0;i<=w.length-1;i++) if(w[i].split("=")[0] == "fmt_url_map"){links=unescape(w[i].split("=")[1]);break;}abc = links.split(",");for(i=0;i<=abc.length-1;i++){fmt=abc[i].split("|")[0];if(fmt==18){url = abc[i].split("|")[1];window.location.href = url + '&title=' + (((document.title.replace('#',' ')).replace('@',' ')).replace('*',' ')).replace('|',' ');}}


5 320x240
// Bookmarklet: navigate the current tab to the stream URL whose format code is 5.
// Reads the flashvars string of the element with id "movie_player" (second <param>
// value when isIE, otherwise the "flashvars" attribute), finds the "fmt_url_map"
// pair, unescapes it and splits it on "," into "fmt|url" entries. For the entry
// with fmt == 5 it builds url + '&title=' + the document title (first '#', '@', '*'
// and '|' each replaced by a space) and then assigns that to window.location.href.
// NOTE(review): every variable is an implicit global, and if no "fmt_url_map" key is
// found `links` is never assigned here, so `links.split` would throw.
javascript:isIE=/*@cc_on!@*/false;isIE ? swfHTML=document.getElementById('movie_player').getElementsByTagName('param')[1].value:swfHTML=document.getElementById("movie_player").getAttribute("flashvars");w=swfHTML.split("&");for(i=0;i<=w.length-1;i++)if(w[i].split("=")[0] == "fmt_url_map"){links=unescape(w[i].split("=")[1]);break;}abc = links.split(",");for(i=0;i<=abc.length-1;i++){fmt=abc[i].split("|")[0];if(fmt==5){url = abc[i].split("|")[1] + '&title=' + (((document.title.replace('#',' ')).replace('@',' ')).replace('*',' ')).replace('|',' ');window.location.href = url;}}

Friday, May 20, 2011

Standardization

# Toy input: a 3 x 7 matrix (rows = genes, columns = arrays).
x <- matrix(1:21, ncol = 7)

# By row (gene)
# scale() centres and scales columns, so transpose before and after to
# standardize each row instead.
std.x.by.row <- t(scale(t(x), scale = TRUE))

# By column (array)
std.x.by.col <- scale(x, scale = TRUE)



Check the batch effect by date

# Map an ordered vector of batch labels (e.g. scan dates) to colours so that
# consecutive runs of identical labels share one colour.
#
# The first run is "red"; every time the label differs from the previous one
# the colour advances to the next entry of a fixed 7-colour palette, wrapping
# around after "purple". Only adjacent elements are compared, so a label that
# reappears later, after a different label, gets a new colour.
#
# date.list: vector of labels, in sample order; must not contain NA.
# Returns:   character vector of colour names, the same length as date.list
#            (character(0) for empty input).
date2col <- function(date.list)
{
  clr.template <- c("red", "orange", "yellow", "green", "cyan", "blue", "purple")
  num.dates <- length(date.list)
  if (num.dates == 0L) {
    return(character(0))
  }
  # An NA label cannot be compared with its neighbour; fail with a clear message.
  if (anyNA(date.list)) {
    stop("date.list must not contain NA", call. = FALSE)
  }

  # TRUE wherever an element differs from the one just before it.
  changed <- date.list[-1] != date.list[-num.dates]
  # Zero-based run index of every element: 0 for the first run, +1 per change.
  run.index <- c(0L, cumsum(changed))

  clr.template[run.index %% length(clr.template) + 1L]
}



# Check for a batch effect by scan date: draw a heatmap of median-centred
# control-row intensities with a colour bar marking the scan date of each
# array, first on the current device and then into a PDF file.

library("gplots")

# NOTE(review): hard-coded, machine-specific working directory. It is kept
# because the relative input and output paths below depend on it.
setwd("E:\\CEL\\GastricCancer\\AU\\PM_data_new\\Gastric_Affy_files\\Tumors")
data <- read.table(file = "AU_GC70.rma.txt", header = TRUE, row.names = 1)

# Rows 54614-54675 are used as the control set (presumably the control
# probe sets at the end of the table -- TODO confirm against the annotation).
data.ctrl <- data[54614:54675, ]

#data <- t(scale(t(data.ctrl), scale=T)) #standardized by row(gene)
#data[data < -3] <- -3
#data[data > 3] <- 3

data <- sweep(data.ctrl, 1, apply(data.ctrl, 1, median)) #just median centered

# Scan date of each array, in column order.
batch.dates <- c(
  "8/4/2004", "8/4/2004",
  "11/18/2004", "11/18/2004",
  "11/25/2004", "11/25/2004", "11/25/2004", "11/25/2004", "11/25/2004",
  "11/25/2004", "11/25/2004",
  "11/26/2004", "11/26/2004", "11/26/2004", "11/26/2004", "11/26/2004",
  "12/2/2004", "12/2/2004",
  "12/3/2004", "12/3/2004", "12/3/2004", "12/3/2004", "12/3/2004",
  "12/3/2004", "12/3/2004", "12/3/2004", "12/3/2004",
  "1/14/2005", "1/14/2005", "1/14/2005", "1/14/2005", "1/14/2005",
  "1/14/2005",
  "2/17/2005", "2/17/2005", "2/17/2005", "2/17/2005", "2/17/2005",
  "2/17/2005", "2/17/2005",
  "2/25/2005", "2/25/2005",
  "3/4/2005", "3/4/2005", "3/4/2005",
  "3/18/2005", "3/18/2005",
  "3/23/2005",
  "4/8/2005", "4/8/2005", "4/8/2005", "4/8/2005", "4/8/2005", "4/8/2005",
  "4/28/2005", "4/28/2005", "4/28/2005", "4/28/2005",
  "4/29/2005", "4/29/2005", "4/29/2005", "4/29/2005",
  "5/19/2005",
  "5/24/2005", "5/24/2005",
  "6/22/2005", "6/22/2005", "6/22/2005", "6/22/2005", "6/22/2005"
)
# ColSideColors needs exactly one colour per column; fail early and clearly
# if the date list and the data are out of step.
stopifnot(length(batch.dates) == ncol(data))

#my.color <- rep("black",dim(data)[2])
my.color <- date2col(batch.dates)

# One definition of the plot, used for both the on-screen and the PDF copy.
# Columns keep their file order (Colv = FALSE) so the date colour bar stays
# contiguous; rows are reordered (Rowv = TRUE) but no dendrogram is drawn.
draw.batch.heatmap <- function() {
  heatmap.2(
    as.matrix(data),
    col = greenred(75),
    scale = "none",
    dendrogram = "none",
    Rowv = TRUE,
    Colv = FALSE,
    ColSideColors = my.color,
    key = TRUE,
    symkey = FALSE,
    density.info = "none",
    trace = "none",
    cexRow = 0.75,
    cexCol = 0.75
  )
}

hm <- draw.batch.heatmap()

pdf(file = "Batch_in_CtrlGenes.pdf", width = 10, height = 10)
#pdf(file = "Batch_in_CtrlGenes.pdf")
# Close the PDF device even if plotting fails, so no device is left open.
hm <- tryCatch(draw.batch.heatmap(), finally = dev.off())


Wednesday, May 11, 2011

GSAA

High, mid, low activity, e.g. p53
mid vs low -->(GSEA)  ES
high vs low --> High ES??




Suppose we have 200 cell lines; for each, obtain the expression profile before drug treatment and the GI50 for each of M drugs.
(NCI60, too small?)



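|          | Drug1 | Drug2 | Drug3 | ... ... | DrugM |
|----------|-------|-------|-------|---------|-------|
| GeneSet1 | Corr(1,1) = GSAA1vsGI50 for drug 1 across all cell lines | Corr(1,2) = GSAA1vsGI50 for drug 2 across all cell lines | | | |
| GeneSet2 | | | | | |
| GeneSet3 | | | | | |
| GeneSet4 | | | | | |
| ... ...  | | | | | |
| GeneSetN | | | | | |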