Zhengdeng Lei, PhD

Zhengdeng Lei, PhD

2009 - Present Research Fellow at Duke-NUS, Singapore
2007 - 2009 High Throughput Computational Analyst, Memorial Sloan-Kettering Cancer Center, New York
2003 - 2007 PhD, Bioinformatics, University of Illinois at Chicago

Wednesday, July 4, 2012

Order statistics for combining various genomic data sources



# Order statistics. The rankings from the separate data sources are combined
# using order statistics. A Q statistic is calculated from all rank ratios
# using the joint cumulative distribution of an N-dimensional order statistic,
# as previously done by Stuart et al. (ref. 31).
#
# Q(r) is the probability that N independent Uniform(0, 1) draws, once sorted,
# are all at or below the sorted rank ratios r(1) <= ... <= r(N):
#
#   Q = N! * integral_0^r(1) integral_s1^r(2) ... integral_s(N-1)^r(N) ds_N ... ds_1
#
# Arguments:
#   r  numeric vector of rank ratios in (0, 1], one per data source.
#      NA entries are ignored (sort() drops them).
#
# Returns:
#   A single number in [0, 1]; smaller means the item is ranked consistently
#   near the top across the data sources. Returns NA_real_ when r contains no
#   usable rank ratio.
#
# Implementation notes:
#   * The factorial is applied exactly once. Multiplying by factorial(N) at
#     every level of the recursion inflates the result by 2! * 3! * ... * N!,
#     which is no longer a probability and is not comparable between vectors
#     of different length.
#   * The integral is evaluated with the O(N^2) recurrence
#       V_0 = 1,  V_k = sum_{i=1..k} (-1)^(i-1) * V_(k-i) / i! * r(N-k+1)^i,
#       Q = N! * V_N
#     instead of the O(N!) recursion over leave-one-out subsets.
Q <- function(r)
{
  if (!is.numeric(r) && !all(is.na(r)))
  {
    stop("'r' must be a numeric vector of rank ratios", call. = FALSE)
  }

  # sort() silently discards NA, so missing data sources are skipped.
  r <- sort(as.numeric(r))
  N <- length(r)
  if (N == 0)
  {
    return(NA_real_)
  }

  # v[j + 1] holds V_j; V_0 = 1 by definition.
  v <- numeric(N + 1)
  v[1] <- 1
  for (k in seq_len(N))
  {
    i <- seq_len(k)
    rk <- r[N - k + 1]
    v[k + 1] <- sum((-1)^(i - 1) * v[k - i + 1] / factorial(i) * rk^i)
  }

  factorial(N) * v[N + 1]
}

#r <- c(0.2, 0.05, 0.2)
#Q(r)


# Toy example: 10 genes ranked independently by 4 data sources.
# Each column is a random permutation of the ranks 1..10.
d <- replicate(4, sample(1:10))
dimnames(d) <- list(
  paste0("gene", 1:10),
  c("gene.exp", "methyl", "CNV", "Mutation")
)
d

# Turn ranks into rank ratios (rank / number of genes).
r <- d / nrow(d)

# Combine the four rank ratios of each gene (one row) into a single Q value
# and list the genes from smallest to largest Q.
ranking <- apply(r, 1, Q)
sort(ranking)





> d # the smaller the rank, the more important the feature; e.g. gene1 and gene3 have the highest t-score/fold change
       gene.exp methyl CNV Mutation
gene1         1      2   2        7
gene2         9      8   8        3
gene3         2      4   3        8
gene4        10      5   1        1
gene5         3     10   4        5
gene6         8      9   7       10
gene7         6      6   9        4
gene8         7      7   5        6
gene9         5      1   6        9
gene10        4      3  10        2
> r = d/10
> ranking <- apply(r, 1, Q)
> sort(ranking)
   gene1    gene4    gene3   gene10    gene9    gene8    gene5    gene7    gene2    gene6
  4.4640  10.7424  29.7216  41.2416  49.6224  68.1120  79.4016 108.7488 144.5472 268.3296
>

No comments:

Post a Comment