Zhengdeng Lei, PhD

Zhengdeng Lei, PhD

2009 - Present Research Fellow at Duke-NUS, Singapore
2007 - 2009 High Throughput Computational Analyst, Memorial Sloan-Kettering Cancer Center, New York
2003 - 2007 PhD, Bioinformatics, University of Illinois at Chicago

Tuesday, April 5, 2011

wget http://hgdownload.cse.ucsc.edu/goldenPath/hg19/bigZips/chromFa.tar.gz >out.txt &

tar -zxvf chromFa.tar.gz
//remove random, unmapped, haplotype, chrM
rm *random*
rm *Un*
rm *hap*
rm chrM.fa

cat chr*.fa > hg19.fa
To check what chromosome inside:
grep ">" hg19.fa

//build ref genome index
cd /home/leiz/NGS/hg19
bwa index -a bwtsw /home/leiz/NGS/hg19/hg19.fa &
// use top -u leiz to check progress

//get RNA-seq raw data for AGS cell line
cd /home/leiz/NGS/GEMINI
scp NUSSTF\\gmslz@172.25.138.12:/data/public/RNA-seq_illumina_raw_data/YMM/AGS*  ./

//align by bwa
cd /home/leiz/NGS/GEMINI
bwa aln ../hg19/hg19.fa AGS_1_sequence.fastq >AGS_1.sai &
bwa aln ../hg19/hg19.fa AGS_2_sequence.fastq >AGS_2.sai &
./bwa sampe ../hg19/hg19.fa AGS_1.sai AGS_2.sai  AGS_1_sequence.fastq  AGS_2_sequence.fastq > AGS.pe.bwa.sam

//index hg19
cd /home/leiz/NGS/hg19
samtools faidx hg19.fa &


//sed 's/[ \t]*$//g' samfile
// import BWA alignment
cd /home/leiz/NGS/GEMINI

//sed 's/[ \t]*$//g' AGS.pe.bwa.sam >AGS.pe.bwa.sed.sam








//ref: http://seqanswers.com/forums/showthread.php?t=9650
//difference with samtools view -bt?

samtools import ../hg19/hg19.fa.fai AGS.pe.bwa.sed.sam AGS.pe.bwa.bam

//sort bam

samtools sort AGS.pe.bwa.bam AGS.pe.bwa.sort &

//remove PRC duplicates by picard, use AS=true so that picard knows the bam is sorted.

java -jar ../picard-tools-1.44/MarkDuplicates.jar I=AGS.pe.bwa.sort.bam O=AGS.pe.bwa.sort.rmdup.bam M=PCR_duplicates.M.txt REMOVE_DUPLICATES=true AS=true



//index sorted bam
samtools index AGS.pe.bwa.sort.rmdup.bam

//consensus calling
samtools pileup -cf ../hg19/hg19.fa AGS.pe.bwa.sort.rmdup.bam > AGS.pe.bwa.sort.rmdup.bam.pileup &


samtools pileup -cvf ../hg19/hg19.fa AGS.pe.bwa.sort.rmdup.bam > AGS.variants.pileup &


perl ../samtools/misc/samtools.pl varFilter AGS.variants.pileup | awk '$6>=20' > AGS.final.pileup





http://sourceforge.net/apps/mediawiki/samtools/index.php?title=SAM_FAQ

I want to call SNPs and short indels.



samtools pileup -vcf ref.fa aln.bam | tee raw.txt | samtools.pl varFilter -D100 > flt.txt
awk '($3=="*"&&$6>=50)||($3!="*"&&$6>=20)' flt.txt > final.txt





http://plindenbaum.blogspot.com/2010/04/bam-bwa-and-samtools-my-notebook.html

http://seqanswers.com/forums/showthread.php?t=8364

#reference is yeast.nt
#query is yeast.fasta

bwa index /home/jill/bwa-0.5.8c/Project/yeast.fasta 

bwa index /home/jill/bwa-0.5.8c/Project/yeast.nt

bwa aln /home/jill/bwa-0.5.8c/Project/yeast.nt /home/jill/bwa-0.5.8c/Project/yeast.fasta  > /home/jill/bwa-0.5.8c/Project/yeastbwaout.sai

#[bwa_aln] 17bp reads: max_diff = 2
#[bwa_aln] 38bp reads: max_diff = 3
#[bwa_aln] 64bp reads: max_diff = 4
#[bwa_aln] 93bp reads: max_diff = 5
#[bwa_aln] 124bp reads: max_diff = 6
#[bwa_aln] 157bp reads: max_diff = 7
#[bwa_aln] 190bp reads: max_diff = 8
#[bwa_aln] 225bp reads: max_diff = 9
#[bwa_aln_core] calculate SA coordinate... 0.15 sec
#[bwa_aln_core] write to the disk... 0.00 sec
#[bwa_aln_core] 1 sequences have been processed.

bwa samse /home/jill/bwa-0.5.8c/Project/yeast.nt  /home/jill/bwa-0.5.8c/Project/yeastbwaout.sai /home/jill/bwa-0.5.8c/Project/yeast.fasta  > /home/jill/bwa-0.5.8c/Project/yeastbwaout.sam 

#[bwa_aln_core] convert to sequence coordinate... 0.01 sec
#[bwa_aln_core] refine gapped alignments... 0.00 sec
#[bwa_aln_core] print alignments... 0.01 sec
#[bwa_aln_core] 1 sequences have been processed.

samtools faidx /home/jill/bwa-0.5.8c/Project/yeast.nt

samtools view -bt /home/jill/bwa-0.5.8c/Project/yeast.nt.fai /home/jill/bwa-0.5.8c/Project/yeastbwaout.sam > /home/jill/bwa-0.5.8c/Project/yeastbwaout.bam

#[samopen] SAM header is present: 17 sequences.

#index bam
samtools index /home/jill/bwa-0.5.8c/Project/yeastbwaout.bam

#Sorts bam
samtools sort /home/jill/bwa-0.5.8c/Project/yeastbwaout.bam /home/jill/bwa-0.5.8c/Project/yeastbwaoutsort 

samtools index /home/jill/bwa-0.5.8c/Project/yeastbwaoutsort.bam

samtools pileup -vcf /home/jill/bwa-0.5.8c/Project/yeast.nt.fai /home/jill/bwa-0.5.8c/Project/yeastbwaoutsort.bam | tee /home/jill/bwa-0.5.8c/Project/raw.txt | /home/jill/samtools-0.1.11/misc/samtools.pl varFilter -D100 > /home/jill/bwa-0.5.8c/Project/flt.txt
awk '($3=="*"&&$6>=50)||($3!="*"&&$6>=20)' /home/jill/bwa-0.5.8c/Project/flt.txt > /home/jill/bwa-0.5.8c/Project/final.txt

No comments:

Post a Comment