User Tools

Site Tools


mkatari-bioinformatics-august-2013-gatknotes

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision Previous revision
Next revision
Previous revision
mkatari-bioinformatics-august-2013-gatknotes [2015/06/08 07:45] mkatari mkatari-bioinformatics-august-2013-gatknotes [2016/08/17 08:37] (current) mkatari
Line 12: Line 12:
 module load bowtie2 module load bowtie2
 module load samtools module load samtools
 +module load picard
  
 bowtie2-build PTC_Human.fasta PTC_Human bowtie2-build PTC_Human.fasta PTC_Human
 samtools faidx PTC_Human.fasta samtools faidx PTC_Human.fasta
-java -jar /export/apps/picard-tools/1.112/CreateSequenceDictionary.jar \+picard CreateSequenceDictionary \
    R=PTC_Human.fasta \    R=PTC_Human.fasta \
    O=PTC_Human.dict    O=PTC_Human.dict
Line 36: Line 37:
  
 <code> <code>
-bowtie2 -x PTC_Human -U Cohen.fastq -S Cohen.sam +bowtie2 -x PTC_Human -U Sample1.fastq -S Sample1.sam 
-samtools view -bS Cohen.sam > Cohen.bam+samtools view -bS Sample1.sam > Sample1.bam 
 + 
 +bowtie2 -x PTC_Human -U Sample2.fastq -S Sample2.sam 
 +samtools view -bS Sample2.sam > Sample2.bam 
 + 
 +bowtie2 -x PTC_Human -U Sample3.fastq -S Sample3.sam 
 +samtools view -bS Sample3.sam > Sample3.bam 
 + 
 +bowtie2 -x PTC_Human -U Sample4.fastq -S Sample4.sam 
 +samtools view -bS Sample4.sam > Sample4.bam
 </code> </code>
  
Line 43: Line 53:
  
 <code> <code>
-mkdir /var/scratch/mkatari +module load picard/1.133 
-mkdir /var/scratch/mkatari/tmp+ 
 +picard SortSam INPUT=Sample1.bam OUTPUT=Sample1.sorted.bam \ 
 +    SORT_ORDER=coordinate 
 + 
 +picard SortSam INPUT=Sample2.bam OUTPUT=Sample2.sorted.bam \ 
 +    SORT_ORDER=coordinate 
 + 
 +picard SortSam INPUT=Sample3.bam OUTPUT=Sample3.sorted.bam \ 
 +    SORT_ORDER=coordinate 
 + 
 +picard SortSam INPUT=Sample4.bam OUTPUT=Sample4.sorted.bam \ 
 +    SORT_ORDER=coordinate
  
-java -Djava.io.tmpdir=/var/scratch/mkatari/tmp -jar /export/apps/picard-tools/1.112/SortSam.jar \ 
-   INPUT=Cohen.bam \ 
-   OUTPUT=Cohen.sorted.bam \ 
-   SORT_ORDER=coordinate 
-    
  
 </code> </code>
Line 57: Line 73:
  
 <code> <code>
 +picard AddOrReplaceReadGroups \
 +   INPUT=Sample1.sorted.bam \
 +   OUTPUT=Sample1RG.bam \
 +   RGLB=Sample1 \
 +   RGPL=IonTorrent \
 +   RGPU=None \
 +   RGSM=Sample1
  
-java -Djava.io.tmpdir=/var/scratch/mkatari/tmp -jar /export/apps/picard-tools/1.112/AddOrReplaceReadGroups.jar +picard AddOrReplaceReadGroups \ 
-   INPUT=Cohen.sorted.bam \ +   INPUT=Sample2.sorted.bam \ 
-   OUTPUT=CohenRG.bam \ +   OUTPUT=Sample2RG.bam \ 
-   RGLB=Cohen \+   RGLB=Sample2 \
    RGPL=IonTorrent \    RGPL=IonTorrent \
    RGPU=None \    RGPU=None \
-   RGSM=Cohen+   RGSM=Sample2 
 + 
 +picard AddOrReplaceReadGroups \ 
 +   INPUT=Sample3.sorted.bam \ 
 +   OUTPUT=Sample3RG.bam \ 
 +   RGLB=Sample3 \ 
 +   RGPL=IonTorrent \ 
 +   RGPU=None \ 
 +   RGSM=Sample3 
 + 
 +picard AddOrReplaceReadGroups \ 
 +   INPUT=Sample4.sorted.bam \ 
 +   OUTPUT=Sample4RG.bam \ 
 +   RGLB=Sample4 \ 
 +   RGPL=IonTorrent \ 
 +   RGPU=None \ 
 +   RGSM=Sample4 
 + 
 </code> </code>
  
Line 70: Line 111:
 <code> <code>
  
-java -Djava.io.tmpdir=/var/scratch/mkatari/tmp -jar /export/apps/picard-tools/1.112/MarkDuplicates.jar +picard MarkDuplicates \ 
-   INPUT=CohenRG.bam \ +   INPUT=Sample1RG.bam \ 
-   OUTPUT=Cohen.dedup.bam \ +   OUTPUT=Sample1.dedup.bam \ 
-   METRICS_FILE=Cohen.dedup.metrics \+   METRICS_FILE=Sample1.dedup.metrics \ 
 +   REMOVE_DUPLICATES=TRUE \ 
 +   ASSUME_SORTED=TRUE \ 
 +   MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=1000 
 + 
 + 
 +picard MarkDuplicates \ 
 +   INPUT=Sample2RG.bam \ 
 +   OUTPUT=Sample2.dedup.bam \ 
 +   METRICS_FILE=Sample2.dedup.metrics \ 
 +   REMOVE_DUPLICATES=TRUE \ 
 +   ASSUME_SORTED=TRUE \ 
 +   MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=1000 
 + 
 +picard MarkDuplicates \
 +   INPUT=Sample3RG.bam \ 
 +   OUTPUT=Sample3.dedup.bam \ 
 +   METRICS_FILE=Sample3.dedup.metrics \ 
 +   REMOVE_DUPLICATES=TRUE \ 
 +   ASSUME_SORTED=TRUE \ 
 +   MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=1000 
 + 
 +picard MarkDuplicates \
 +   INPUT=Sample4RG.bam \ 
 +   OUTPUT=Sample4.dedup.bam \ 
 +   METRICS_FILE=Sample4.dedup.metrics \
    REMOVE_DUPLICATES=TRUE \    REMOVE_DUPLICATES=TRUE \
    ASSUME_SORTED=TRUE \    ASSUME_SORTED=TRUE \
Line 82: Line 148:
 Index the files and realign them Index the files and realign them
 <code> <code>
-samtools index Cohen.dedup.bam +samtools index Sample1.dedup.bam  
 +samtools index Sample2.dedup.bam  
 +samtools index Sample3.dedup.bam  
 +samtools index Sample4.dedup.bam 
  
 #identifying indels #identifying indels
-java -Xmx2g -Djava.io.tmpdir=/var/scratch/mkatari/tmp -jar /export/apps/GenomeAnalysisTK/GenomeAnalysisTK-2.3-9-ge5ebf34/GenomeAnalysisTK.jar \+ 
 +module load gatk/3.3.0 
 + 
 +GenomeAnalysisTK \
    -T RealignerTargetCreator \    -T RealignerTargetCreator \
    -R PTC_Human.fasta \    -R PTC_Human.fasta \
-   -I Cohen.dedup.bam \ +   -I Sample1.dedup.bam \ 
-   -o CohenforIndelRealigner.intervals+   -o Sample1forIndelRealigner.intervals
    
  
 +GenomeAnalysisTK \
 +   -T IndelRealigner \
 +   -R PTC_Human.fasta \
 +   -I Sample1.dedup.bam \
 +   -targetIntervals Sample1forIndelRealigner.intervals \
 +   -o Sample1.dedup.realign.bam
 +
 +GenomeAnalysisTK \
 +   -T RealignerTargetCreator \
 +   -R PTC_Human.fasta \
 +   -I Sample2.dedup.bam \
 +   -o Sample2forIndelRealigner.intervals 
 +
 +GenomeAnalysisTK \
 +   -T IndelRealigner \
 +   -R PTC_Human.fasta \
 +   -I Sample2.dedup.bam \
 +   -targetIntervals Sample2forIndelRealigner.intervals \
 +   -o Sample2.dedup.realign.bam
 +
 +
 +GenomeAnalysisTK \
 +   -T RealignerTargetCreator \
 +   -R PTC_Human.fasta \
 +   -I Sample3.dedup.bam \
 +   -o Sample3forIndelRealigner.intervals
    
- java -Xmx4g -Djava.io.tmpdir=/var/scratch/mkatari/tmp -jar /export/apps/GenomeAnalysisTK/GenomeAnalysisTK-2.3-9-ge5ebf34/GenomeAnalysisTK.jar \+GenomeAnalysisTK \
    -T IndelRealigner \    -T IndelRealigner \
    -R PTC_Human.fasta \    -R PTC_Human.fasta \
-   -I Cohen.dedup.bam \ +   -I Sample3.dedup.bam \ 
-  -targetIntervals CohenforIndelRealigner.intervals \ +   -targetIntervals Sample3forIndelRealigner.intervals \ 
-   -o Cohen.dedup.realign.bam+   -o Sample3.dedup.realign.bam 
 + 
 +GenomeAnalysisTK \ 
 +   -T RealignerTargetCreator \ 
 +   -R PTC_Human.fasta \ 
 +   -I Sample4.dedup.bam \ 
 +   -o Sample4forIndelRealigner.intervals 
 +  
 + 
 +GenomeAnalysisTK \ 
 +   -T IndelRealigner \ 
 +   -R PTC_Human.fasta \ 
 +   -I Sample4.dedup.bam \ 
 +   -targetIntervals Sample4forIndelRealigner.intervals \ 
 +   -o Sample4.dedup.realign.bam
  
 </code> </code>
Line 105: Line 217:
  
 <code> <code>
-java -Djava.io.tmpdir=/var/scratch/mkatari/tmp -jar /export/apps/picard-tools/1.112/CleanSam.jar +picard CleanSam \ 
-   INPUT=Sherman.dedup.realign.bam \ +   INPUT=Sample4.dedup.realign.bam \ 
-   OUTPUT=Sherman.clean.dedup.realign.bam+   OUTPUT=Sample4.clean.dedup.realign.bam
 </code> </code>
  
Line 113: Line 225:
  
 <code> <code>
-java -Djava.io.tmpdir=/var/scratch/mkatari/tmp -jar /export/apps/picard-tools/1.112/MergeSamFiles.jar +picard MergeSamFiles \ 
-   INPUT=Sherman.clean.dedup.realign.bam \ +   INPUT=Sample1.dedup.realign.bam \ 
-   INPUT=Cohen.dedup.realign.bam \ +   INPUT=Sample2.dedup.realign.bam \
-   OUTPUT=ShermanCohenMerged.bam  +   INPUT=Sample3.dedup.realign.bam \ 
 +   INPUT=Sample4.dedup.realign.bam \ 
 +   OUTPUT=AllMerged.bam   
 + 
 +picard SortSam INPUT=AllMerged.bam OUTPUT=AllMerged.sorted.bam SORT_ORDER=coordinate
  
-samtools sort ShermanCohenMerged.bam ShermanCohenMerged.sorted+samtools index AllMerged.sorted.bam
  
-samtools index ShermanCohenMerged.sorted.bam  
 </code> </code>
  
Line 127: Line 242:
  
 <code> <code>
-java -Djava.io.tmpdir=/var/scratch/mkatari/tmp -jar /export/apps/GenomeAnalysisTK/GenomeAnalysisTK-2.3-9-ge5ebf34/GenomeAnalysisTK.jar \ +GenomeAnalysisTK -T UnifiedGenotyper \ 
-   -T UnifiedGenotyper \ +   -I AllMerged.sorted.bam \
-   -I ShermanCohenMerged.sorted.bam \+
    -R PTC_Human.fasta \    -R PTC_Human.fasta \
    --output_mode EMIT_VARIANTS_ONLY \    --output_mode EMIT_VARIANTS_ONLY \
Line 146: Line 260:
 If you would like to generate a table from the VCF file, use the following command: If you would like to generate a table from the VCF file, use the following command:
 <code> <code>
-java --Djava.io.tmpdir=/var/scratch/mkatari/tmp jar /export/apps/GenomeAnalysisTK/GenomeAnalysisTK-2.3-9-ge5ebf34/GenomeAnalysisTK.jar +GenomeAnalysisTK \ 
-     -R PTC_Human.fasta+     -R PTC_Human.fasta \
      -T VariantsToTable \      -T VariantsToTable \
      -V PTC_human.gatk.vcf \      -V PTC_human.gatk.vcf \
Line 158: Line 272:
  
 <code> <code>
-java -Xmx2g -jar /export/apps/gatk/3.1.1/GenomeAnalysisTK.jar +GenomeAnalysisTK \ 
-    -R /home/emasumba/cassavaV5_0.chromsomesRomanNumerals.fa \+    -R PTC_Human.fasta \
     -T VariantFiltration \     -T VariantFiltration \
-    -o namikonga_albert_filter.vcf \ +    -o PTC_human.gatk.filter.vcf \ 
-    --variant /home/emasumba/Namikonga_Albert.gatk.vcf \ +    --variant PTC_human.gatk.vcf \ 
-    --filterExpression "QD < 2.0 || MQ < 40.0 || FS > 60.0 || HaplotypeScore >13.0"+    --filterExpression "QD<2.0||MQ<40.0||FS>60.0||HaplotypeScore>13.0" \
-    --filterName "mannyfilter"+    --filterName mannyfilter
  
 </code> </code>
Line 174: Line 288:
 <code> <code>
  
-java -Xmx2g -jar /export/apps/gatk/3.1.1/GenomeAnalysisTK.jar \+GenomeAnalysisTK \
     -T SelectVariants \     -T SelectVariants \
-    --variant namikonga_albert_filter.vcf \ +    --variant PTC_human.gatk.filter.vcf \ 
-    -o namikonga_albert_filter_only.vcf \+    -o PTC_human.gatk.filter.only.vcf \
     -ef \     -ef \
-    -R /home/emasumba/cassavaV5_0.chromsomesRomanNumerals.fa+    -R PTC_Human.fasta
  
 </code> </code>
mkatari-bioinformatics-august-2013-gatknotes.1433749532.txt.gz · Last modified: 2015/06/08 07:45 by mkatari