User Tools

Site Tools


mkatari-bioinformatics-august-2013-gatknotes

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revisionPrevious revision
Next revision
Previous revision
mkatari-bioinformatics-august-2013-gatknotes [2014/10/09 13:12] mkatarimkatari-bioinformatics-august-2013-gatknotes [2016/08/17 08:37] (current) mkatari
Line 12: Line 12:
 module load bowtie2 module load bowtie2
 module load samtools module load samtools
 +module load picard
  
 bowtie2-build PTC_Human.fasta PTC_Human bowtie2-build PTC_Human.fasta PTC_Human
 samtools faidx PTC_Human.fasta samtools faidx PTC_Human.fasta
-java -jar /export/apps/picard-tools/1.112/CreateSequenceDictionary.jar \+picard CreateSequenceDictionary \
    R=PTC_Human.fasta \    R=PTC_Human.fasta \
    O=PTC_Human.dict    O=PTC_Human.dict
Line 36: Line 37:
  
 <code> <code>
-bowtie2 -x PTC_Human -U Cohen.fastq -S Cohen.sam +bowtie2 -x PTC_Human -U Sample1.fastq -S Sample1.sam 
-samtools view -bS Cohen.sam > Cohen.bam+samtools view -bS Sample1.sam > Sample1.bam 
 + 
 +bowtie2 -x PTC_Human -U Sample2.fastq -S Sample2.sam 
 +samtools view -bS Sample2.sam > Sample2.bam 
 + 
 +bowtie2 -x PTC_Human -U Sample3.fastq -S Sample3.sam 
 +samtools view -bS Sample3.sam > Sample3.bam 
 + 
 +bowtie2 -x PTC_Human -U Sample4.fastq -S Sample4.sam 
 +samtools view -bS Sample4.sam > Sample4.bam
 </code> </code>
  
Line 43: Line 53:
  
 <code> <code>
-mkdir /var/scratch/mkatari +module load picard/1.133 
-mkdir /var/scratch/mkatari/tmp+ 
 +picard SortSam INPUT=Sample1.bam OUTPUT=Sample1.sorted.bam \ 
 +    SORT_ORDER=coordinate 
 + 
 +picard SortSam INPUT=Sample2.bam OUTPUT=Sample2.sorted.bam \ 
 +    SORT_ORDER=coordinate 
 + 
 +picard SortSam INPUT=Sample3.bam OUTPUT=Sample3.sorted.bam \ 
 +    SORT_ORDER=coordinate 
 + 
 +picard SortSam INPUT=Sample4.bam OUTPUT=Sample4.sorted.bam \ 
 +    SORT_ORDER=coordinate
  
-java -Djava.io.tmpdir=/var/scratch/mkatari/tmp -jar /export/apps/picard-tools/1.112/SortSam.jar \ 
-   INPUT=Cohen.bam \ 
-   OUTPUT=Cohen.sorted.bam \ 
-   SORT_ORDER=coordinate 
-    
  
 </code> </code>
Line 57: Line 73:
  
 <code> <code>
 +picard AddOrReplaceReadGroups \
 +   INPUT=Sample1.sorted.bam \
 +   OUTPUT=Sample1RG.bam \
 +   RGLB=Sample1 \
 +   RGPL=IonTorrent \
 +   RGPU=None \
 +   RGSM=Sample1
  
-java -Djava.io.tmpdir=/var/scratch/mkatari/tmp -jar /export/apps/picard-tools/1.112/AddOrReplaceReadGroups.jar +picard AddOrReplaceReadGroups \ 
-   INPUT=Cohen.sorted.bam \ +   INPUT=Sample2.sorted.bam \ 
-   OUTPUT=CohenRG.bam \ +   OUTPUT=Sample2RG.bam \ 
-   RGLB=Cohen \+   RGLB=Sample2 \
    RGPL=IonTorrent \    RGPL=IonTorrent \
    RGPU=None \    RGPU=None \
-   RGSM=Cohen+   RGSM=Sample2 
 + 
 +picard AddOrReplaceReadGroups \ 
 +   INPUT=Sample3.sorted.bam \ 
 +   OUTPUT=Sample3RG.bam \ 
 +   RGLB=Sample3 \ 
 +   RGPL=IonTorrent \ 
 +   RGPU=None \ 
 +   RGSM=Sample3 
 + 
 +picard AddOrReplaceReadGroups \ 
 +   INPUT=Sample4.sorted.bam \ 
 +   OUTPUT=Sample4RG.bam \ 
 +   RGLB=Sample4 \ 
 +   RGPL=IonTorrent \ 
 +   RGPU=None \ 
 +   RGSM=Sample4 
 + 
 </code> </code>
  
Line 70: Line 111:
 <code> <code>
  
-java -Djava.io.tmpdir=/var/scratch/mkatari/tmp -jar /export/apps/picard-tools/1.112/MarkDuplicates.jar +picard MarkDuplicates \ 
-   INPUT=CohenRG.bam \ +   INPUT=Sample1RG.bam \ 
-   OUTPUT=Cohen.dedup.bam \ +   OUTPUT=Sample1.dedup.bam \ 
-   METRICS_FILE=Cohen.dedup.metrics \+   METRICS_FILE=Sample1.dedup.metrics \ 
 +   REMOVE_DUPLICATES=TRUE \ 
 +   ASSUME_SORTED=TRUE \ 
 +   MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=1000 
 + 
 + 
 +picard MarkDuplicates \ 
 +   INPUT=Sample2RG.bam \ 
 +   OUTPUT=Sample2.dedup.bam \ 
 +   METRICS_FILE=Sample2.dedup.metrics \ 
 +   REMOVE_DUPLICATES=TRUE \ 
 +   ASSUME_SORTED=TRUE \ 
 +   MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=1000 
 + 
 +picard MarkDuplicates \
 +   INPUT=Sample3RG.bam \ 
 +   OUTPUT=Sample3.dedup.bam \ 
 +   METRICS_FILE=Sample3.dedup.metrics \ 
 +   REMOVE_DUPLICATES=TRUE \ 
 +   ASSUME_SORTED=TRUE \ 
 +   MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=1000 
 + 
 +picard MarkDuplicates \
 +   INPUT=Sample4RG.bam \ 
 +   OUTPUT=Sample4.dedup.bam \ 
 +   METRICS_FILE=Sample4.dedup.metrics \
    REMOVE_DUPLICATES=TRUE \    REMOVE_DUPLICATES=TRUE \
    ASSUME_SORTED=TRUE \    ASSUME_SORTED=TRUE \
Line 82: Line 148:
 Index the files and realign them Index the files and realign them
 <code> <code>
-samtools index Cohen.dedup.bam +samtools index Sample1.dedup.bam  
 +samtools index Sample2.dedup.bam  
 +samtools index Sample3.dedup.bam  
 +samtools index Sample4.dedup.bam 
  
 #identifying indels #identifying indels
-java -Xmx2g -Djava.io.tmpdir=/var/scratch/mkatari/tmp -jar /export/apps/GenomeAnalysisTK/GenomeAnalysisTK-2.3-9-ge5ebf34/GenomeAnalysisTK.jar \+ 
 +module load gatk/3.3.0 
 + 
 +GenomeAnalysisTK \
    -T RealignerTargetCreator \    -T RealignerTargetCreator \
    -R PTC_Human.fasta \    -R PTC_Human.fasta \
-   -I Cohen.dedup.bam \ +   -I Sample1.dedup.bam \ 
-   -o CohenforIndelRealigner.intervals+   -o Sample1forIndelRealigner.intervals
    
  
 +GenomeAnalysisTK \
 +   -T IndelRealigner \
 +   -R PTC_Human.fasta \
 +   -I Sample1.dedup.bam \
 +   -targetIntervals Sample1forIndelRealigner.intervals \
 +   -o Sample1.dedup.realign.bam
 +
 +GenomeAnalysisTK \
 +   -T RealignerTargetCreator \
 +   -R PTC_Human.fasta \
 +   -I Sample2.dedup.bam \
 +   -o Sample2forIndelRealigner.intervals 
 +
 +GenomeAnalysisTK \
 +   -T IndelRealigner \
 +   -R PTC_Human.fasta \
 +   -I Sample2.dedup.bam \
 +   -targetIntervals Sample2forIndelRealigner.intervals \
 +   -o Sample2.dedup.realign.bam
 +
 +
 +GenomeAnalysisTK \
 +   -T RealignerTargetCreator \
 +   -R PTC_Human.fasta \
 +   -I Sample3.dedup.bam \
 +   -o Sample3forIndelRealigner.intervals
    
- java -Xmx4g -Djava.io.tmpdir=/var/scratch/mkatari/tmp -jar /export/apps/GenomeAnalysisTK/GenomeAnalysisTK-2.3-9-ge5ebf34/GenomeAnalysisTK.jar \+GenomeAnalysisTK \ 
 +   -T IndelRealigner \ 
 +   -R PTC_Human.fasta \ 
 +   -I Sample3.dedup.bam \ 
 +   -targetIntervals Sample3forIndelRealigner.intervals \ 
 +   -o Sample3.dedup.realign.bam 
 + 
 +GenomeAnalysisTK \
 +   -T RealignerTargetCreator \ 
 +   -R PTC_Human.fasta \ 
 +   -I Sample4.dedup.bam \ 
 +   -o Sample4forIndelRealigner.intervals 
 +  
 + 
 +GenomeAnalysisTK \
    -T IndelRealigner \    -T IndelRealigner \
    -R PTC_Human.fasta \    -R PTC_Human.fasta \
-   -I Cohen.dedup.bam \ +   -I Sample4.dedup.bam \ 
-  -targetIntervals CohenforIndelRealigner.intervals \ +   -targetIntervals Sample4forIndelRealigner.intervals \ 
-   -o Cohen.dedup.realign.bam+   -o Sample4.dedup.realign.bam
  
 </code> </code>
Line 105: Line 217:
  
 <code> <code>
-java -Djava.io.tmpdir=/var/scratch/mkatari/tmp -jar /export/apps/picard-tools/1.112/CleanSam.jar +picard CleanSam \ 
-   INPUT=Sherman.dedup.realign.bam \ +   INPUT=Sample4.dedup.realign.bam \ 
-   OUTPUT=Sherman.clean.dedup.realign.bam+   OUTPUT=Sample4.clean.dedup.realign.bam
 </code> </code>
  
Line 113: Line 225:
  
 <code> <code>
-java -Djava.io.tmpdir=/var/scratch/mkatari/tmp -jar /export/apps/picard-tools/1.112/MergeSamFiles.jar +picard MergeSamFiles \ 
-   INPUT=Sherman.clean.dedup.realign.bam \ +   INPUT=Sample1.dedup.realign.bam \ 
-   INPUT=Cohen.dedup.realign.bam \ +   INPUT=Sample2.dedup.realign.bam \
-   OUTPUT=ShermanCohenMerged.bam  +   INPUT=Sample3.dedup.realign.bam \ 
 +   INPUT=Sample4.dedup.realign.bam \ 
 +   OUTPUT=AllMerged.bam  
  
-samtools sort ShermanCohenMerged.bam ShermanCohenMerged.sorted+picard SortSam INPUT=AllMerged.bam OUTPUT=AllMerged.sorted.bam SORT_ORDER=coordinate 
 + 
 +samtools index AllMerged.sorted.bam
  
-samtools index ShermanCohenMerged.sorted.bam  
 </code> </code>
  
Line 127: Line 242:
  
 <code> <code>
-java -Djava.io.tmpdir=/var/scratch/mkatari/tmp -jar /export/apps/GenomeAnalysisTK/GenomeAnalysisTK-2.3-9-ge5ebf34/GenomeAnalysisTK.jar \ +GenomeAnalysisTK -T UnifiedGenotyper \ 
-   -T UnifiedGenotyper \ +   -I AllMerged.sorted.bam \
-   -I ShermanCohenMerged.sorted.bam \+
    -R PTC_Human.fasta \    -R PTC_Human.fasta \
    --output_mode EMIT_VARIANTS_ONLY \    --output_mode EMIT_VARIANTS_ONLY \
Line 146: Line 260:
 If you would like to generate a table from the vcf file, use the following command If you would like to generate a table from the vcf file, use the following command
 <code> <code>
-java --Djava.io.tmpdir=/var/scratch/mkatari/tmp jar /export/apps/GenomeAnalysisTK/GenomeAnalysisTK-2.3-9-ge5ebf34/GenomeAnalysisTK.jar +GenomeAnalysisTK \ 
-     -R PTC_Human.fasta+     -R PTC_Human.fasta \
      -T VariantsToTable \      -T VariantsToTable \
      -V PTC_human.gatk.vcf \      -V PTC_human.gatk.vcf \
Line 158: Line 272:
  
 <code> <code>
-java -Xmx2g -jar /export/apps/gatk/3.1.1/GenomeAnalysisTK.jar +GenomeAnalysisTK \ 
-    -R /home/emasumba/cassavaV5_0.chromsomesRomanNumerals.fa \+    -R PTC_Human.fasta \
     -T VariantFiltration \     -T VariantFiltration \
-    -o namikonga_albert_filter.vcf \ +    -o PTC_human.gatk.filter.vcf \ 
-    --variant /home/emasumba/Namikonga_Albert.gatk.vcf \ +    --variant PTC_human.gatk.vcf \ 
-    --filterExpression "QD < 2.0 || MQ < 40.0 || FS > 60.0 || HaplotypeScore >13.0" --filterName "mannyfilter"+    --filterExpression "QD<2.0||MQ<40.0||FS>60.0||HaplotypeScore>13.0" \
 +    --filterName mannyfilter
  
 </code> </code>
 +
 +Good descriptions of the different information fields in vcf files can be found in the [[https://www.broadinstitute.org/gatk/gatkdocs/org_broadinstitute_gatk_tools_walkers_annotator_HaplotypeScore.php|GATK Docs]].
  
 Finally, to save the SNPs that passed your filter, simply use the SelectVariants tool. Finally, to save the SNPs that passed your filter, simply use the SelectVariants tool.
Line 171: Line 288:
 <code> <code>
  
-java -Xmx2g -jar /export/apps/gatk/3.1.1/GenomeAnalysisTK.jar \+GenomeAnalysisTK \
     -T SelectVariants \     -T SelectVariants \
-    --variant namikonga_albert_filter.vcf \ +    --variant PTC_human.gatk.filter.vcf \ 
-    -o namikonga_albert_filter_only.vcf \+    -o PTC_human.gatk.filter.only.vcf \
     -ef \     -ef \
-    -R /home/emasumba/cassavaV5_0.chromsomesRomanNumerals.fa+    -R PTC_Human.fasta
  
 </code> </code>
mkatari-bioinformatics-august-2013-gatknotes.1412860326.txt.gz · Last modified: 2014/10/09 13:12 by mkatari