From 717eb1de96f0a20df2d86420505e2750bf4cc6f5 Mon Sep 17 00:00:00 2001 From: ebanks Date: Wed, 9 Dec 2009 02:53:00 +0000 Subject: [PATCH] - Depth annotation now includes MQ0 reads - Removed MQ0 annotation - Updated RMS MQ annotation to use new pileup - UG now outputs all of its arguments as key/value pairs in the header (for VCF) - Cleaned up VCFGenotypeWriterAdapter interface a bit git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2288 348d0f76-0448-11de-a6fe-93d51630548a --- .../walkers/annotator/DepthOfCoverage.java | 4 +- .../walkers/annotator/MappingQualityZero.java | 28 ----------- .../walkers/annotator/RMSMappingQuality.java | 13 ++--- .../walkers/genotyper/UnifiedGenotyper.java | 50 +++++++++++++------ .../utils/genotype/GenotypeWriterFactory.java | 10 +--- .../vcf/VCFGenotypeWriterAdapter.java | 12 +---- .../VariantAnnotatorIntegrationTest.java | 8 +-- .../UnifiedGenotyperIntegrationTest.java | 12 ++--- packages/GATK-Picard.xml | 1 - packages/GenomeAnalysisTK.xml | 1 - 10 files changed, 57 insertions(+), 82 deletions(-) delete mode 100755 java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java index c72de0155..76f3aa852 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java @@ -15,7 +15,7 @@ public class DepthOfCoverage extends StandardVariantAnnotation { public String getKeyName() { return VCFRecord.DEPTH_KEY; } - public String getDescription() { return getKeyName() + ",1,Integer,\"Total Depth\""; } + public String getDescription() { return getKeyName() + ",1,Integer,\"Total Depth (including MQ0 reads)\""; } - public boolean useZeroQualityReads() { return false; } + public boolean useZeroQualityReads() { return true; } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java deleted file mode 100755 index 9e3dbf727..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java +++ /dev/null @@ -1,28 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.annotator; - -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.genotype.Variation; -import net.sf.samtools.SAMRecord; - -import java.util.List; - - -public class MappingQualityZero extends StandardVariantAnnotation { - - public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation) { - List reads = pileup.getReads(); - int MQ0Count = 0; - for (int i=0; i < reads.size(); i++) { - if ( reads.get(i).getMappingQuality() == 0 ) - MQ0Count++; - } - return String.format("%d", MQ0Count); - } - - public String getKeyName() { return "MQ0"; } - - public String getDescription() { return "MQ0,1,Integer,\"Total Mapping Quality Zero Reads\""; } - - public boolean useZeroQualityReads() { return true; } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java index b1d86115b..9b19a104a 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java @@ -3,21 +3,18 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.genotype.Variation; import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord; -import net.sf.samtools.SAMRecord; - -import java.util.List; - public class RMSMappingQuality extends StandardVariantAnnotation { public String annotate(ReferenceContext ref, ReadBackedPileup pileup, Variation variation) { - List reads = pileup.getReads(); - int[] qualities = new int[reads.size()]; - for (int i=0; i < reads.size(); i++) - qualities[i] = reads.get(i).getMappingQuality(); + int[] qualities = new int[pileup.size()]; + int index = 0; + for (PileupElement p : pileup ) + qualities[index++] = p.getRead().getMappingQuality(); double rms = MathUtils.rms(qualities); return String.format("%.2f", rms); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 379d4f329..b07996b38 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -134,31 +134,53 @@ public class UnifiedGenotyper extends LocusWalker headerInfo = getHeaderInfo(); + + // create the output writer stream + if ( VARIANTS_FILE != null ) + writer = GenotypeWriterFactory.create(UAC.VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), VARIANTS_FILE, + samples, + headerInfo); + else + writer = GenotypeWriterFactory.create(UAC.VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), out, + samples, + headerInfo); + + callsMetrics = new CallMetrics(); + } + + private Set getHeaderInfo() { Set headerInfo = new HashSet(); + + headerInfo.add("source=UnifiedGenotyper"); + headerInfo.add("reference=" + getToolkit().getArguments().referenceFile.getName()); + if ( UAC.ALL_ANNOTATIONS ) headerInfo.addAll(VariantAnnotator.getAllVCFAnnotationDescriptions()); else headerInfo.addAll(VariantAnnotator.getVCFAnnotationDescriptions()); + headerInfo.add("INFO=AF,1,Float,\"Allele Frequency\""); headerInfo.add("INFO=NS,1,Integer,\"Number of Samples With Data\""); if ( !UAC.NO_SLOD ) headerInfo.add("INFO=SB,1,Float,\"Strand Bias\""); - // create the output writer stream - if ( VARIANTS_FILE != null ) - writer = GenotypeWriterFactory.create(UAC.VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), VARIANTS_FILE, - "UnifiedGenotyper", - this.getToolkit().getArguments().referenceFile.getName(), - samples, - headerInfo); - else - writer = GenotypeWriterFactory.create(UAC.VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), out, - "UnifiedGenotyper", - this.getToolkit().getArguments().referenceFile.getName(), - samples, - headerInfo); + headerInfo.add("UG_genotype_model=" + UAC.genotypeModel); + headerInfo.add("UG_base_model=" + UAC.baseModel); + headerInfo.add("UG_heterozygosity=" + UAC.heterozygosity); + headerInfo.add("UG_genotype_mode=" + UAC.GENOTYPE); + headerInfo.add("UG_all_bases_mode=" + UAC.ALL_BASES); + headerInfo.add("UG_min_confidence=" + UAC.CONFIDENCE_THRESHOLD); + headerInfo.add("UG_max_deletion_fraction=" + UAC.MAX_DELETION_FRACTION); + headerInfo.add("UG_max_coverage=" + UAC.MAX_READS_IN_PILEUP); + if ( UAC.POOLSIZE > 0 ) + headerInfo.add("UG_poolSize=" + UAC.POOLSIZE); + if ( UAC.ASSUME_SINGLE_SAMPLE != null ) + headerInfo.add("UG_single_sample=" + UAC.ASSUME_SINGLE_SAMPLE); + if ( UAC.defaultPlatform != null ) + headerInfo.add("UG_default_platform=" + UAC.defaultPlatform); - callsMetrics = new CallMetrics(); + return headerInfo; } /** diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java index 0f6840975..6f447044b 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java @@ -30,8 +30,6 @@ public class GenotypeWriterFactory { * @param format the format * @param header the sam file header * @param destination the destination file - * @param source the source - * @param referenceName the ref name * @param sampleNames the sample names * @param headerInfo the optional header info fields * @return the genotype writer object @@ -39,8 +37,6 @@ public class GenotypeWriterFactory { public static GenotypeWriter create(GENOTYPE_FORMAT format, SAMFileHeader header, File destination, - String source, - String referenceName, Set sampleNames, Set headerInfo) { switch (format) { @@ -51,7 +47,7 @@ public class GenotypeWriterFactory { case GELI_BINARY: return new GeliAdapter(destination, header); case VCF: - return new VCFGenotypeWriterAdapter(source, referenceName, destination, sampleNames, headerInfo); + return new VCFGenotypeWriterAdapter(destination, sampleNames, headerInfo); default: throw new StingException("Genotype writer " + format.toString() + " is not implemented"); } @@ -60,8 +56,6 @@ public class GenotypeWriterFactory { public static GenotypeWriter create(GENOTYPE_FORMAT format, SAMFileHeader header, PrintStream destination, - String source, - String referenceName, Set sampleNames, Set headerInfo) { switch (format) { @@ -70,7 +64,7 @@ public class GenotypeWriterFactory { case GLF: return new GLFWriter(header.toString(), destination); case VCF: - return new VCFGenotypeWriterAdapter(source, referenceName, destination, sampleNames, headerInfo); + return new VCFGenotypeWriterAdapter(destination, sampleNames, headerInfo); default: throw new StingException("Genotype writer to " + format.toString() + " to standard output is not implemented"); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java index 87c3d33de..c5dd924e4 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java @@ -19,17 +19,13 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { // our VCF objects private VCFWriter mWriter = null; private VCFHeader mHeader = null; - private String mSource; - private String mReferenceName; private final Set mSampleNames = new LinkedHashSet(); /** our log, which we want to capture anything from this class */ protected static Logger logger = Logger.getLogger(VCFGenotypeWriterAdapter.class); - public VCFGenotypeWriterAdapter(String source, String referenceName, File writeTo, Set sampleNames, Set headerInfo) { - mReferenceName = referenceName; - mSource = source; + public VCFGenotypeWriterAdapter(File writeTo, Set sampleNames, Set headerInfo) { mSampleNames.addAll(sampleNames); initializeHeader(headerInfo); @@ -38,9 +34,7 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { mWriter = new VCFWriter(mHeader, writeTo); } - public VCFGenotypeWriterAdapter(String source, String referenceName, OutputStream writeTo, Set sampleNames, Set headerInfo) { - mReferenceName = referenceName; - mSource = source; + public VCFGenotypeWriterAdapter(OutputStream writeTo, Set sampleNames, Set headerInfo) { mSampleNames.addAll(sampleNames); initializeHeader(headerInfo); @@ -59,8 +53,6 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { // setup the header fields hInfo.add(VCFHeader.FULL_FORMAT_LINE); - hInfo.add("source=" + mSource); - hInfo.add("reference=" + mReferenceName); hInfo.addAll(optionalHeaderInfo); // setup the sample names diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 3b603d11a..5d4c743a3 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -66,7 +66,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("31ddecd3118bd6aea00bcd0369a1b32f")); + Arrays.asList("24609ede968a90ebff949791694f70a0")); executeTest("test file has annotations, asking for annotations, #1", spec); } @@ -74,7 +74,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("553c55f3bcf31a6b1d9a2c1d27fde480")); + Arrays.asList("9f05c78ef9913259c02f8bda6ec97505")); executeTest("test file has annotations, asking for annotations, #2", spec); } @@ -98,7 +98,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("fa1071b292fd2e9db5c36410ffd44fdb")); + Arrays.asList("54811556d642a83fcfa2b359aa275023")); executeTest("test file doesn't have annotations, asking for annotations, #1", spec); } @@ -106,7 +106,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("9d133a91949e477523a039a0a52ac2a8")); + Arrays.asList("b67282a03c92ae3d24a7b588aa1e61df")); executeTest("test file doesn't have annotations, asking for annotations, #2", spec); } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 07cc2371b..1db431524 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -47,7 +47,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1PointEM() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1, - Arrays.asList("68fcdca40df72b3c703bab846c5f0bbd")); + Arrays.asList("7c27fc1ad1fedab6944d5f93094ef914")); executeTest("testMultiSamplePilot1 - Point Estimate EM", spec); } @@ -55,7 +55,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot2PointEM() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,010,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1, - Arrays.asList("eb0cd5494ae4c1781bfa00b6c4146993")); + Arrays.asList("cd08c5e4dbd0cf477efd1204c681c7c6")); executeTest("testMultiSamplePilot2 - Point Estimate EM", spec); } @@ -68,7 +68,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testPooled1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,000-10,024,000 -bm empirical -gm POOLED -ps 60 -confidence 30", 1, - Arrays.asList("ec1aeb69d7d54a7ced1ce625146d1d59")); + Arrays.asList("d7579207d2521f36648b2b43be805bc4")); executeTest("testPooled1", spec); } @@ -81,7 +81,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, - Arrays.asList("e27552dad05ddf17403aaa7176b9cfe2")); + Arrays.asList("f4e8721a905e42f511967a1cfdb966ec")); executeTest("testMultiSamplePilot1 - Joint Estimate", spec); } @@ -89,7 +89,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot2Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, - Arrays.asList("3618711e41b7e37f47b995d39adbc76b")); + Arrays.asList("962838ae188b1c07a35ead671019062e")); executeTest("testMultiSamplePilot2 - Joint Estimate", spec); } @@ -97,7 +97,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testSingleSamplePilot2Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, - Arrays.asList("2a32add40319ab2de44951624df2be4b")); + Arrays.asList("a9cf1abecf709de9f158f6ffe57b00e7")); executeTest("testSingleSamplePilot2 - Joint Estimate", spec); } diff --git a/packages/GATK-Picard.xml b/packages/GATK-Picard.xml index c830b60d2..b91f36f42 100644 --- a/packages/GATK-Picard.xml +++ b/packages/GATK-Picard.xml @@ -44,7 +44,6 @@ org.broadinstitute.sting.gatk.walkers.annotator.AlleleBalance org.broadinstitute.sting.gatk.walkers.annotator.DepthOfCoverage org.broadinstitute.sting.gatk.walkers.annotator.HomopolymerRun - org.broadinstitute.sting.gatk.walkers.annotator.MappingQualityZero org.broadinstitute.sting.gatk.walkers.annotator.RMSMappingQuality org.broadinstitute.sting.gatk.walkers.annotator.SpanningDeletions diff --git a/packages/GenomeAnalysisTK.xml b/packages/GenomeAnalysisTK.xml index a97597bf3..02104f44d 100644 --- a/packages/GenomeAnalysisTK.xml +++ b/packages/GenomeAnalysisTK.xml @@ -44,7 +44,6 @@ org.broadinstitute.sting.gatk.walkers.annotator.AlleleBalance org.broadinstitute.sting.gatk.walkers.annotator.DepthOfCoverage org.broadinstitute.sting.gatk.walkers.annotator.HomopolymerRun - org.broadinstitute.sting.gatk.walkers.annotator.MappingQualityZero org.broadinstitute.sting.gatk.walkers.annotator.RMSMappingQuality org.broadinstitute.sting.gatk.walkers.annotator.SpanningDeletions