From 189829841b56a58b52ca0386c2fe21231db054f1 Mon Sep 17 00:00:00 2001 From: rpoplin Date: Tue, 12 Jan 2010 18:50:39 +0000 Subject: [PATCH] The recalibrator now uses all input RODs when looking for known polymorphic sites not just the one named dbsnp. Added an integration test which uses both dbsnp and an input vcf file and skips over the union of the two. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2564 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/refdata/ReferenceOrderedData.java | 2 +- .../recalibration/CovariateCounterWalker.java | 19 ++++++------- .../RecalibrationWalkersIntegrationTest.java | 28 +++++++++++++++++++ 3 files changed, 38 insertions(+), 11 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java b/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java index f3f2fe664..ea7a3d3d4 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java @@ -125,7 +125,7 @@ public class ReferenceOrderedData implements } /** - * given a existing file, open it and append all the valid triplet lines to an existing list + * given an existing file, open it and append all the valid triplet lines to an existing list * * @param rodTripletList the list of existing triplets * @param filename the file to attempt to extract ROD triplets from diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java index b814db015..90e002362 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java @@ -4,6 +4,7 @@ import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import org.broadinstitute.sting.gatk.refdata.RODRecordList; +import org.broadinstitute.sting.gatk.refdata.VariationRod; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter; @@ -103,7 +104,7 @@ public class CovariateCounterWalker extends LocusWalker { private long solidInsertedReferenceBases = 0; // Number of bases where we believe SOLID has inserted the reference because the color space is inconsistent with the read base private long otherColorSpaceInconsistency = 0; // Number of bases where the color space is inconsistent with the read but the reference wasn't inserted. private int numUnprocessed = 0; // Number of consecutive loci skipped because we are only processing every Nth site - private static final String versionString = "v2.2.4"; // Major version, minor version, and build number + private static final String versionString = "v2.2.5"; // Major version, minor version, and build number private Pair dbSNP_counts = new Pair(0L, 0L); // mismatch/base counts for dbSNP loci private Pair novel_counts = new Pair(0L, 0L); // mismatch/base counts for non-dbSNP loci private static final double DBSNP_VS_NOVEL_MISMATCH_RATE = 2.0; // rate at which dbSNP sites (on an individual level) mismatch relative to novel sites (determined by looking at NA12878) @@ -149,8 +150,9 @@ public class CovariateCounterWalker extends LocusWalker { // Warn the user if no dbSNP file was specified boolean foundDBSNP = false; for( ReferenceOrderedDataSource rod : this.getToolkit().getRodDataSources() ) { - if( rod.getName().equalsIgnoreCase( "dbsnp" ) ) { + if( rod != null ) { foundDBSNP = true; + break; } } if( !foundDBSNP ) { @@ -232,15 +234,12 @@ public class CovariateCounterWalker extends LocusWalker { */ public Integer map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) { - // Pull out anything passed by -B name,type,file that has the name "dbsnp" - final RODRecordList dbsnpRODs = tracker.getTrackData( "dbsnp", null ); + // Pull out data for this locus for all the input RODs and check if this is a known variant site in any of them boolean isSNP = false; - if (dbsnpRODs != null) { - for( ReferenceOrderedDatum rod : dbsnpRODs ) { - if( ((Variation)rod).isSNP() ) { - isSNP = true; // At least one of the rods says this is a snp site - break; - } + for( ReferenceOrderedDatum rod : tracker.getAllRods() ) { + if( rod != null && rod instanceof Variation && ((Variation)rod).isSNP() ) { + isSNP = true; // At least one of the rods says this is a snp site + break; } } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java index 09c4ce095..65ad18d84 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java @@ -133,6 +133,34 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { } } + @Test + public void testCountCovariatesVCFPlusDBsnp() { + HashMap e = new HashMap(); + e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "cc1cc9c1ff184d388d81574fdccc608e"); + + for ( Map.Entry entry : e.entrySet() ) { + String bam = entry.getKey(); + String md5 = entry.getValue(); + + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + "-R " + oneKGLocation + "reference/human_b36_both.fasta" + + " -B anyNameABCD,VCF," + validationDataLocation + "vcfexample3.vcf" + + " -T CountCovariates" + + " -I " + bam + + " --DBSNP /humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" + + " -L 1:10,000,000-10,200,000" + + " -cov ReadGroupCovariate" + + " -cov QualityScoreCovariate" + + " -cov CycleCovariate" + + " -cov DinucCovariate" + + " --solid_recal_mode SET_Q_ZERO" + + " -recalFile %s", + 1, // just one output file + Arrays.asList(md5)); + executeTest("testCountCovariatesVCFPlusDBsnp", spec); + } + } + @Test public void testCountCovariatesNoReadGroups() { HashMap e = new HashMap();