diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java index 914f54363..b4739f366 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java @@ -25,15 +25,10 @@ package org.broadinstitute.sting.gatk.walkers.recalibration; -import org.broad.tribble.bed.BEDCodec; -import org.broad.tribble.dbsnp.DbSNPCodec; -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.ArgumentCollection; -import org.broadinstitute.sting.commandline.Gather; -import org.broadinstitute.sting.commandline.Output; +import org.broad.tribble.Feature; +import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableReadFilter; import org.broadinstitute.sting.gatk.filters.MappingQualityZeroReadFilter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -42,8 +37,6 @@ import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.classloader.PluginManager; -import org.broadinstitute.sting.utils.codecs.vcf.VCF3Codec; -import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; import org.broadinstitute.sting.utils.collections.NestedHashMap; import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -94,13 +87,18 @@ public class CountCovariatesWalker extends LocusWalker> knownSites = Collections.emptyList(); + @Output + PrintStream out; + @Output(fullName="recal_file", shortName="recalFile", required=true, doc="Filename for the output covariates table recalibration file") @Gather(CountCovariatesGatherer.class) public PrintStream RECAL_FILE; @@ -124,8 +122,8 @@ public class CountCovariatesWalker extends LocusWalker requestedCovariates = new ArrayList(); // A list to hold the covariate objects that were requested - private static final double DBSNP_VS_NOVEL_MISMATCH_RATE = 2.0; // rate at which dbSNP sites (on an individual level) mismatch relative to novel sites (determined by looking at NA12878) - private static int DBSNP_VALIDATION_CHECK_FREQUENCY = 1000000; // how often to validate dbsnp mismatch rate (in terms of loci seen) + private static final double DBSNP_VS_NOVEL_MISMATCH_RATE = 2.0; // rate at which dbSNP sites (on an individual level) mismatch relative to novel sites (determined by looking at NA12878) + private static int DBSNP_VALIDATION_CHECK_FREQUENCY = 1000000; // how often to validate dbsnp mismatch rate (in terms of loci seen) public static class CountedData { private long countedSites = 0; // Number of loci used in the calculations, used for reporting in the output file @@ -136,7 +134,7 @@ public class CountCovariatesWalker extends LocusWalker 0; - // Only use data from non-dbsnp sites // Assume every mismatch at a non-dbsnp site is indicative of poor quality CountedData counter = new CountedData(); - if( !isSNP ) { + if( tracker.getValues(knownSites).size() == 0 ) { // If something here is in one of the knownSites tracks then skip over it, otherwise proceed // For each read at this locus - for( PileupElement p : context.getBasePileup() ) { - GATKSAMRecord gatkRead = (GATKSAMRecord) p.getRead(); + for( final PileupElement p : context.getBasePileup() ) { + final GATKSAMRecord gatkRead = (GATKSAMRecord) p.getRead(); int offset = p.getOffset(); if( gatkRead.containsTemporaryAttribute( SKIP_RECORD_ATTRIBUTE ) ) { @@ -358,8 +335,6 @@ public class CountCovariatesWalker extends LocusWalker e = new HashMap(); - e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "170f0c3cc4b8d72c539136effeec9a16"); - - for ( Map.Entry entry : e.entrySet() ) { - String bam = entry.getKey(); - String md5 = entry.getValue(); - - WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-R " + b36KGReference + - " -B:dbsnp,VCF3 " + validationDataLocation + "vcfexample3.vcf" + - " -T CountCovariates" + - " -I " + bam + - " -L 1:10,000,000-10,200,000" + - " -standard" + - " --solid_recal_mode SET_Q_ZERO" + - " -recalFile %s", - 1, // just one output file - Arrays.asList(md5)); - executeTest("testCountCovariatesVCF", spec); - } - } - @Test public void testCountCovariatesBED() { HashMap e = new HashMap(); @@ -260,7 +236,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + - " -B:bed,bed " + validationDataLocation + "recalibrationTest.bed" + + " -knownSites:bed " + validationDataLocation + "recalibrationTest.bed" + " -T CountCovariates" + " -I " + bam + " -L 1:10,000,000-10,200,000" + @@ -284,10 +260,10 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + - " -B:anyNameABCD,VCF3 " + validationDataLocation + "vcfexample3.vcf" + + " -knownSites:anyNameABCD,VCF3 " + validationDataLocation + "vcfexample3.vcf" + " -T CountCovariates" + " -I " + bam + - " -B:dbsnp,vcf " + b36dbSNP129 + + " -knownSites " + b36dbSNP129 + " -L 1:10,000,000-10,200,000" + " -cov ReadGroupCovariate" + " -cov QualityScoreCovariate" + @@ -312,7 +288,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + - " -B:dbsnp,vcf " + b36dbSNP129 + + " -knownSites " + b36dbSNP129 + " -T CountCovariates" + " -I " + bam + " -cov ReadGroupCovariate" +