diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java index 50015fd95..bdccf4dd8 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java @@ -101,12 +101,12 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC if ( !vc.isBiallelic() ) { // for multi-allelic sites go back to the reads and find the most likely alternate allele - initializeBestAlternateAllele(refBase, contexts); + initializeBestAlternateAllele(refBase, contexts, useBAQedPileup); } else { bestAlternateAllele = vc.getAlternateAllele(0).getBases()[0]; } } else { - initializeBestAlternateAllele(refBase, contexts); + initializeBestAlternateAllele(refBase, contexts, useBAQedPileup); } // if there are no non-ref bases... @@ -148,12 +148,12 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC return refAllele; } - protected void initializeBestAlternateAllele(byte ref, Map contexts) { + protected void initializeBestAlternateAllele(byte ref, Map contexts, boolean useBAQedPileup) { int[] qualCounts = new int[4]; for ( Map.Entry sample : contexts.entrySet() ) { // calculate the sum of quality scores for each base - ReadBackedPileup pileup = createBAQedPileup( sample.getValue().getBasePileup() ); + ReadBackedPileup pileup = useBAQedPileup ? createBAQedPileup( sample.getValue().getBasePileup() ) : sample.getValue().getBasePileup(); for ( PileupElement p : pileup ) { // ignore deletions if ( p.isDeletion() || p.getQual() < UAC.MIN_BASE_QUALTY_SCORE ) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 06b4aacd2..8d0c7eaf6 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -37,14 +37,15 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.pileup.*; import org.broad.tribble.vcf.VCFConstants; +import com.google.java.contract.*; import java.io.PrintStream; import java.util.*; public class UnifiedGenotyperEngine { - public static final String LOW_QUAL_FILTER_NAME = "LowQual"; public enum OUTPUT_MODE { @@ -54,10 +55,10 @@ public class UnifiedGenotyperEngine { } // the unified argument collection - private UnifiedArgumentCollection UAC = null; + private final UnifiedArgumentCollection UAC; // the annotation engine - private VariantAnnotatorEngine annotationEngine; + private final VariantAnnotatorEngine annotationEngine; // the model used for calculating genotypes private ThreadLocal> glcm = new ThreadLocal>(); @@ -66,49 +67,52 @@ public class UnifiedGenotyperEngine { private ThreadLocal afcm = new ThreadLocal(); // because the allele frequency priors are constant for a given i, we cache the results to avoid having to recompute everything - private double[] log10AlleleFrequencyPriorsSNPs; - private double[] log10AlleleFrequencyPriorsIndels; + private final double[] log10AlleleFrequencyPriorsSNPs; + private final double[] log10AlleleFrequencyPriorsIndels; // the allele frequency likelihoods (allocated once as an optimization) private ThreadLocal log10AlleleFrequencyPosteriors = new ThreadLocal(); // the priors object - private GenotypePriors genotypePriorsSNPs; - private GenotypePriors genotypePriorsIndels; + private final GenotypePriors genotypePriorsSNPs; + private final GenotypePriors genotypePriorsIndels; // samples in input - private Set samples = new TreeSet(); + private final Set samples; // the various loggers and writers - private Logger logger = null; - private PrintStream verboseWriter = null; + private final Logger logger; + private final PrintStream verboseWriter; // number of chromosomes (2 * samples) in input - private int N; + private final int N; // the standard filter to use for calls below the confidence threshold but above the emit threshold private static final Set filter = new HashSet(1); + private final boolean BAQEnabledOnCMDLine; + + + // --------------------------------------------------------------------------------------------------------- + // + // Public interface functions + // + // --------------------------------------------------------------------------------------------------------- + @Requires({"toolkit != null", "UAC != null"}) public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) { - // get the number of samples - // if we're supposed to assume a single sample, do so - samples = new TreeSet(); - if ( UAC.ASSUME_SINGLE_SAMPLE != null ) - samples.add(UAC.ASSUME_SINGLE_SAMPLE); - else - samples = SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()); - - final Logger logger = Logger.getLogger(UnifiedGenotyperEngine.class); - initialize(UAC, logger, null, null, samples.size()); + this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, + // get the number of samples + // if we're supposed to assume a single sample, do so + UAC.ASSUME_SINGLE_SAMPLE != null ? + new TreeSet(Arrays.asList(UAC.ASSUME_SINGLE_SAMPLE)) : + SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader())); } + @Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0"}) public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, PrintStream verboseWriter, VariantAnnotatorEngine engine, Set samples) { + this.BAQEnabledOnCMDLine = toolkit.getArguments().BAQMode != BAQ.CalculationMode.OFF; this.samples = new TreeSet(samples); - initialize(UAC, logger, verboseWriter, engine, samples.size()); - } - - private void initialize(UnifiedArgumentCollection UAC, Logger logger, PrintStream verboseWriter, VariantAnnotatorEngine engine, int numSamples) { // note that, because we cap the base quality by the mapping quality, minMQ cannot be less than minBQ this.UAC = UAC.clone(); this.UAC.MIN_MAPPING_QUALTY_SCORE = Math.max(UAC.MIN_MAPPING_QUALTY_SCORE, UAC.MIN_BASE_QUALTY_SCORE); @@ -117,7 +121,7 @@ public class UnifiedGenotyperEngine { this.verboseWriter = verboseWriter; this.annotationEngine = engine; - N = 2 * numSamples; + N = 2 * this.samples.size(); log10AlleleFrequencyPriorsSNPs = new double[N+1]; log10AlleleFrequencyPriorsIndels = new double[N+1]; computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsSNPs, GenotypeLikelihoodsCalculationModel.Model.SNP); @@ -178,6 +182,33 @@ public class UnifiedGenotyperEngine { return calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model); } + /** + * Compute genotypes at a given locus. Entry point for engine calls from UGCallVariants. + * + * @param tracker the meta data tracker + * @param refContext the reference base + * @param rawContext contextual information around the locus + * @param vc the GL-annotated variant context + * @return the VariantCallContext object + */ + public VariantCallContext calculateGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, VariantContext vc) { + final GenotypeLikelihoodsCalculationModel.Model model = getCurrentGLModel( rawContext ); + if( model == null ) { + return null; + } + Map stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext, model); + return calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model); + } + + + + + // --------------------------------------------------------------------------------------------------------- + // + // Private implementation helpers + // + // --------------------------------------------------------------------------------------------------------- + // private method called by both UnifiedGenotyper and UGCalcLikelihoods entry points into the engine private VariantContext calculateLikelihoods(RefMetaDataTracker tracker, ReferenceContext refContext, Map stratifiedContexts, AlignmentContextUtils.ReadOrientation type, Allele alternateAlleleToUse, boolean useBAQedPileup, final GenotypeLikelihoodsCalculationModel.Model model) { @@ -188,7 +219,7 @@ public class UnifiedGenotyperEngine { Map GLs = new HashMap(); - Allele refAllele = glcm.get().get(model).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), GLs, alternateAlleleToUse, useBAQedPileup); + Allele refAllele = glcm.get().get(model).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), GLs, alternateAlleleToUse, useBAQedPileup && BAQEnabledOnCMDLine); if ( refAllele != null ) return createVariantContextFromLikelihoods(refContext, refAllele, GLs); @@ -268,24 +299,6 @@ public class UnifiedGenotyperEngine { null); } - /** - * Compute genotypes at a given locus. Entry point for engine calls from UGCallVariants. - * - * @param tracker the meta data tracker - * @param refContext the reference base - * @param rawContext contextual information around the locus - * @param vc the GL-annotated variant context - * @return the VariantCallContext object - */ - public VariantCallContext calculateGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, VariantContext vc) { - final GenotypeLikelihoodsCalculationModel.Model model = getCurrentGLModel( rawContext ); - if( model == null ) { - return null; - } - Map stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext, model); - return calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model); - } - // private method called by both UnifiedGenotyper and UGCallVariants entry points into the engine private VariantCallContext calculateGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, Map stratifiedContexts, VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model) { diff --git a/java/test/org/broadinstitute/sting/WalkerTest.java b/java/test/org/broadinstitute/sting/WalkerTest.java index 1b8b4206a..822034720 100755 --- a/java/test/org/broadinstitute/sting/WalkerTest.java +++ b/java/test/org/broadinstitute/sting/WalkerTest.java @@ -211,7 +211,7 @@ public class WalkerTest extends BaseTest { if ( md5 == null ) throw new IllegalArgumentException("Null MD5 found in test " + name); if ( md5.equals("") ) // ok - ; + continue; if ( ! StringUtils.isAlphanumeric(md5) ) throw new IllegalArgumentException("MD5 contains non-alphanumeric characters test " + name + " md5=" + md5); if ( md5.length() != exampleMD5.length() ) diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 082f23c69..fa2e9c687 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -234,6 +234,20 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { executeTest(String.format("test calling with BAQ"), spec); } + @Test + public void testCallingWithBAQOff() { + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + baseCommand + + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam" + + " -o %s" + + " -L 1:10,000,000-10,100,000" + + " -baq OFF", + 1, + Arrays.asList("f9d92a81294569b9c918848932c1a0ca")); + + executeTest(String.format("test calling with BAQ OFF"), spec); + } + // -------------------------------------------------------------------------------------------------------------- // // testing indel caller