From a32470cea1c0042b3607926336d8c58fa0d4785e Mon Sep 17 00:00:00 2001 From: ebanks Date: Thu, 15 Oct 2009 02:31:45 +0000 Subject: [PATCH] Deal with the fact that walkers can call UG's init/map functions directly. We need to filter contexts in that case since the calling walkers don't get UG's traversal-level filters. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1848 348d0f76-0448-11de-a6fe-93d51630548a --- .../walkers/genotyper/UnifiedGenotyper.java | 82 +++++++++++++------ ...seTransitionTableCalculatorJavaWalker.java | 2 +- .../gatk/walkers/CoverageEvalWalker.java | 2 +- .../gatk/walkers/DeNovoSNPWalker.java | 2 +- .../utils/ArtificialPoolContext.java | 2 +- 5 files changed, 61 insertions(+), 29 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index a59a943ca..cb012d4fd 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import net.sf.samtools.SAMReadGroupRecord; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -75,25 +76,41 @@ public class UnifiedGenotyper extends LocusWalker, Genot // keep track of some metrics about our calls private CallMetrics callsMetrics; - /** - * Filter out loci to ignore (at an ambiguous base in the reference or a locus with zero coverage). - * - * @param tracker the meta data tracker - * @param ref the reference base - * @param context contextual information around the locus - * - * @return true if we should look at this locus, false otherwise - */ - public boolean filter(RefMetaDataTracker tracker, char ref, AlignmentContext context) { - return (BaseUtils.simpleBaseToBaseIndex(ref) != -1 && context.getReads().size() != 0); - } + // are we being called by another walker (which gets around the traversal-level filtering)? + private boolean calledByAnotherWalker = true; /** Enable deletions in the pileup **/ public boolean includeReadsWithDeletionAtLoci() { return true; } - /** Initialize the samples, output, and genotype calculation model **/ + /** + * Initialize the samples, output, and genotype calculation model + * + * *** OTHER WALKERS SHOULD NOT CALL THIS METHOD. USE initializePublic() INSTEAD! *** + * + **/ public void initialize() { + initializePublic(); + + calledByAnotherWalker = false; + + // create the output writer stream + if ( VARIANTS_FILE != null ) + writer = GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), VARIANTS_FILE, + "UnifiedGenotyper", + this.getToolkit().getArguments().referenceFile.getName()); + else + writer = GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), out, "UnifiedGenotyper", + this.getToolkit().getArguments().referenceFile.getName()); + callsMetrics = new CallMetrics(); + } + + /** + * This method should be called by other walkers wanting to use the UnifiedGenotyper. + * If the regular initialize() method is called, subsequent calls to map() may result in + * exceptions being thrown. + */ + public void initializePublic() { // get all of the unique sample names samples = new HashSet(); List readGroups = getToolkit().getSAMFileHeader().getReadGroups(); @@ -104,17 +121,7 @@ public class UnifiedGenotyper extends LocusWalker, Genot for ( String sample : samples ) logger.debug("SAMPLE: " + sample); - // create the output writer stream - if ( VARIANTS_FILE != null ) - writer = GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), VARIANTS_FILE, - "UnifiedGenotyper", - this.getToolkit().getArguments().referenceFile.getName()); - else - writer = GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), out, "UnifiedGenotyper", - this.getToolkit().getArguments().referenceFile.getName()); - gcm = GenotypeCalculationModelFactory.makeGenotypeCalculation(samples, logger, UAC); - callsMetrics = new CallMetrics(); } /** @@ -129,14 +136,39 @@ public class UnifiedGenotyper extends LocusWalker, Genot if ( !BaseUtils.isRegularBase(ref) ) return null; - // an optimization to speed things up when overly covered - if ( UAC.MAX_READS_IN_PILEUP > 0 && context.getReads().size() > UAC.MAX_READS_IN_PILEUP ) + // because other walkers externally call this map method with their own contexts, + // we need to make sure that the reads are appropriately filtered + if ( calledByAnotherWalker ) + context = filterAlignmentContext(context); + + // an optimization to speed things up when there is no coverage or when overly covered + if ( context.getReads().size() == 0 || + (UAC.MAX_READS_IN_PILEUP > 0 && context.getReads().size() > UAC.MAX_READS_IN_PILEUP) ) return null; DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, UAC.heterozygosity, DiploidGenotypePriors.PROB_OF_TRISTATE_GENOTYPE); return gcm.calculateGenotype(tracker, ref, context, priors); } + // filter the given alignment context + private AlignmentContext filterAlignmentContext(AlignmentContext context) { + List reads = context.getReads(); + List offsets = context.getOffsets(); + + List newReads = new ArrayList(); + List newOffsets = new ArrayList(); + + for (int i = 0; i < reads.size(); i++) { + SAMRecord read = reads.get(i); + if ( read.getMappingQuality() != 0 && read.getReadGroup() != null ) { + newReads.add(read); + newOffsets.add(offsets.get(i)); + } + } + + return new AlignmentContext(context.getLocation(), newReads, newOffsets); + } + /** * Determine whether we're at a Hapmap site * diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/BaseTransitionTableCalculatorJavaWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/BaseTransitionTableCalculatorJavaWalker.java index cf24922ff..a74f44b94 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/BaseTransitionTableCalculatorJavaWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/BaseTransitionTableCalculatorJavaWalker.java @@ -41,7 +41,7 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker, String> { public void initialize() { UG = new UnifiedGenotyper(); - UG.initialize(); + UG.initializePublic(); String header = "#Sequence Position ReferenceBase NumberOfReads MaxMappingQuality BestGenotype BtrLod BtnbLod AA AC AG AT CC CG CT GG GT TT"; out.println("DownsampledCoverage\tAvailableCoverage\tHapmapChipGenotype\tGenotypeCallType\t"+header.substring(1)); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/DeNovoSNPWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/DeNovoSNPWalker.java index 038380040..a88834bd0 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/DeNovoSNPWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/DeNovoSNPWalker.java @@ -37,7 +37,7 @@ public class DeNovoSNPWalker extends RefWalker{ public void initialize() { UG = new UnifiedGenotyper(); - UG.initialize(); + UG.initializePublic(); readGroupSets = getToolkit().getMergedReadGroupsByReaders(); } diff --git a/java/src/org/broadinstitute/sting/playground/utils/ArtificialPoolContext.java b/java/src/org/broadinstitute/sting/playground/utils/ArtificialPoolContext.java index 057440d60..a6342177b 100755 --- a/java/src/org/broadinstitute/sting/playground/utils/ArtificialPoolContext.java +++ b/java/src/org/broadinstitute/sting/playground/utils/ArtificialPoolContext.java @@ -74,7 +74,7 @@ public class ArtificialPoolContext { } public void initializeUG() { - ug.initialize(); + ug.initializePublic(); } public void setReadGroupSets(List> rgSets) {