Deal with the fact that walkers can call UG's init/map functions directly.

We need to filter contexts in that case since the calling walkers don't get UG's traversal-level filters.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1848 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-10-15 02:31:45 +00:00
parent 8dca236958
commit a32470cea1
5 changed files with 61 additions and 29 deletions

View File

@ -26,6 +26,7 @@
package org.broadinstitute.sting.gatk.walkers.genotyper; package org.broadinstitute.sting.gatk.walkers.genotyper;
import net.sf.samtools.SAMReadGroupRecord; import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
@ -75,25 +76,41 @@ public class UnifiedGenotyper extends LocusWalker<Pair<List<GenotypeCall>, Genot
// keep track of some metrics about our calls // keep track of some metrics about our calls
private CallMetrics callsMetrics; private CallMetrics callsMetrics;
/** // are we being called by another walker (which gets around the traversal-level filtering)?
* Filter out loci to ignore (at an ambiguous base in the reference or a locus with zero coverage). private boolean calledByAnotherWalker = true;
*
* @param tracker the meta data tracker
* @param ref the reference base
* @param context contextual information around the locus
*
* @return true if we should look at this locus, false otherwise
*/
public boolean filter(RefMetaDataTracker tracker, char ref, AlignmentContext context) {
return (BaseUtils.simpleBaseToBaseIndex(ref) != -1 && context.getReads().size() != 0);
}
/** Enable deletions in the pileup **/ /** Enable deletions in the pileup **/
public boolean includeReadsWithDeletionAtLoci() { return true; } public boolean includeReadsWithDeletionAtLoci() { return true; }
/** Initialize the samples, output, and genotype calculation model **/ /**
* Initialize the samples, output, and genotype calculation model
*
* *** OTHER WALKERS SHOULD NOT CALL THIS METHOD. USE initializePublic() INSTEAD! ***
*
**/
public void initialize() { public void initialize() {
initializePublic();
calledByAnotherWalker = false;
// create the output writer stream
if ( VARIANTS_FILE != null )
writer = GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), VARIANTS_FILE,
"UnifiedGenotyper",
this.getToolkit().getArguments().referenceFile.getName());
else
writer = GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), out, "UnifiedGenotyper",
this.getToolkit().getArguments().referenceFile.getName());
callsMetrics = new CallMetrics();
}
/**
* This method should be called by other walkers wanting to use the UnifiedGenotyper.
* If the regular initialize() method is called, subsequent calls to map() may result in
* exceptions being thrown.
*/
public void initializePublic() {
// get all of the unique sample names // get all of the unique sample names
samples = new HashSet<String>(); samples = new HashSet<String>();
List<SAMReadGroupRecord> readGroups = getToolkit().getSAMFileHeader().getReadGroups(); List<SAMReadGroupRecord> readGroups = getToolkit().getSAMFileHeader().getReadGroups();
@ -104,17 +121,7 @@ public class UnifiedGenotyper extends LocusWalker<Pair<List<GenotypeCall>, Genot
for ( String sample : samples ) for ( String sample : samples )
logger.debug("SAMPLE: " + sample); logger.debug("SAMPLE: " + sample);
// create the output writer stream
if ( VARIANTS_FILE != null )
writer = GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), VARIANTS_FILE,
"UnifiedGenotyper",
this.getToolkit().getArguments().referenceFile.getName());
else
writer = GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), out, "UnifiedGenotyper",
this.getToolkit().getArguments().referenceFile.getName());
gcm = GenotypeCalculationModelFactory.makeGenotypeCalculation(samples, logger, UAC); gcm = GenotypeCalculationModelFactory.makeGenotypeCalculation(samples, logger, UAC);
callsMetrics = new CallMetrics();
} }
/** /**
@ -129,14 +136,39 @@ public class UnifiedGenotyper extends LocusWalker<Pair<List<GenotypeCall>, Genot
if ( !BaseUtils.isRegularBase(ref) ) if ( !BaseUtils.isRegularBase(ref) )
return null; return null;
// an optimization to speed things up when overly covered // because other walkers externally call this map method with their own contexts,
if ( UAC.MAX_READS_IN_PILEUP > 0 && context.getReads().size() > UAC.MAX_READS_IN_PILEUP ) // we need to make sure that the reads are appropriately filtered
if ( calledByAnotherWalker )
context = filterAlignmentContext(context);
// an optimization to speed things up when there is no coverage or when overly covered
if ( context.getReads().size() == 0 ||
(UAC.MAX_READS_IN_PILEUP > 0 && context.getReads().size() > UAC.MAX_READS_IN_PILEUP) )
return null; return null;
DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, UAC.heterozygosity, DiploidGenotypePriors.PROB_OF_TRISTATE_GENOTYPE); DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, UAC.heterozygosity, DiploidGenotypePriors.PROB_OF_TRISTATE_GENOTYPE);
return gcm.calculateGenotype(tracker, ref, context, priors); return gcm.calculateGenotype(tracker, ref, context, priors);
} }
// filter the given alignment context
private AlignmentContext filterAlignmentContext(AlignmentContext context) {
List<SAMRecord> reads = context.getReads();
List<Integer> offsets = context.getOffsets();
List<SAMRecord> newReads = new ArrayList<SAMRecord>();
List<Integer> newOffsets = new ArrayList<Integer>();
for (int i = 0; i < reads.size(); i++) {
SAMRecord read = reads.get(i);
if ( read.getMappingQuality() != 0 && read.getReadGroup() != null ) {
newReads.add(read);
newOffsets.add(offsets.get(i));
}
}
return new AlignmentContext(context.getLocation(), newReads, newOffsets);
}
/** /**
* Determine whether we're at a Hapmap site * Determine whether we're at a Hapmap site
* *

View File

@ -41,7 +41,7 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Referen
public void initialize() { public void initialize() {
ug = new UnifiedGenotyper(); ug = new UnifiedGenotyper();
ug.initialize(); ug.initializePublic();
refWindow = new ReferenceContextWindow(nPreviousBases); refWindow = new ReferenceContextWindow(nPreviousBases);
} }

View File

@ -41,7 +41,7 @@ public class CoverageEvalWalker extends LocusWalker<List<String>, String> {
public void initialize() { public void initialize() {
UG = new UnifiedGenotyper(); UG = new UnifiedGenotyper();
UG.initialize(); UG.initializePublic();
String header = "#Sequence Position ReferenceBase NumberOfReads MaxMappingQuality BestGenotype BtrLod BtnbLod AA AC AG AT CC CG CT GG GT TT"; String header = "#Sequence Position ReferenceBase NumberOfReads MaxMappingQuality BestGenotype BtrLod BtnbLod AA AC AG AT CC CG CT GG GT TT";
out.println("DownsampledCoverage\tAvailableCoverage\tHapmapChipGenotype\tGenotypeCallType\t"+header.substring(1)); out.println("DownsampledCoverage\tAvailableCoverage\tHapmapChipGenotype\tGenotypeCallType\t"+header.substring(1));

View File

@ -37,7 +37,7 @@ public class DeNovoSNPWalker extends RefWalker<String, Integer>{
public void initialize() { public void initialize() {
UG = new UnifiedGenotyper(); UG = new UnifiedGenotyper();
UG.initialize(); UG.initializePublic();
readGroupSets = getToolkit().getMergedReadGroupsByReaders(); readGroupSets = getToolkit().getMergedReadGroupsByReaders();
} }

View File

@ -74,7 +74,7 @@ public class ArtificialPoolContext {
} }
public void initializeUG() { public void initializeUG() {
ug.initialize(); ug.initializePublic();
} }
public void setReadGroupSets(List<Set<String>> rgSets) { public void setReadGroupSets(List<Set<String>> rgSets) {