Deal with the fact that walkers can call UG's init/map functions directly.
We need to filter contexts in that case since the calling walkers don't get UG's traversal-level filters. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1848 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
8dca236958
commit
a32470cea1
|
|
@ -26,6 +26,7 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import net.sf.samtools.SAMReadGroupRecord;
|
import net.sf.samtools.SAMReadGroupRecord;
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
|
@ -75,25 +76,41 @@ public class UnifiedGenotyper extends LocusWalker<Pair<List<GenotypeCall>, Genot
|
||||||
// keep track of some metrics about our calls
|
// keep track of some metrics about our calls
|
||||||
private CallMetrics callsMetrics;
|
private CallMetrics callsMetrics;
|
||||||
|
|
||||||
/**
|
// are we being called by another walker (which gets around the traversal-level filtering)?
|
||||||
* Filter out loci to ignore (at an ambiguous base in the reference or a locus with zero coverage).
|
private boolean calledByAnotherWalker = true;
|
||||||
*
|
|
||||||
* @param tracker the meta data tracker
|
|
||||||
* @param ref the reference base
|
|
||||||
* @param context contextual information around the locus
|
|
||||||
*
|
|
||||||
* @return true if we should look at this locus, false otherwise
|
|
||||||
*/
|
|
||||||
public boolean filter(RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
|
||||||
return (BaseUtils.simpleBaseToBaseIndex(ref) != -1 && context.getReads().size() != 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Enable deletions in the pileup **/
|
/** Enable deletions in the pileup **/
|
||||||
public boolean includeReadsWithDeletionAtLoci() { return true; }
|
public boolean includeReadsWithDeletionAtLoci() { return true; }
|
||||||
|
|
||||||
/** Initialize the samples, output, and genotype calculation model **/
|
/**
|
||||||
|
* Initialize the samples, output, and genotype calculation model
|
||||||
|
*
|
||||||
|
* *** OTHER WALKERS SHOULD NOT CALL THIS METHOD. USE initializePublic() INSTEAD! ***
|
||||||
|
*
|
||||||
|
**/
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
|
|
||||||
|
initializePublic();
|
||||||
|
|
||||||
|
calledByAnotherWalker = false;
|
||||||
|
|
||||||
|
// create the output writer stream
|
||||||
|
if ( VARIANTS_FILE != null )
|
||||||
|
writer = GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), VARIANTS_FILE,
|
||||||
|
"UnifiedGenotyper",
|
||||||
|
this.getToolkit().getArguments().referenceFile.getName());
|
||||||
|
else
|
||||||
|
writer = GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), out, "UnifiedGenotyper",
|
||||||
|
this.getToolkit().getArguments().referenceFile.getName());
|
||||||
|
callsMetrics = new CallMetrics();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This method should be called by other walkers wanting to use the UnifiedGenotyper.
|
||||||
|
* If the regular initialize() method is called, subsequent calls to map() may result in
|
||||||
|
* exceptions being thrown.
|
||||||
|
*/
|
||||||
|
public void initializePublic() {
|
||||||
// get all of the unique sample names
|
// get all of the unique sample names
|
||||||
samples = new HashSet<String>();
|
samples = new HashSet<String>();
|
||||||
List<SAMReadGroupRecord> readGroups = getToolkit().getSAMFileHeader().getReadGroups();
|
List<SAMReadGroupRecord> readGroups = getToolkit().getSAMFileHeader().getReadGroups();
|
||||||
|
|
@ -104,17 +121,7 @@ public class UnifiedGenotyper extends LocusWalker<Pair<List<GenotypeCall>, Genot
|
||||||
for ( String sample : samples )
|
for ( String sample : samples )
|
||||||
logger.debug("SAMPLE: " + sample);
|
logger.debug("SAMPLE: " + sample);
|
||||||
|
|
||||||
// create the output writer stream
|
|
||||||
if ( VARIANTS_FILE != null )
|
|
||||||
writer = GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), VARIANTS_FILE,
|
|
||||||
"UnifiedGenotyper",
|
|
||||||
this.getToolkit().getArguments().referenceFile.getName());
|
|
||||||
else
|
|
||||||
writer = GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), out, "UnifiedGenotyper",
|
|
||||||
this.getToolkit().getArguments().referenceFile.getName());
|
|
||||||
|
|
||||||
gcm = GenotypeCalculationModelFactory.makeGenotypeCalculation(samples, logger, UAC);
|
gcm = GenotypeCalculationModelFactory.makeGenotypeCalculation(samples, logger, UAC);
|
||||||
callsMetrics = new CallMetrics();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -129,14 +136,39 @@ public class UnifiedGenotyper extends LocusWalker<Pair<List<GenotypeCall>, Genot
|
||||||
if ( !BaseUtils.isRegularBase(ref) )
|
if ( !BaseUtils.isRegularBase(ref) )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
// an optimization to speed things up when overly covered
|
// because other walkers externally call this map method with their own contexts,
|
||||||
if ( UAC.MAX_READS_IN_PILEUP > 0 && context.getReads().size() > UAC.MAX_READS_IN_PILEUP )
|
// we need to make sure that the reads are appropriately filtered
|
||||||
|
if ( calledByAnotherWalker )
|
||||||
|
context = filterAlignmentContext(context);
|
||||||
|
|
||||||
|
// an optimization to speed things up when there is no coverage or when overly covered
|
||||||
|
if ( context.getReads().size() == 0 ||
|
||||||
|
(UAC.MAX_READS_IN_PILEUP > 0 && context.getReads().size() > UAC.MAX_READS_IN_PILEUP) )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, UAC.heterozygosity, DiploidGenotypePriors.PROB_OF_TRISTATE_GENOTYPE);
|
DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, UAC.heterozygosity, DiploidGenotypePriors.PROB_OF_TRISTATE_GENOTYPE);
|
||||||
return gcm.calculateGenotype(tracker, ref, context, priors);
|
return gcm.calculateGenotype(tracker, ref, context, priors);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// filter the given alignment context
|
||||||
|
private AlignmentContext filterAlignmentContext(AlignmentContext context) {
|
||||||
|
List<SAMRecord> reads = context.getReads();
|
||||||
|
List<Integer> offsets = context.getOffsets();
|
||||||
|
|
||||||
|
List<SAMRecord> newReads = new ArrayList<SAMRecord>();
|
||||||
|
List<Integer> newOffsets = new ArrayList<Integer>();
|
||||||
|
|
||||||
|
for (int i = 0; i < reads.size(); i++) {
|
||||||
|
SAMRecord read = reads.get(i);
|
||||||
|
if ( read.getMappingQuality() != 0 && read.getReadGroup() != null ) {
|
||||||
|
newReads.add(read);
|
||||||
|
newOffsets.add(offsets.get(i));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return new AlignmentContext(context.getLocation(), newReads, newOffsets);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Determine whether we're at a Hapmap site
|
* Determine whether we're at a Hapmap site
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -41,7 +41,7 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Referen
|
||||||
|
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
ug = new UnifiedGenotyper();
|
ug = new UnifiedGenotyper();
|
||||||
ug.initialize();
|
ug.initializePublic();
|
||||||
refWindow = new ReferenceContextWindow(nPreviousBases);
|
refWindow = new ReferenceContextWindow(nPreviousBases);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -41,7 +41,7 @@ public class CoverageEvalWalker extends LocusWalker<List<String>, String> {
|
||||||
|
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
UG = new UnifiedGenotyper();
|
UG = new UnifiedGenotyper();
|
||||||
UG.initialize();
|
UG.initializePublic();
|
||||||
|
|
||||||
String header = "#Sequence Position ReferenceBase NumberOfReads MaxMappingQuality BestGenotype BtrLod BtnbLod AA AC AG AT CC CG CT GG GT TT";
|
String header = "#Sequence Position ReferenceBase NumberOfReads MaxMappingQuality BestGenotype BtrLod BtnbLod AA AC AG AT CC CG CT GG GT TT";
|
||||||
out.println("DownsampledCoverage\tAvailableCoverage\tHapmapChipGenotype\tGenotypeCallType\t"+header.substring(1));
|
out.println("DownsampledCoverage\tAvailableCoverage\tHapmapChipGenotype\tGenotypeCallType\t"+header.substring(1));
|
||||||
|
|
|
||||||
|
|
@ -37,7 +37,7 @@ public class DeNovoSNPWalker extends RefWalker<String, Integer>{
|
||||||
|
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
UG = new UnifiedGenotyper();
|
UG = new UnifiedGenotyper();
|
||||||
UG.initialize();
|
UG.initializePublic();
|
||||||
|
|
||||||
readGroupSets = getToolkit().getMergedReadGroupsByReaders();
|
readGroupSets = getToolkit().getMergedReadGroupsByReaders();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -74,7 +74,7 @@ public class ArtificialPoolContext {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void initializeUG() {
|
public void initializeUG() {
|
||||||
ug.initialize();
|
ug.initializePublic();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setReadGroupSets(List<Set<String>> rgSets) {
|
public void setReadGroupSets(List<Set<String>> rgSets) {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue