First attempt at removing all traces of extended events from UG; integration tests are expected to fail.

This commit is contained in:
Eric Banks 2012-03-29 14:59:29 -04:00
parent e61e162c81
commit e4469a83ee
5 changed files with 142 additions and 196 deletions

View File

@ -32,9 +32,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.clipping.ReadClipper; import org.broadinstitute.sting.utils.clipping.ReadClipper;
import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.sam.ReadUtils;
@ -100,43 +98,32 @@ public class ConsensusAlleleCounter {
int insCount = 0, delCount = 0; int insCount = 0, delCount = 0;
// quick check of total number of indels in pileup // quick check of total number of indels in pileup
for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) { for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) {
AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType); final AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
if (context.hasExtendedEventPileup()) { if ( context.hasBasePileup() ) {
final ReadBackedExtendedEventPileup indelPileup = context.getExtendedEventPileup();
insCount += indelPileup.getNumberOfInsertions();
delCount += indelPileup.getNumberOfDeletions();
}
else {
final ReadBackedPileup indelPileup = context.getBasePileup(); final ReadBackedPileup indelPileup = context.getBasePileup();
insCount += indelPileup.getNumberOfInsertionsAfterThisElement(); insCount += indelPileup.getNumberOfInsertionsAfterThisElement();
delCount += indelPileup.getNumberOfDeletionsAfterThisElement(); delCount += indelPileup.getNumberOfDeletionsAfterThisElement();
} }
} }
if (insCount < minIndelCountForGenotyping && delCount < minIndelCountForGenotyping) if ( insCount < minIndelCountForGenotyping && delCount < minIndelCountForGenotyping )
return Collections.emptyMap(); return Collections.emptyMap();
for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) { for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) {
// todo -- warning, can be duplicating expensive partition here // todo -- warning, can be duplicating expensive partition here
AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType); AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
final ReadBackedPileup indelPileup; if ( !context.hasBasePileup() )
continue;
final int nIndelReads, nReadsOverall; final ReadBackedPileup indelPileup = context.getBasePileup();
if (context.hasExtendedEventPileup()) { final int nIndelReads = indelPileup.getNumberOfInsertionsAfterThisElement() + indelPileup.getNumberOfDeletionsAfterThisElement();
indelPileup = context.getExtendedEventPileup(); final int nReadsOverall = indelPileup.getNumberOfElements();
nIndelReads = ((ReadBackedExtendedEventPileup)indelPileup).getNumberOfInsertions() + indelPileup.getNumberOfDeletions();
nReadsOverall = indelPileup.getNumberOfElements(); if ( nIndelReads == 0 || (nIndelReads / (1.0 * nReadsOverall)) < minFractionInOneSample ) {
}
else {
indelPileup = context.getBasePileup();
nIndelReads = indelPileup.getNumberOfInsertionsAfterThisElement() + indelPileup.getNumberOfDeletionsAfterThisElement();
nReadsOverall = indelPileup.getNumberOfElements();
}
if ( nIndelReads == 0 || (nIndelReads / (1.0 * nReadsOverall)) < minFractionInOneSample) {
// if ( nIndelReads > 0 ) // if ( nIndelReads > 0 )
// logger.info("Skipping sample " + sample.getKey() + " with nIndelReads " + nIndelReads + " nReads " + nReadsOverall); // logger.info("Skipping sample " + sample.getKey() + " with nIndelReads " + nIndelReads + " nReads " + nReadsOverall);
continue; continue;
@ -161,7 +148,7 @@ public class ConsensusAlleleCounter {
*/ */
String indelString = p.getEventBases(); String indelString = p.getEventBases();
if (isInsertion(p)) { if ( p.isBeforeInsertion() ) {
boolean foundKey = false; boolean foundKey = false;
// copy of hashmap into temp arrayList // copy of hashmap into temp arrayList
ArrayList<Pair<String,Integer>> cList = new ArrayList<Pair<String,Integer>>(); ArrayList<Pair<String,Integer>> cList = new ArrayList<Pair<String,Integer>>();
@ -229,7 +216,7 @@ public class ConsensusAlleleCounter {
} }
} }
else if (isDeletion(p)) { else if ( p.isBeforeDeletion() ) {
indelString = String.format("D%d",p.getEventLength()); indelString = String.format("D%d",p.getEventLength());
int cnt = consensusIndelStrings.containsKey(indelString)? consensusIndelStrings.get(indelString):0; int cnt = consensusIndelStrings.containsKey(indelString)? consensusIndelStrings.get(indelString):0;
consensusIndelStrings.put(indelString,cnt+1); consensusIndelStrings.put(indelString,cnt+1);
@ -241,25 +228,6 @@ public class ConsensusAlleleCounter {
return consensusIndelStrings; return consensusIndelStrings;
} }
// todo - helper routines to check for extended pileup elements, to remove when extended events are removed
private static final boolean isInsertion(final PileupElement p) {
if (p instanceof ExtendedEventPileupElement)
return ((ExtendedEventPileupElement) p).isInsertion();
else
return p.isBeforeInsertion();
}
private static boolean isDeletion(final PileupElement p) {
if (p instanceof ExtendedEventPileupElement)
return p.isDeletion();
else
return p.isBeforeDeletion();
}
private List<Allele> consensusCountsToAlleles(final ReferenceContext ref, private List<Allele> consensusCountsToAlleles(final ReferenceContext ref,
final Map<String, Integer> consensusIndelStrings) { final Map<String, Integer> consensusIndelStrings) {
final GenomeLoc loc = ref.getLocus(); final GenomeLoc loc = ref.getLocus();

View File

@ -120,7 +120,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
if ( sample.hasLikelihoods() ) { if ( sample.hasLikelihoods() ) {
double[] gls = sample.getLikelihoods().getAsVector(); double[] gls = sample.getLikelihoods().getAsVector();
if ( MathUtils.sum(gls) < VariantContextUtils.SUM_GL_THRESH_NOCALL ) if ( MathUtils.sum(gls) < UnifiedGenotyperEngine.SUM_GL_THRESH_NOCALL )
genotypeLikelihoods.add(gls); genotypeLikelihoods.add(gls);
} }
} }

View File

@ -114,7 +114,7 @@ import java.util.*;
@Reference(window=@Window(start=-200,stop=200)) @Reference(window=@Window(start=-200,stop=200))
@By(DataSource.REFERENCE) @By(DataSource.REFERENCE)
@Downsample(by=DownsampleType.BY_SAMPLE, toCoverage=250) @Downsample(by=DownsampleType.BY_SAMPLE, toCoverage=250)
public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGenotyper.UGStatistics> implements TreeReducible<UnifiedGenotyper.UGStatistics>, AnnotatorCompatibleWalker { public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, UnifiedGenotyper.UGStatistics> implements TreeReducible<UnifiedGenotyper.UGStatistics>, AnnotatorCompatibleWalker {
@ArgumentCollection @ArgumentCollection
private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection(); private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
@ -173,12 +173,6 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
@Override @Override
public boolean includeReadsWithDeletionAtLoci() { return true; } public boolean includeReadsWithDeletionAtLoci() { return true; }
// enable extended events for indels
@Override
public boolean generateExtendedEvents() {
return (UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.SNP && UAC.GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES);
}
/** /**
* Inner class for collecting output statistics from the UG * Inner class for collecting output statistics from the UG
*/ */
@ -281,7 +275,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
* @param rawContext contextual information around the locus * @param rawContext contextual information around the locus
* @return the VariantCallContext object * @return the VariantCallContext object
*/ */
public VariantCallContext map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) { public List<VariantCallContext> map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) {
return UG_engine.calculateLikelihoodsAndGenotypes(tracker, refContext, rawContext); return UG_engine.calculateLikelihoodsAndGenotypes(tracker, refContext, rawContext);
} }
@ -295,32 +289,40 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
return lhs; return lhs;
} }
public UGStatistics reduce(VariantCallContext value, UGStatistics sum) { public UGStatistics reduce(List<VariantCallContext> calls, UGStatistics sum) {
// we get a point for reaching reduce // we get a point for reaching reduce
sum.nBasesVisited++; sum.nBasesVisited++;
// can't call the locus because of no coverage boolean wasCallable = false;
if ( value == null ) boolean wasConfidentlyCalled = false;
return sum;
// A call was attempted -- the base was potentially callable for ( VariantCallContext call : calls ) {
sum.nBasesCallable++; if ( call == null )
continue;
// the base was confidently callable // A call was attempted -- the base was callable
sum.nBasesCalledConfidently += value.confidentlyCalled ? 1 : 0; wasCallable = true;
// can't make a call here // was the base confidently callable?
if ( !value.shouldEmit ) wasConfidentlyCalled = call.confidentlyCalled;
return sum;
try { if ( call.shouldEmit ) {
// we are actually making a call try {
sum.nCallsMade++; // we are actually making a call
writer.add(value); sum.nCallsMade++;
} catch (IllegalArgumentException e) { writer.add(call);
throw new IllegalArgumentException(e.getMessage()); } catch (IllegalArgumentException e) {
throw new IllegalArgumentException(e.getMessage());
}
}
} }
if ( wasCallable )
sum.nBasesCallable++;
if ( wasConfidentlyCalled )
sum.nBasesCalledConfidently++;
return sum; return sum;
} }

View File

@ -41,7 +41,6 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.*; import org.broadinstitute.sting.utils.variantcontext.*;
@ -107,6 +106,7 @@ public class UnifiedGenotyperEngine {
private final GenomeLocParser genomeLocParser; private final GenomeLocParser genomeLocParser;
private final boolean BAQEnabledOnCMDLine; private final boolean BAQEnabledOnCMDLine;
protected static final double SUM_GL_THRESH_NOCALL = VariantContextUtils.SUM_GL_THRESH_NOCALL;
// --------------------------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------------------------
// //
@ -150,22 +150,28 @@ public class UnifiedGenotyperEngine {
* @param rawContext contextual information around the locus * @param rawContext contextual information around the locus
* @return the VariantCallContext object * @return the VariantCallContext object
*/ */
public VariantCallContext calculateLikelihoodsAndGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) { public List<VariantCallContext> calculateLikelihoodsAndGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) {
final GenotypeLikelihoodsCalculationModel.Model model = getCurrentGLModel(tracker, refContext, rawContext ); final List<VariantCallContext> results = new ArrayList<VariantCallContext>(2);
if( model == null ) {
return (UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? generateEmptyContext(tracker, refContext, null, rawContext) : null); final List<GenotypeLikelihoodsCalculationModel.Model> models = getGLModelsToUse(tracker, refContext, rawContext);
if ( models.isEmpty() ) {
results.add(UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? generateEmptyContext(tracker, refContext, null, rawContext) : null);
}
else {
for ( final GenotypeLikelihoodsCalculationModel.Model model : models ) {
final Map<String, AlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext, model);
if ( stratifiedContexts == null ) {
results.add(UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? generateEmptyContext(tracker, refContext, stratifiedContexts, rawContext) : null);
}
else {
final VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model);
if ( vc != null )
results.add(calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model));
}
}
} }
Map<String, AlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext, model); return results;
if ( stratifiedContexts == null ) {
return (UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? generateEmptyContext(tracker, refContext, stratifiedContexts, rawContext) : null);
}
VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model);
if ( vc == null )
return null;
return calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model);
} }
/** /**
@ -177,15 +183,20 @@ public class UnifiedGenotyperEngine {
* @return the VariantContext object * @return the VariantContext object
*/ */
public VariantContext calculateLikelihoods(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) { public VariantContext calculateLikelihoods(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) {
final GenotypeLikelihoodsCalculationModel.Model model = getCurrentGLModel( tracker, refContext, rawContext ); final List<GenotypeLikelihoodsCalculationModel.Model> models = getGLModelsToUse(tracker, refContext, rawContext);
if( model == null ) if ( models.isEmpty() ) {
return null; return null;
}
Map<String, AlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext, model); for ( final GenotypeLikelihoodsCalculationModel.Model model : models ) {
if ( stratifiedContexts == null ) final Map<String, AlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext, model);
return null; // return the first valid one we encounter
if ( stratifiedContexts != null )
return calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model);
return calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model); }
return null;
} }
/** /**
@ -198,17 +209,18 @@ public class UnifiedGenotyperEngine {
* @return the VariantCallContext object * @return the VariantCallContext object
*/ */
public VariantCallContext calculateGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, VariantContext vc) { public VariantCallContext calculateGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, VariantContext vc) {
final GenotypeLikelihoodsCalculationModel.Model model = getCurrentGLModel(tracker, refContext, rawContext ); final List<GenotypeLikelihoodsCalculationModel.Model> models = getGLModelsToUse(tracker, refContext, rawContext);
if( model == null ) { if ( models.isEmpty() ) {
return null; return null;
} }
Map<String, AlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext, model);
// return the first one
final GenotypeLikelihoodsCalculationModel.Model model = models.get(0);
final Map<String, AlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext, model);
return calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model); return calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model);
} }
// --------------------------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------------------------
// //
// Private implementation helpers // Private implementation helpers
@ -243,13 +255,9 @@ public class UnifiedGenotyperEngine {
vc = new VariantContextBuilder("UG_call", ref.getLocus().getContig(), ref.getLocus().getStart(), ref.getLocus().getStart(), alleles).make(); vc = new VariantContextBuilder("UG_call", ref.getLocus().getContig(), ref.getLocus().getStart(), ref.getLocus().getStart(), alleles).make();
} }
if ( annotationEngine != null ) { if ( annotationEngine != null && rawContext.hasBasePileup() ) {
// Note: we want to use the *unfiltered* and *unBAQed* context for the annotations // Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
ReadBackedPileup pileup = null; final ReadBackedPileup pileup = rawContext.getBasePileup();
if (rawContext.hasExtendedEventPileup())
pileup = rawContext.getExtendedEventPileup();
else if (rawContext.hasBasePileup())
pileup = rawContext.getBasePileup();
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup); stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);
vc = annotationEngine.annotateContext(tracker, ref, stratifiedContexts, vc); vc = annotationEngine.annotateContext(tracker, ref, stratifiedContexts, vc);
@ -409,13 +417,9 @@ public class UnifiedGenotyperEngine {
builder.attributes(attributes); builder.attributes(attributes);
VariantContext vcCall = builder.make(); VariantContext vcCall = builder.make();
if ( annotationEngine != null && !limitedContext ) { if ( annotationEngine != null && !limitedContext && rawContext.hasBasePileup() ) {
// Note: we want to use the *unfiltered* and *unBAQed* context for the annotations // Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
ReadBackedPileup pileup = null; final ReadBackedPileup pileup = rawContext.getBasePileup();
if (rawContext.hasExtendedEventPileup())
pileup = rawContext.getExtendedEventPileup();
else if (rawContext.hasBasePileup())
pileup = rawContext.getBasePileup();
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup); stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);
vcCall = annotationEngine.annotateContext(tracker, refContext, stratifiedContexts, vcCall); vcCall = annotationEngine.annotateContext(tracker, refContext, stratifiedContexts, vcCall);
@ -432,52 +436,33 @@ public class UnifiedGenotyperEngine {
private Map<String, AlignmentContext> getFilteredAndStratifiedContexts(UnifiedArgumentCollection UAC, ReferenceContext refContext, AlignmentContext rawContext, final GenotypeLikelihoodsCalculationModel.Model model) { private Map<String, AlignmentContext> getFilteredAndStratifiedContexts(UnifiedArgumentCollection UAC, ReferenceContext refContext, AlignmentContext rawContext, final GenotypeLikelihoodsCalculationModel.Model model) {
Map<String, AlignmentContext> stratifiedContexts = null; if ( !BaseUtils.isRegularBase(refContext.getBase()) || !rawContext.hasBasePileup() )
if ( !BaseUtils.isRegularBase( refContext.getBase() ) )
return null; return null;
if ( model.name().toUpperCase().contains("INDEL")) { Map<String, AlignmentContext> stratifiedContexts = null;
if (UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) { if ( model.name().contains("INDEL") ) {
// regular pileup in this case
ReadBackedPileup pileup = rawContext.getBasePileup() .getMappingFilteredPileup(UAC.MIN_BASE_QUALTY_SCORE);
// don't call when there is no coverage final ReadBackedPileup pileup = rawContext.getBasePileup().getMappingFilteredPileup(UAC.MIN_BASE_QUALTY_SCORE);
if ( pileup.getNumberOfElements() == 0 && UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES ) // don't call when there is no coverage
return null; if ( pileup.getNumberOfElements() == 0 && UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES )
return null;
// stratify the AlignmentContext and cut by sample // stratify the AlignmentContext and cut by sample
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup); stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);
} else { } else if ( model.name().contains("SNP") ) {
// todo - tmp will get rid of extended events so this wont be needed
if (!rawContext.hasExtendedEventPileup())
return null;
ReadBackedExtendedEventPileup rawPileup = rawContext.getExtendedEventPileup();
// filter the context based on min mapping quality
ReadBackedExtendedEventPileup pileup = rawPileup.getMappingFilteredPileup(UAC.MIN_BASE_QUALTY_SCORE);
// don't call when there is no coverage
if ( pileup.getNumberOfElements() == 0 && UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES )
return null;
// stratify the AlignmentContext and cut by sample
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);
}
} else if ( model.name().toUpperCase().contains("SNP") ) {
// stratify the AlignmentContext and cut by sample // stratify the AlignmentContext and cut by sample
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(rawContext.getBasePileup()); stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(rawContext.getBasePileup());
if( !(UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES && UAC.GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) ) { if ( !(UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES && UAC.GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) ) {
int numDeletions = 0; int numDeletions = 0;
for( final PileupElement p : rawContext.getBasePileup() ) { for ( final PileupElement p : rawContext.getBasePileup() ) {
if( p.isDeletion() ) { numDeletions++; } if ( p.isDeletion() )
numDeletions++;
} }
if( ((double) numDeletions) / ((double) rawContext.getBasePileup().getNumberOfElements()) > UAC.MAX_DELETION_FRACTION ) { if ( ((double) numDeletions) / ((double) rawContext.getBasePileup().getNumberOfElements()) > UAC.MAX_DELETION_FRACTION ) {
return null; return null;
} }
} }
@ -516,12 +501,10 @@ public class UnifiedGenotyperEngine {
int depth = 0; int depth = 0;
if (isCovered) { if ( isCovered ) {
AlignmentContext context = contexts.get(sample); AlignmentContext context = contexts.get(sample);
if (context.hasBasePileup()) if ( context.hasBasePileup() )
depth = context.getBasePileup().depthOfCoverage(); depth = context.getBasePileup().depthOfCoverage();
else if (context.hasExtendedEventPileup())
depth = context.getExtendedEventPileup().depthOfCoverage();
} }
P_of_ref *= 1.0 - (theta / 2.0) * getRefBinomialProb(depth); P_of_ref *= 1.0 - (theta / 2.0) * getRefBinomialProb(depth);
@ -576,52 +559,48 @@ public class UnifiedGenotyperEngine {
(UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES && QualityUtils.phredScaleErrorRate(PofF) >= UAC.STANDARD_CONFIDENCE_FOR_CALLING); (UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES && QualityUtils.phredScaleErrorRate(PofF) >= UAC.STANDARD_CONFIDENCE_FOR_CALLING);
} }
// decide whether we are currently processing SNPs, indels, or neither // decide whether we are currently processing SNPs, indels, neither, or both
private GenotypeLikelihoodsCalculationModel.Model getCurrentGLModel(final RefMetaDataTracker tracker, final ReferenceContext refContext, private List<GenotypeLikelihoodsCalculationModel.Model> getGLModelsToUse(final RefMetaDataTracker tracker,
final AlignmentContext rawContext ) { final ReferenceContext refContext,
if (rawContext.hasExtendedEventPileup() ) { final AlignmentContext rawContext) {
// todo - remove this code
if ((UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.INDEL) && final List<GenotypeLikelihoodsCalculationModel.Model> models = new ArrayList<GenotypeLikelihoodsCalculationModel.Model>(2);
(UAC.GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) )
return GenotypeLikelihoodsCalculationModel.Model.INDEL; if ( rawContext.hasBasePileup() ) {
}
else {
// no extended event pileup
// if we're genotyping given alleles and we have a requested SNP at this position, do SNP // if we're genotyping given alleles and we have a requested SNP at this position, do SNP
if (UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) { if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
VariantContext vcInput = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, refContext, rawContext.getLocation(), false, logger, UAC.alleles); final VariantContext vcInput = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, refContext, rawContext.getLocation(), false, logger, UAC.alleles);
if (vcInput == null) if ( vcInput == null )
return null; return null;
// todo - no support to genotype MNP's yet if ( vcInput.isSNP() ) {
if (vcInput.isMNP()) // ignore SNPs if the user chose INDEL mode only
return null;
if (vcInput.isSNP()) {
if ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH ) if ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH )
return GenotypeLikelihoodsCalculationModel.Model.SNP; models.add(GenotypeLikelihoodsCalculationModel.Model.SNP);
else if ( UAC.GLmodel.name().toUpperCase().contains("SNP")) else if ( UAC.GLmodel.name().toUpperCase().contains("SNP") )
return UAC.GLmodel; models.add(UAC.GLmodel);
else
// ignore SNP's if user chose INDEL mode
return null;
} }
else if ((vcInput.isIndel() || vcInput.isMixed())) { else if ( vcInput.isIndel() || vcInput.isMixed() ) {
// ignore INDELs if the user chose SNP mode only
if ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH ) if ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH )
return GenotypeLikelihoodsCalculationModel.Model.INDEL; models.add(GenotypeLikelihoodsCalculationModel.Model.INDEL);
else if (UAC.GLmodel.name().toUpperCase().contains("INDEL")) else if (UAC.GLmodel.name().toUpperCase().contains("INDEL"))
return UAC.GLmodel; models.add(UAC.GLmodel);
} }
// No support for other types yet
} }
else { else {
// todo - this assumes SNP's take priority when BOTH is selected, should do a smarter way once extended events are removed if ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH ) {
if( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH ) models.add(GenotypeLikelihoodsCalculationModel.Model.SNP);
return GenotypeLikelihoodsCalculationModel.Model.SNP; models.add(GenotypeLikelihoodsCalculationModel.Model.INDEL);
else if (UAC.GLmodel.name().toUpperCase().contains("SNP") || UAC.GLmodel.name().toUpperCase().contains("INDEL")) }
return UAC.GLmodel; else {
models.add(UAC.GLmodel);
}
} }
} }
return null;
return models;
} }
protected static void computeAlleleFrequencyPriors(final int N, final double[] priors, final double theta) { protected static void computeAlleleFrequencyPriors(final int N, final double[] priors, final double theta) {
@ -668,24 +647,21 @@ public class UnifiedGenotyperEngine {
else throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model); else throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model);
} }
private static Map<String,GenotypeLikelihoodsCalculationModel> getGenotypeLikelihoodsCalculationObject(Logger logger, UnifiedArgumentCollection UAC) { private static Map<String, GenotypeLikelihoodsCalculationModel> getGenotypeLikelihoodsCalculationObject(Logger logger, UnifiedArgumentCollection UAC) {
final Map<String, GenotypeLikelihoodsCalculationModel> glcm = new HashMap<String, GenotypeLikelihoodsCalculationModel>();
Map<String, GenotypeLikelihoodsCalculationModel> glcm = new HashMap<String, GenotypeLikelihoodsCalculationModel>(); final List<Class<? extends GenotypeLikelihoodsCalculationModel>> glmClasses = new PluginManager<GenotypeLikelihoodsCalculationModel>(GenotypeLikelihoodsCalculationModel.class).getPlugins();
// GenotypeLikelihoodsCalculationModel.Model.
List<Class<? extends GenotypeLikelihoodsCalculationModel>> glmClasses = new PluginManager<GenotypeLikelihoodsCalculationModel>(GenotypeLikelihoodsCalculationModel.class).getPlugins();
for (int i = 0; i < glmClasses.size(); i++) { for (int i = 0; i < glmClasses.size(); i++) {
Class<? extends GenotypeLikelihoodsCalculationModel> glmClass = glmClasses.get(i); final Class<? extends GenotypeLikelihoodsCalculationModel> glmClass = glmClasses.get(i);
String key = glmClass.getSimpleName().replaceAll("GenotypeLikelihoodsCalculationModel","").toUpperCase(); final String key = glmClass.getSimpleName().replaceAll("GenotypeLikelihoodsCalculationModel","").toUpperCase();
//System.out.println("KEY:"+key+"\t" + glmClass.getSimpleName());
try { try {
Object args[] = new Object[]{UAC,logger}; final Object args[] = new Object[]{UAC,logger};
Constructor c = glmClass.getDeclaredConstructor(UnifiedArgumentCollection.class, Logger.class); final Constructor c = glmClass.getDeclaredConstructor(UnifiedArgumentCollection.class, Logger.class);
glcm.put(key, (GenotypeLikelihoodsCalculationModel)c.newInstance(args)); glcm.put(key, (GenotypeLikelihoodsCalculationModel)c.newInstance(args));
} }
catch (Exception e) { catch (Exception e) {
throw new UserException("Incorrect specification for argument glm:"+UAC.GLmodel+e.getMessage()); throw new UserException("The likelihoods model provided for the -glm argument (" + UAC.GLmodel + ") is not a valid option: " + e.getMessage());
} }
} }
@ -719,7 +695,7 @@ public class UnifiedGenotyperEngine {
VariantContext vc = null; VariantContext vc = null;
// search for usable record // search for usable record
for( final VariantContext vc_input : tracker.getValues(allelesBinding, loc) ) { for ( final VariantContext vc_input : tracker.getValues(allelesBinding, loc) ) {
if ( vc_input != null && ! vc_input.isFiltered() && (! requireSNP || vc_input.isSNP() )) { if ( vc_input != null && ! vc_input.isFiltered() && (! requireSNP || vc_input.isSNP() )) {
if ( vc == null ) { if ( vc == null ) {
vc = vc_input; vc = vc_input;

View File

@ -379,12 +379,12 @@ public class GenotypeAndValidateWalker extends RodWalker<GenotypeAndValidateWalk
VariantCallContext call; VariantCallContext call;
if ( vcComp.isSNP() ) { if ( vcComp.isSNP() ) {
call = snpEngine.calculateLikelihoodsAndGenotypes(tracker, ref, context); call = snpEngine.calculateLikelihoodsAndGenotypes(tracker, ref, context).get(0);
} else if ( vcComp.isIndel() ) { } else if ( vcComp.isIndel() ) {
call = indelEngine.calculateLikelihoodsAndGenotypes(tracker, ref, context); call = indelEngine.calculateLikelihoodsAndGenotypes(tracker, ref, context).get(0);
} else if ( bamIsTruth ) { } else if ( bamIsTruth ) {
// assume it's a SNP if no variation is present; this is necessary so that we can test supposed monomorphic sites against the truth bam // assume it's a SNP if no variation is present; this is necessary so that we can test supposed monomorphic sites against the truth bam
call = snpEngine.calculateLikelihoodsAndGenotypes(tracker, ref, context); call = snpEngine.calculateLikelihoodsAndGenotypes(tracker, ref, context).get(0);
} else { } else {
logger.info("Not SNP or INDEL " + vcComp.getChr() + ":" + vcComp.getStart() + " " + vcComp.getAlleles()); logger.info("Not SNP or INDEL " + vcComp.getChr() + ":" + vcComp.getStart() + " " + vcComp.getAlleles());
return counter; return counter;