First attempt at removing all traces of extended events from UG; integration tests are expected to fail.
This commit is contained in:
parent
e61e162c81
commit
e4469a83ee
|
|
@ -32,9 +32,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
|
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
|
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
|
|
@ -100,43 +98,32 @@ public class ConsensusAlleleCounter {
|
||||||
|
|
||||||
int insCount = 0, delCount = 0;
|
int insCount = 0, delCount = 0;
|
||||||
// quick check of total number of indels in pileup
|
// quick check of total number of indels in pileup
|
||||||
for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) {
|
for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) {
|
||||||
AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
|
final AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
|
||||||
|
|
||||||
if (context.hasExtendedEventPileup()) {
|
if ( context.hasBasePileup() ) {
|
||||||
final ReadBackedExtendedEventPileup indelPileup = context.getExtendedEventPileup();
|
|
||||||
insCount += indelPileup.getNumberOfInsertions();
|
|
||||||
delCount += indelPileup.getNumberOfDeletions();
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
final ReadBackedPileup indelPileup = context.getBasePileup();
|
final ReadBackedPileup indelPileup = context.getBasePileup();
|
||||||
insCount += indelPileup.getNumberOfInsertionsAfterThisElement();
|
insCount += indelPileup.getNumberOfInsertionsAfterThisElement();
|
||||||
delCount += indelPileup.getNumberOfDeletionsAfterThisElement();
|
delCount += indelPileup.getNumberOfDeletionsAfterThisElement();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (insCount < minIndelCountForGenotyping && delCount < minIndelCountForGenotyping)
|
if ( insCount < minIndelCountForGenotyping && delCount < minIndelCountForGenotyping )
|
||||||
return Collections.emptyMap();
|
return Collections.emptyMap();
|
||||||
|
|
||||||
for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) {
|
for (Map.Entry<String, AlignmentContext> sample : contexts.entrySet()) {
|
||||||
// todo -- warning, can be duplicating expensive partition here
|
// todo -- warning, can be duplicating expensive partition here
|
||||||
AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
|
AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
|
||||||
|
|
||||||
final ReadBackedPileup indelPileup;
|
if ( !context.hasBasePileup() )
|
||||||
|
continue;
|
||||||
|
|
||||||
final int nIndelReads, nReadsOverall;
|
final ReadBackedPileup indelPileup = context.getBasePileup();
|
||||||
|
|
||||||
if (context.hasExtendedEventPileup()) {
|
final int nIndelReads = indelPileup.getNumberOfInsertionsAfterThisElement() + indelPileup.getNumberOfDeletionsAfterThisElement();
|
||||||
indelPileup = context.getExtendedEventPileup();
|
final int nReadsOverall = indelPileup.getNumberOfElements();
|
||||||
nIndelReads = ((ReadBackedExtendedEventPileup)indelPileup).getNumberOfInsertions() + indelPileup.getNumberOfDeletions();
|
|
||||||
nReadsOverall = indelPileup.getNumberOfElements();
|
if ( nIndelReads == 0 || (nIndelReads / (1.0 * nReadsOverall)) < minFractionInOneSample ) {
|
||||||
}
|
|
||||||
else {
|
|
||||||
indelPileup = context.getBasePileup();
|
|
||||||
nIndelReads = indelPileup.getNumberOfInsertionsAfterThisElement() + indelPileup.getNumberOfDeletionsAfterThisElement();
|
|
||||||
nReadsOverall = indelPileup.getNumberOfElements();
|
|
||||||
}
|
|
||||||
if ( nIndelReads == 0 || (nIndelReads / (1.0 * nReadsOverall)) < minFractionInOneSample) {
|
|
||||||
// if ( nIndelReads > 0 )
|
// if ( nIndelReads > 0 )
|
||||||
// logger.info("Skipping sample " + sample.getKey() + " with nIndelReads " + nIndelReads + " nReads " + nReadsOverall);
|
// logger.info("Skipping sample " + sample.getKey() + " with nIndelReads " + nIndelReads + " nReads " + nReadsOverall);
|
||||||
continue;
|
continue;
|
||||||
|
|
@ -161,7 +148,7 @@ public class ConsensusAlleleCounter {
|
||||||
*/
|
*/
|
||||||
String indelString = p.getEventBases();
|
String indelString = p.getEventBases();
|
||||||
|
|
||||||
if (isInsertion(p)) {
|
if ( p.isBeforeInsertion() ) {
|
||||||
boolean foundKey = false;
|
boolean foundKey = false;
|
||||||
// copy of hashmap into temp arrayList
|
// copy of hashmap into temp arrayList
|
||||||
ArrayList<Pair<String,Integer>> cList = new ArrayList<Pair<String,Integer>>();
|
ArrayList<Pair<String,Integer>> cList = new ArrayList<Pair<String,Integer>>();
|
||||||
|
|
@ -229,7 +216,7 @@ public class ConsensusAlleleCounter {
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
else if (isDeletion(p)) {
|
else if ( p.isBeforeDeletion() ) {
|
||||||
indelString = String.format("D%d",p.getEventLength());
|
indelString = String.format("D%d",p.getEventLength());
|
||||||
int cnt = consensusIndelStrings.containsKey(indelString)? consensusIndelStrings.get(indelString):0;
|
int cnt = consensusIndelStrings.containsKey(indelString)? consensusIndelStrings.get(indelString):0;
|
||||||
consensusIndelStrings.put(indelString,cnt+1);
|
consensusIndelStrings.put(indelString,cnt+1);
|
||||||
|
|
@ -241,25 +228,6 @@ public class ConsensusAlleleCounter {
|
||||||
return consensusIndelStrings;
|
return consensusIndelStrings;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// todo - helper routines to check for extended pileup elements, to remove when extended events are removed
|
|
||||||
private static final boolean isInsertion(final PileupElement p) {
|
|
||||||
if (p instanceof ExtendedEventPileupElement)
|
|
||||||
return ((ExtendedEventPileupElement) p).isInsertion();
|
|
||||||
else
|
|
||||||
return p.isBeforeInsertion();
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private static boolean isDeletion(final PileupElement p) {
|
|
||||||
if (p instanceof ExtendedEventPileupElement)
|
|
||||||
return p.isDeletion();
|
|
||||||
else
|
|
||||||
return p.isBeforeDeletion();
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
private List<Allele> consensusCountsToAlleles(final ReferenceContext ref,
|
private List<Allele> consensusCountsToAlleles(final ReferenceContext ref,
|
||||||
final Map<String, Integer> consensusIndelStrings) {
|
final Map<String, Integer> consensusIndelStrings) {
|
||||||
final GenomeLoc loc = ref.getLocus();
|
final GenomeLoc loc = ref.getLocus();
|
||||||
|
|
|
||||||
|
|
@ -120,7 +120,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
if ( sample.hasLikelihoods() ) {
|
if ( sample.hasLikelihoods() ) {
|
||||||
double[] gls = sample.getLikelihoods().getAsVector();
|
double[] gls = sample.getLikelihoods().getAsVector();
|
||||||
|
|
||||||
if ( MathUtils.sum(gls) < VariantContextUtils.SUM_GL_THRESH_NOCALL )
|
if ( MathUtils.sum(gls) < UnifiedGenotyperEngine.SUM_GL_THRESH_NOCALL )
|
||||||
genotypeLikelihoods.add(gls);
|
genotypeLikelihoods.add(gls);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -114,7 +114,7 @@ import java.util.*;
|
||||||
@Reference(window=@Window(start=-200,stop=200))
|
@Reference(window=@Window(start=-200,stop=200))
|
||||||
@By(DataSource.REFERENCE)
|
@By(DataSource.REFERENCE)
|
||||||
@Downsample(by=DownsampleType.BY_SAMPLE, toCoverage=250)
|
@Downsample(by=DownsampleType.BY_SAMPLE, toCoverage=250)
|
||||||
public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGenotyper.UGStatistics> implements TreeReducible<UnifiedGenotyper.UGStatistics>, AnnotatorCompatibleWalker {
|
public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, UnifiedGenotyper.UGStatistics> implements TreeReducible<UnifiedGenotyper.UGStatistics>, AnnotatorCompatibleWalker {
|
||||||
|
|
||||||
@ArgumentCollection
|
@ArgumentCollection
|
||||||
private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
|
private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
|
||||||
|
|
@ -173,12 +173,6 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
||||||
@Override
|
@Override
|
||||||
public boolean includeReadsWithDeletionAtLoci() { return true; }
|
public boolean includeReadsWithDeletionAtLoci() { return true; }
|
||||||
|
|
||||||
// enable extended events for indels
|
|
||||||
@Override
|
|
||||||
public boolean generateExtendedEvents() {
|
|
||||||
return (UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.SNP && UAC.GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Inner class for collecting output statistics from the UG
|
* Inner class for collecting output statistics from the UG
|
||||||
*/
|
*/
|
||||||
|
|
@ -281,7 +275,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
||||||
* @param rawContext contextual information around the locus
|
* @param rawContext contextual information around the locus
|
||||||
* @return the VariantCallContext object
|
* @return the VariantCallContext object
|
||||||
*/
|
*/
|
||||||
public VariantCallContext map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) {
|
public List<VariantCallContext> map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) {
|
||||||
return UG_engine.calculateLikelihoodsAndGenotypes(tracker, refContext, rawContext);
|
return UG_engine.calculateLikelihoodsAndGenotypes(tracker, refContext, rawContext);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -295,32 +289,40 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
||||||
return lhs;
|
return lhs;
|
||||||
}
|
}
|
||||||
|
|
||||||
public UGStatistics reduce(VariantCallContext value, UGStatistics sum) {
|
public UGStatistics reduce(List<VariantCallContext> calls, UGStatistics sum) {
|
||||||
// we get a point for reaching reduce
|
// we get a point for reaching reduce
|
||||||
sum.nBasesVisited++;
|
sum.nBasesVisited++;
|
||||||
|
|
||||||
// can't call the locus because of no coverage
|
boolean wasCallable = false;
|
||||||
if ( value == null )
|
boolean wasConfidentlyCalled = false;
|
||||||
return sum;
|
|
||||||
|
|
||||||
// A call was attempted -- the base was potentially callable
|
for ( VariantCallContext call : calls ) {
|
||||||
sum.nBasesCallable++;
|
if ( call == null )
|
||||||
|
continue;
|
||||||
|
|
||||||
// the base was confidently callable
|
// A call was attempted -- the base was callable
|
||||||
sum.nBasesCalledConfidently += value.confidentlyCalled ? 1 : 0;
|
wasCallable = true;
|
||||||
|
|
||||||
// can't make a call here
|
// was the base confidently callable?
|
||||||
if ( !value.shouldEmit )
|
wasConfidentlyCalled = call.confidentlyCalled;
|
||||||
return sum;
|
|
||||||
|
|
||||||
try {
|
if ( call.shouldEmit ) {
|
||||||
// we are actually making a call
|
try {
|
||||||
sum.nCallsMade++;
|
// we are actually making a call
|
||||||
writer.add(value);
|
sum.nCallsMade++;
|
||||||
} catch (IllegalArgumentException e) {
|
writer.add(call);
|
||||||
throw new IllegalArgumentException(e.getMessage());
|
} catch (IllegalArgumentException e) {
|
||||||
|
throw new IllegalArgumentException(e.getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( wasCallable )
|
||||||
|
sum.nBasesCallable++;
|
||||||
|
|
||||||
|
if ( wasConfidentlyCalled )
|
||||||
|
sum.nBasesCalledConfidently++;
|
||||||
|
|
||||||
return sum;
|
return sum;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -41,7 +41,6 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
|
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||||
|
|
||||||
|
|
@ -107,6 +106,7 @@ public class UnifiedGenotyperEngine {
|
||||||
private final GenomeLocParser genomeLocParser;
|
private final GenomeLocParser genomeLocParser;
|
||||||
private final boolean BAQEnabledOnCMDLine;
|
private final boolean BAQEnabledOnCMDLine;
|
||||||
|
|
||||||
|
protected static final double SUM_GL_THRESH_NOCALL = VariantContextUtils.SUM_GL_THRESH_NOCALL;
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
|
|
@ -150,22 +150,28 @@ public class UnifiedGenotyperEngine {
|
||||||
* @param rawContext contextual information around the locus
|
* @param rawContext contextual information around the locus
|
||||||
* @return the VariantCallContext object
|
* @return the VariantCallContext object
|
||||||
*/
|
*/
|
||||||
public VariantCallContext calculateLikelihoodsAndGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) {
|
public List<VariantCallContext> calculateLikelihoodsAndGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) {
|
||||||
final GenotypeLikelihoodsCalculationModel.Model model = getCurrentGLModel(tracker, refContext, rawContext );
|
final List<VariantCallContext> results = new ArrayList<VariantCallContext>(2);
|
||||||
if( model == null ) {
|
|
||||||
return (UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? generateEmptyContext(tracker, refContext, null, rawContext) : null);
|
final List<GenotypeLikelihoodsCalculationModel.Model> models = getGLModelsToUse(tracker, refContext, rawContext);
|
||||||
|
if ( models.isEmpty() ) {
|
||||||
|
results.add(UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? generateEmptyContext(tracker, refContext, null, rawContext) : null);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
for ( final GenotypeLikelihoodsCalculationModel.Model model : models ) {
|
||||||
|
final Map<String, AlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext, model);
|
||||||
|
if ( stratifiedContexts == null ) {
|
||||||
|
results.add(UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? generateEmptyContext(tracker, refContext, stratifiedContexts, rawContext) : null);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
final VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model);
|
||||||
|
if ( vc != null )
|
||||||
|
results.add(calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Map<String, AlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext, model);
|
return results;
|
||||||
if ( stratifiedContexts == null ) {
|
|
||||||
return (UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? generateEmptyContext(tracker, refContext, stratifiedContexts, rawContext) : null);
|
|
||||||
}
|
|
||||||
|
|
||||||
VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model);
|
|
||||||
if ( vc == null )
|
|
||||||
return null;
|
|
||||||
|
|
||||||
return calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -177,15 +183,20 @@ public class UnifiedGenotyperEngine {
|
||||||
* @return the VariantContext object
|
* @return the VariantContext object
|
||||||
*/
|
*/
|
||||||
public VariantContext calculateLikelihoods(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) {
|
public VariantContext calculateLikelihoods(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) {
|
||||||
final GenotypeLikelihoodsCalculationModel.Model model = getCurrentGLModel( tracker, refContext, rawContext );
|
final List<GenotypeLikelihoodsCalculationModel.Model> models = getGLModelsToUse(tracker, refContext, rawContext);
|
||||||
if( model == null )
|
if ( models.isEmpty() ) {
|
||||||
return null;
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
Map<String, AlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext, model);
|
for ( final GenotypeLikelihoodsCalculationModel.Model model : models ) {
|
||||||
if ( stratifiedContexts == null )
|
final Map<String, AlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext, model);
|
||||||
return null;
|
// return the first valid one we encounter
|
||||||
|
if ( stratifiedContexts != null )
|
||||||
|
return calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model);
|
||||||
|
|
||||||
return calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model);
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -198,17 +209,18 @@ public class UnifiedGenotyperEngine {
|
||||||
* @return the VariantCallContext object
|
* @return the VariantCallContext object
|
||||||
*/
|
*/
|
||||||
public VariantCallContext calculateGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, VariantContext vc) {
|
public VariantCallContext calculateGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, VariantContext vc) {
|
||||||
final GenotypeLikelihoodsCalculationModel.Model model = getCurrentGLModel(tracker, refContext, rawContext );
|
final List<GenotypeLikelihoodsCalculationModel.Model> models = getGLModelsToUse(tracker, refContext, rawContext);
|
||||||
if( model == null ) {
|
if ( models.isEmpty() ) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
Map<String, AlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext, model);
|
|
||||||
|
// return the first one
|
||||||
|
final GenotypeLikelihoodsCalculationModel.Model model = models.get(0);
|
||||||
|
final Map<String, AlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext, model);
|
||||||
return calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model);
|
return calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
// Private implementation helpers
|
// Private implementation helpers
|
||||||
|
|
@ -243,13 +255,9 @@ public class UnifiedGenotyperEngine {
|
||||||
vc = new VariantContextBuilder("UG_call", ref.getLocus().getContig(), ref.getLocus().getStart(), ref.getLocus().getStart(), alleles).make();
|
vc = new VariantContextBuilder("UG_call", ref.getLocus().getContig(), ref.getLocus().getStart(), ref.getLocus().getStart(), alleles).make();
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( annotationEngine != null ) {
|
if ( annotationEngine != null && rawContext.hasBasePileup() ) {
|
||||||
// Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
|
// Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
|
||||||
ReadBackedPileup pileup = null;
|
final ReadBackedPileup pileup = rawContext.getBasePileup();
|
||||||
if (rawContext.hasExtendedEventPileup())
|
|
||||||
pileup = rawContext.getExtendedEventPileup();
|
|
||||||
else if (rawContext.hasBasePileup())
|
|
||||||
pileup = rawContext.getBasePileup();
|
|
||||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);
|
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);
|
||||||
|
|
||||||
vc = annotationEngine.annotateContext(tracker, ref, stratifiedContexts, vc);
|
vc = annotationEngine.annotateContext(tracker, ref, stratifiedContexts, vc);
|
||||||
|
|
@ -409,13 +417,9 @@ public class UnifiedGenotyperEngine {
|
||||||
builder.attributes(attributes);
|
builder.attributes(attributes);
|
||||||
VariantContext vcCall = builder.make();
|
VariantContext vcCall = builder.make();
|
||||||
|
|
||||||
if ( annotationEngine != null && !limitedContext ) {
|
if ( annotationEngine != null && !limitedContext && rawContext.hasBasePileup() ) {
|
||||||
// Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
|
// Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
|
||||||
ReadBackedPileup pileup = null;
|
final ReadBackedPileup pileup = rawContext.getBasePileup();
|
||||||
if (rawContext.hasExtendedEventPileup())
|
|
||||||
pileup = rawContext.getExtendedEventPileup();
|
|
||||||
else if (rawContext.hasBasePileup())
|
|
||||||
pileup = rawContext.getBasePileup();
|
|
||||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);
|
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);
|
||||||
|
|
||||||
vcCall = annotationEngine.annotateContext(tracker, refContext, stratifiedContexts, vcCall);
|
vcCall = annotationEngine.annotateContext(tracker, refContext, stratifiedContexts, vcCall);
|
||||||
|
|
@ -432,52 +436,33 @@ public class UnifiedGenotyperEngine {
|
||||||
|
|
||||||
private Map<String, AlignmentContext> getFilteredAndStratifiedContexts(UnifiedArgumentCollection UAC, ReferenceContext refContext, AlignmentContext rawContext, final GenotypeLikelihoodsCalculationModel.Model model) {
|
private Map<String, AlignmentContext> getFilteredAndStratifiedContexts(UnifiedArgumentCollection UAC, ReferenceContext refContext, AlignmentContext rawContext, final GenotypeLikelihoodsCalculationModel.Model model) {
|
||||||
|
|
||||||
Map<String, AlignmentContext> stratifiedContexts = null;
|
if ( !BaseUtils.isRegularBase(refContext.getBase()) || !rawContext.hasBasePileup() )
|
||||||
|
|
||||||
if ( !BaseUtils.isRegularBase( refContext.getBase() ) )
|
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
if ( model.name().toUpperCase().contains("INDEL")) {
|
Map<String, AlignmentContext> stratifiedContexts = null;
|
||||||
|
|
||||||
if (UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) {
|
if ( model.name().contains("INDEL") ) {
|
||||||
// regular pileup in this case
|
|
||||||
ReadBackedPileup pileup = rawContext.getBasePileup() .getMappingFilteredPileup(UAC.MIN_BASE_QUALTY_SCORE);
|
|
||||||
|
|
||||||
// don't call when there is no coverage
|
final ReadBackedPileup pileup = rawContext.getBasePileup().getMappingFilteredPileup(UAC.MIN_BASE_QUALTY_SCORE);
|
||||||
if ( pileup.getNumberOfElements() == 0 && UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES )
|
// don't call when there is no coverage
|
||||||
return null;
|
if ( pileup.getNumberOfElements() == 0 && UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES )
|
||||||
|
return null;
|
||||||
|
|
||||||
// stratify the AlignmentContext and cut by sample
|
// stratify the AlignmentContext and cut by sample
|
||||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);
|
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);
|
||||||
|
|
||||||
} else {
|
} else if ( model.name().contains("SNP") ) {
|
||||||
|
|
||||||
// todo - tmp will get rid of extended events so this wont be needed
|
|
||||||
if (!rawContext.hasExtendedEventPileup())
|
|
||||||
return null;
|
|
||||||
ReadBackedExtendedEventPileup rawPileup = rawContext.getExtendedEventPileup();
|
|
||||||
|
|
||||||
// filter the context based on min mapping quality
|
|
||||||
ReadBackedExtendedEventPileup pileup = rawPileup.getMappingFilteredPileup(UAC.MIN_BASE_QUALTY_SCORE);
|
|
||||||
|
|
||||||
// don't call when there is no coverage
|
|
||||||
if ( pileup.getNumberOfElements() == 0 && UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES )
|
|
||||||
return null;
|
|
||||||
|
|
||||||
// stratify the AlignmentContext and cut by sample
|
|
||||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);
|
|
||||||
}
|
|
||||||
} else if ( model.name().toUpperCase().contains("SNP") ) {
|
|
||||||
|
|
||||||
// stratify the AlignmentContext and cut by sample
|
// stratify the AlignmentContext and cut by sample
|
||||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(rawContext.getBasePileup());
|
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(rawContext.getBasePileup());
|
||||||
|
|
||||||
if( !(UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES && UAC.GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) ) {
|
if ( !(UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES && UAC.GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) ) {
|
||||||
int numDeletions = 0;
|
int numDeletions = 0;
|
||||||
for( final PileupElement p : rawContext.getBasePileup() ) {
|
for ( final PileupElement p : rawContext.getBasePileup() ) {
|
||||||
if( p.isDeletion() ) { numDeletions++; }
|
if ( p.isDeletion() )
|
||||||
|
numDeletions++;
|
||||||
}
|
}
|
||||||
if( ((double) numDeletions) / ((double) rawContext.getBasePileup().getNumberOfElements()) > UAC.MAX_DELETION_FRACTION ) {
|
if ( ((double) numDeletions) / ((double) rawContext.getBasePileup().getNumberOfElements()) > UAC.MAX_DELETION_FRACTION ) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -516,12 +501,10 @@ public class UnifiedGenotyperEngine {
|
||||||
|
|
||||||
int depth = 0;
|
int depth = 0;
|
||||||
|
|
||||||
if (isCovered) {
|
if ( isCovered ) {
|
||||||
AlignmentContext context = contexts.get(sample);
|
AlignmentContext context = contexts.get(sample);
|
||||||
if (context.hasBasePileup())
|
if ( context.hasBasePileup() )
|
||||||
depth = context.getBasePileup().depthOfCoverage();
|
depth = context.getBasePileup().depthOfCoverage();
|
||||||
else if (context.hasExtendedEventPileup())
|
|
||||||
depth = context.getExtendedEventPileup().depthOfCoverage();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
P_of_ref *= 1.0 - (theta / 2.0) * getRefBinomialProb(depth);
|
P_of_ref *= 1.0 - (theta / 2.0) * getRefBinomialProb(depth);
|
||||||
|
|
@ -576,52 +559,48 @@ public class UnifiedGenotyperEngine {
|
||||||
(UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES && QualityUtils.phredScaleErrorRate(PofF) >= UAC.STANDARD_CONFIDENCE_FOR_CALLING);
|
(UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES && QualityUtils.phredScaleErrorRate(PofF) >= UAC.STANDARD_CONFIDENCE_FOR_CALLING);
|
||||||
}
|
}
|
||||||
|
|
||||||
// decide whether we are currently processing SNPs, indels, or neither
|
// decide whether we are currently processing SNPs, indels, neither, or both
|
||||||
private GenotypeLikelihoodsCalculationModel.Model getCurrentGLModel(final RefMetaDataTracker tracker, final ReferenceContext refContext,
|
private List<GenotypeLikelihoodsCalculationModel.Model> getGLModelsToUse(final RefMetaDataTracker tracker,
|
||||||
final AlignmentContext rawContext ) {
|
final ReferenceContext refContext,
|
||||||
if (rawContext.hasExtendedEventPileup() ) {
|
final AlignmentContext rawContext) {
|
||||||
// todo - remove this code
|
|
||||||
if ((UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH || UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.INDEL) &&
|
final List<GenotypeLikelihoodsCalculationModel.Model> models = new ArrayList<GenotypeLikelihoodsCalculationModel.Model>(2);
|
||||||
(UAC.GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) )
|
|
||||||
return GenotypeLikelihoodsCalculationModel.Model.INDEL;
|
if ( rawContext.hasBasePileup() ) {
|
||||||
}
|
|
||||||
else {
|
|
||||||
// no extended event pileup
|
|
||||||
// if we're genotyping given alleles and we have a requested SNP at this position, do SNP
|
// if we're genotyping given alleles and we have a requested SNP at this position, do SNP
|
||||||
if (UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) {
|
if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
|
||||||
VariantContext vcInput = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, refContext, rawContext.getLocation(), false, logger, UAC.alleles);
|
final VariantContext vcInput = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, refContext, rawContext.getLocation(), false, logger, UAC.alleles);
|
||||||
if (vcInput == null)
|
if ( vcInput == null )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
// todo - no support to genotype MNP's yet
|
if ( vcInput.isSNP() ) {
|
||||||
if (vcInput.isMNP())
|
// ignore SNPs if the user chose INDEL mode only
|
||||||
return null;
|
|
||||||
|
|
||||||
if (vcInput.isSNP()) {
|
|
||||||
if ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH )
|
if ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH )
|
||||||
return GenotypeLikelihoodsCalculationModel.Model.SNP;
|
models.add(GenotypeLikelihoodsCalculationModel.Model.SNP);
|
||||||
else if ( UAC.GLmodel.name().toUpperCase().contains("SNP"))
|
else if ( UAC.GLmodel.name().toUpperCase().contains("SNP") )
|
||||||
return UAC.GLmodel;
|
models.add(UAC.GLmodel);
|
||||||
else
|
|
||||||
// ignore SNP's if user chose INDEL mode
|
|
||||||
return null;
|
|
||||||
}
|
}
|
||||||
else if ((vcInput.isIndel() || vcInput.isMixed())) {
|
else if ( vcInput.isIndel() || vcInput.isMixed() ) {
|
||||||
|
// ignore INDELs if the user chose SNP mode only
|
||||||
if ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH )
|
if ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH )
|
||||||
return GenotypeLikelihoodsCalculationModel.Model.INDEL;
|
models.add(GenotypeLikelihoodsCalculationModel.Model.INDEL);
|
||||||
else if (UAC.GLmodel.name().toUpperCase().contains("INDEL"))
|
else if (UAC.GLmodel.name().toUpperCase().contains("INDEL"))
|
||||||
return UAC.GLmodel;
|
models.add(UAC.GLmodel);
|
||||||
}
|
}
|
||||||
|
// No support for other types yet
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
// todo - this assumes SNP's take priority when BOTH is selected, should do a smarter way once extended events are removed
|
if ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH ) {
|
||||||
if( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH )
|
models.add(GenotypeLikelihoodsCalculationModel.Model.SNP);
|
||||||
return GenotypeLikelihoodsCalculationModel.Model.SNP;
|
models.add(GenotypeLikelihoodsCalculationModel.Model.INDEL);
|
||||||
else if (UAC.GLmodel.name().toUpperCase().contains("SNP") || UAC.GLmodel.name().toUpperCase().contains("INDEL"))
|
}
|
||||||
return UAC.GLmodel;
|
else {
|
||||||
|
models.add(UAC.GLmodel);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return null;
|
|
||||||
|
return models;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected static void computeAlleleFrequencyPriors(final int N, final double[] priors, final double theta) {
|
protected static void computeAlleleFrequencyPriors(final int N, final double[] priors, final double theta) {
|
||||||
|
|
@ -668,24 +647,21 @@ public class UnifiedGenotyperEngine {
|
||||||
else throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model);
|
else throw new IllegalArgumentException("Unexpected GenotypeCalculationModel " + model);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Map<String,GenotypeLikelihoodsCalculationModel> getGenotypeLikelihoodsCalculationObject(Logger logger, UnifiedArgumentCollection UAC) {
|
private static Map<String, GenotypeLikelihoodsCalculationModel> getGenotypeLikelihoodsCalculationObject(Logger logger, UnifiedArgumentCollection UAC) {
|
||||||
|
|
||||||
|
final Map<String, GenotypeLikelihoodsCalculationModel> glcm = new HashMap<String, GenotypeLikelihoodsCalculationModel>();
|
||||||
Map<String, GenotypeLikelihoodsCalculationModel> glcm = new HashMap<String, GenotypeLikelihoodsCalculationModel>();
|
final List<Class<? extends GenotypeLikelihoodsCalculationModel>> glmClasses = new PluginManager<GenotypeLikelihoodsCalculationModel>(GenotypeLikelihoodsCalculationModel.class).getPlugins();
|
||||||
// GenotypeLikelihoodsCalculationModel.Model.
|
|
||||||
List<Class<? extends GenotypeLikelihoodsCalculationModel>> glmClasses = new PluginManager<GenotypeLikelihoodsCalculationModel>(GenotypeLikelihoodsCalculationModel.class).getPlugins();
|
|
||||||
|
|
||||||
for (int i = 0; i < glmClasses.size(); i++) {
|
for (int i = 0; i < glmClasses.size(); i++) {
|
||||||
Class<? extends GenotypeLikelihoodsCalculationModel> glmClass = glmClasses.get(i);
|
final Class<? extends GenotypeLikelihoodsCalculationModel> glmClass = glmClasses.get(i);
|
||||||
String key = glmClass.getSimpleName().replaceAll("GenotypeLikelihoodsCalculationModel","").toUpperCase();
|
final String key = glmClass.getSimpleName().replaceAll("GenotypeLikelihoodsCalculationModel","").toUpperCase();
|
||||||
//System.out.println("KEY:"+key+"\t" + glmClass.getSimpleName());
|
|
||||||
try {
|
try {
|
||||||
Object args[] = new Object[]{UAC,logger};
|
final Object args[] = new Object[]{UAC,logger};
|
||||||
Constructor c = glmClass.getDeclaredConstructor(UnifiedArgumentCollection.class, Logger.class);
|
final Constructor c = glmClass.getDeclaredConstructor(UnifiedArgumentCollection.class, Logger.class);
|
||||||
glcm.put(key, (GenotypeLikelihoodsCalculationModel)c.newInstance(args));
|
glcm.put(key, (GenotypeLikelihoodsCalculationModel)c.newInstance(args));
|
||||||
}
|
}
|
||||||
catch (Exception e) {
|
catch (Exception e) {
|
||||||
throw new UserException("Incorrect specification for argument glm:"+UAC.GLmodel+e.getMessage());
|
throw new UserException("The likelihoods model provided for the -glm argument (" + UAC.GLmodel + ") is not a valid option: " + e.getMessage());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -719,7 +695,7 @@ public class UnifiedGenotyperEngine {
|
||||||
VariantContext vc = null;
|
VariantContext vc = null;
|
||||||
|
|
||||||
// search for usable record
|
// search for usable record
|
||||||
for( final VariantContext vc_input : tracker.getValues(allelesBinding, loc) ) {
|
for ( final VariantContext vc_input : tracker.getValues(allelesBinding, loc) ) {
|
||||||
if ( vc_input != null && ! vc_input.isFiltered() && (! requireSNP || vc_input.isSNP() )) {
|
if ( vc_input != null && ! vc_input.isFiltered() && (! requireSNP || vc_input.isSNP() )) {
|
||||||
if ( vc == null ) {
|
if ( vc == null ) {
|
||||||
vc = vc_input;
|
vc = vc_input;
|
||||||
|
|
|
||||||
|
|
@ -379,12 +379,12 @@ public class GenotypeAndValidateWalker extends RodWalker<GenotypeAndValidateWalk
|
||||||
|
|
||||||
VariantCallContext call;
|
VariantCallContext call;
|
||||||
if ( vcComp.isSNP() ) {
|
if ( vcComp.isSNP() ) {
|
||||||
call = snpEngine.calculateLikelihoodsAndGenotypes(tracker, ref, context);
|
call = snpEngine.calculateLikelihoodsAndGenotypes(tracker, ref, context).get(0);
|
||||||
} else if ( vcComp.isIndel() ) {
|
} else if ( vcComp.isIndel() ) {
|
||||||
call = indelEngine.calculateLikelihoodsAndGenotypes(tracker, ref, context);
|
call = indelEngine.calculateLikelihoodsAndGenotypes(tracker, ref, context).get(0);
|
||||||
} else if ( bamIsTruth ) {
|
} else if ( bamIsTruth ) {
|
||||||
// assume it's a SNP if no variation is present; this is necessary so that we can test supposed monomorphic sites against the truth bam
|
// assume it's a SNP if no variation is present; this is necessary so that we can test supposed monomorphic sites against the truth bam
|
||||||
call = snpEngine.calculateLikelihoodsAndGenotypes(tracker, ref, context);
|
call = snpEngine.calculateLikelihoodsAndGenotypes(tracker, ref, context).get(0);
|
||||||
} else {
|
} else {
|
||||||
logger.info("Not SNP or INDEL " + vcComp.getChr() + ":" + vcComp.getStart() + " " + vcComp.getAlleles());
|
logger.info("Not SNP or INDEL " + vcComp.getChr() + ":" + vcComp.getStart() + " " + vcComp.getAlleles());
|
||||||
return counter;
|
return counter;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue