Merge pull request #1417 from broadinstitute/rhl_ggvcf_span_del
Removed spanning deletions if the deletion was removed
This commit is contained in:
commit
b4eb479727
|
|
@ -104,8 +104,7 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
|
||||||
|
|
||||||
protected final GenomeLocParser genomeLocParser;
|
protected final GenomeLocParser genomeLocParser;
|
||||||
|
|
||||||
protected static int maxNumPLValuesObserved = 0;
|
private final List<GenomeLoc> upstreamDeletionsLoc = new LinkedList<>();
|
||||||
protected static int numTimesMaxNumPLValuesExceeded = 0;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Construct a new genotyper engine, on a specific subset of samples.
|
* Construct a new genotyper engine, on a specific subset of samples.
|
||||||
|
|
@ -233,7 +232,7 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
|
||||||
final AFCalculator afCalculator = afCalculatorProvider.getInstance(vc,defaultPloidy,maxAltAlleles).setMaxNumPLValues(maxNumPLValues);
|
final AFCalculator afCalculator = afCalculatorProvider.getInstance(vc,defaultPloidy,maxAltAlleles).setMaxNumPLValues(maxNumPLValues);
|
||||||
final AFCalculationResult AFresult = afCalculator.getLog10PNonRef(vc, defaultPloidy,maxAltAlleles, getAlleleFrequencyPriors(vc,defaultPloidy,model));
|
final AFCalculationResult AFresult = afCalculator.getLog10PNonRef(vc, defaultPloidy,maxAltAlleles, getAlleleFrequencyPriors(vc,defaultPloidy,model));
|
||||||
|
|
||||||
final OutputAlleleSubset outputAlternativeAlleles = calculateOutputAlleleSubset(AFresult);
|
final OutputAlleleSubset outputAlternativeAlleles = calculateOutputAlleleSubset(AFresult, vc);
|
||||||
|
|
||||||
final double PoFGT0 = Math.pow(10, AFresult.getLog10PosteriorOfAFGT0());
|
final double PoFGT0 = Math.pow(10, AFresult.getLog10PosteriorOfAFGT0());
|
||||||
|
|
||||||
|
|
@ -341,13 +340,13 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Provided the exact mode computations it returns the appropriate subset of alleles that progress to genotyping.
|
* Provided the exact mode computations it returns the appropriate subset of alleles that progress to genotyping.
|
||||||
* @param afcr the exact model calcualtion result.
|
* @param afcr the exact model calcualtion result.
|
||||||
* @return never {@code null}.
|
* @param vc the input variant context
|
||||||
|
* @return information about the alternative allele subsetting {@code null}.
|
||||||
*/
|
*/
|
||||||
private OutputAlleleSubset calculateOutputAlleleSubset(final AFCalculationResult afcr) {
|
private OutputAlleleSubset calculateOutputAlleleSubset(final AFCalculationResult afcr, final VariantContext vc) {
|
||||||
final List<Allele> alleles = afcr.getAllelesUsedInGenotyping();
|
final List<Allele> alleles = afcr.getAllelesUsedInGenotyping();
|
||||||
|
|
||||||
final int alternativeAlleleCount = alleles.size() - 1;
|
final int alternativeAlleleCount = alleles.size() - 1;
|
||||||
|
|
@ -355,23 +354,74 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
|
||||||
final int[] mleCounts = new int[alternativeAlleleCount];
|
final int[] mleCounts = new int[alternativeAlleleCount];
|
||||||
int outputAlleleCount = 0;
|
int outputAlleleCount = 0;
|
||||||
boolean siteIsMonomorphic = true;
|
boolean siteIsMonomorphic = true;
|
||||||
for (final Allele alternativeAllele : alleles) {
|
int referenceAlleleSize = 0;
|
||||||
if (alternativeAllele.isReference()) continue;
|
for (final Allele allele : alleles) {
|
||||||
// we want to keep the NON_REF symbolic allele but only in the absence of a non-symbolic allele, e.g.
|
if (allele.isReference() ) {
|
||||||
// if we combined a ref / NON_REF gVCF with a ref / alt gVCF
|
referenceAlleleSize = allele.length();
|
||||||
final boolean isNonRefWhichIsLoneAltAllele = alternativeAlleleCount == 1 && alternativeAllele == GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE;
|
} else {
|
||||||
final boolean isPlausible = afcr.isPolymorphicPhredScaledQual(alternativeAllele, configuration.genotypeArgs.STANDARD_CONFIDENCE_FOR_EMITTING);
|
// we want to keep the NON_REF symbolic allele but only in the absence of a non-symbolic allele, e.g.
|
||||||
final boolean toOutput = isPlausible || forceKeepAllele(alternativeAllele) || isNonRefWhichIsLoneAltAllele;
|
// if we combined a ref / NON_REF gVCF with a ref / alt gVCF
|
||||||
|
final boolean isNonRefWhichIsLoneAltAllele = alternativeAlleleCount == 1 && allele.equals(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
|
||||||
|
final boolean isPlausible = afcr.isPolymorphicPhredScaledQual(allele, configuration.genotypeArgs.STANDARD_CONFIDENCE_FOR_EMITTING);
|
||||||
|
|
||||||
siteIsMonomorphic &= ! isPlausible;
|
siteIsMonomorphic &= !isPlausible;
|
||||||
if (!toOutput) continue;
|
boolean toOutput = (isPlausible || forceKeepAllele(allele) || isNonRefWhichIsLoneAltAllele);
|
||||||
outputAlleles[outputAlleleCount] = alternativeAllele;
|
if ( allele.equals(GATKVCFConstants.SPANNING_DELETION_SYMBOLIC_ALLELE_DEPRECATED) ||
|
||||||
mleCounts[outputAlleleCount++] = afcr.getAlleleCountAtMLE(alternativeAllele);
|
allele.equals(Allele.SPAN_DEL) ) {
|
||||||
|
toOutput &= coveredByDeletion(vc);
|
||||||
|
}
|
||||||
|
if (toOutput) {
|
||||||
|
outputAlleles[outputAlleleCount] = allele;
|
||||||
|
mleCounts[outputAlleleCount++] = afcr.getAlleleCountAtMLE(allele);
|
||||||
|
recordDeletion(referenceAlleleSize, allele, vc);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return new OutputAlleleSubset(outputAlleleCount,outputAlleles,mleCounts,siteIsMonomorphic);
|
return new OutputAlleleSubset(outputAlleleCount,outputAlleles,mleCounts,siteIsMonomorphic);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Record deletion to keep
|
||||||
|
* Add deletions to a list.
|
||||||
|
*
|
||||||
|
* @param referenceAlleleSize reference allele length
|
||||||
|
* @param allele allele of interest
|
||||||
|
* @param vc variant context
|
||||||
|
*/
|
||||||
|
private void recordDeletion(final int referenceAlleleSize, final Allele allele, final VariantContext vc) {
|
||||||
|
final int deletionSize = referenceAlleleSize - allele.length();
|
||||||
|
|
||||||
|
// Allele ia a deletion
|
||||||
|
if (deletionSize > 0) {
|
||||||
|
final GenomeLoc genomeLoc = genomeLocParser.createGenomeLocOnContig(vc.getContig(), vc.getStart(), vc.getStart() + deletionSize);
|
||||||
|
upstreamDeletionsLoc.add(genomeLoc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Is the variant context covered by an upstream deletion?
|
||||||
|
*
|
||||||
|
* @param vc variant context
|
||||||
|
* @return true if the location is covered by an upstream deletion, false otherwise
|
||||||
|
*/
|
||||||
|
private boolean coveredByDeletion(final VariantContext vc) {
|
||||||
|
for (Iterator<GenomeLoc> it = upstreamDeletionsLoc.iterator(); it.hasNext(); ) {
|
||||||
|
final GenomeLoc loc = it.next();
|
||||||
|
if (!loc.getContig().equals(vc.getContig())) { // past contig deletion.
|
||||||
|
it.remove();
|
||||||
|
} else if (loc.getStop() < vc.getStart()) { // past position in current contig deletion.
|
||||||
|
it.remove();
|
||||||
|
} else if (loc.getStart() == vc.getStart()) {
|
||||||
|
// ignore this deletion, the symbolic one does not make reference to it.
|
||||||
|
} else { // deletion covers.
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Checks whether even if the allele is not well supported by the data, we should keep it for genotyping.
|
* Checks whether even if the allele is not well supported by the data, we should keep it for genotyping.
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -661,4 +661,13 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
||||||
spec.disableShadowBCF();
|
spec.disableShadowBCF();
|
||||||
executeTest("testGenotypingSpanningDeletionWithAllSites", spec);
|
executeTest("testGenotypingSpanningDeletionWithAllSites", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGenotypingSpanningDeletionAcrossLines() {
|
||||||
|
final WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
baseTestString(" -V " + privateTestDir + "input-1_2256566.vcf", b37KGReference),
|
||||||
|
Collections.singletonList("1f914189326cdd17d0a8753f13cb221f"));
|
||||||
|
spec.disableShadowBCF();
|
||||||
|
executeTest("testGenotypingSpanningDeletionAcrossLines", spec);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Loading…
Reference in New Issue