Turning on allele trimming for the haplotype caller.
This commit is contained in:
parent
b543bddbb7
commit
2a9ee89c19
|
|
@ -283,7 +283,7 @@ public class GenotypingEngine {
|
||||||
final VariantContext mergedVC = VariantContextUtils.simpleMerge(genomeLocParser, eventsAtThisLoc, priorityList, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false);
|
final VariantContext mergedVC = VariantContextUtils.simpleMerge(genomeLocParser, eventsAtThisLoc, priorityList, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false);
|
||||||
if( mergedVC == null ) { continue; }
|
if( mergedVC == null ) { continue; }
|
||||||
|
|
||||||
final HashMap<Allele, ArrayList<Haplotype>> alleleHashMap = new HashMap<Allele, ArrayList<Haplotype>>();
|
HashMap<Allele, ArrayList<Haplotype>> alleleHashMap = new HashMap<Allele, ArrayList<Haplotype>>();
|
||||||
int aCount = 0;
|
int aCount = 0;
|
||||||
for( final Allele a : mergedVC.getAlleles() ) {
|
for( final Allele a : mergedVC.getAlleles() ) {
|
||||||
alleleHashMap.put(a, alleleMapper.get(aCount++)); // BUGBUG: needs to be cleaned up and merged with alleleMapper
|
alleleHashMap.put(a, alleleMapper.get(aCount++)); // BUGBUG: needs to be cleaned up and merged with alleleMapper
|
||||||
|
|
@ -308,9 +308,20 @@ public class GenotypingEngine {
|
||||||
}
|
}
|
||||||
genotypes.add( new GenotypeBuilder(sample).alleles(noCall).PL(genotypeLikelihoods).make() );
|
genotypes.add( new GenotypeBuilder(sample).alleles(noCall).PL(genotypeLikelihoods).make() );
|
||||||
}
|
}
|
||||||
final VariantCallContext call = UG_engine.calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), UG_engine.getUAC().GLmodel);
|
VariantContext call = UG_engine.calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), UG_engine.getUAC().GLmodel);
|
||||||
|
|
||||||
if( call != null ) {
|
if( call != null ) {
|
||||||
|
if( call.getAlleles().size() != mergedVC.getAlleles().size() ) { // some alleles were removed so reverseTrimming might be necessary!
|
||||||
|
final VariantContext vcCallTrim = VariantContextUtils.reverseTrimAlleles(call);
|
||||||
|
// also, need to update the allele -> haplotype mapping
|
||||||
|
final HashMap<Allele, ArrayList<Haplotype>> alleleHashMapTrim = new HashMap<Allele, ArrayList<Haplotype>>();
|
||||||
|
for( int iii = 0; iii < vcCallTrim.getAlleles().size(); iii++ ) { // BUGBUG: this is assuming that the original and trimmed alleles maintain the same ordering in the VC
|
||||||
|
alleleHashMapTrim.put(vcCallTrim.getAlleles().get(iii), alleleHashMap.get(call.getAlleles().get(iii)));
|
||||||
|
}
|
||||||
|
|
||||||
|
call = vcCallTrim;
|
||||||
|
alleleHashMap = alleleHashMapTrim;
|
||||||
|
}
|
||||||
|
|
||||||
returnCalls.add( new Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>(call, alleleHashMap) );
|
returnCalls.add( new Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>(call, alleleHashMap) );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -40,7 +40,6 @@ import java.util.*;
|
||||||
public class LikelihoodCalculationEngine {
|
public class LikelihoodCalculationEngine {
|
||||||
|
|
||||||
private static final double LOG_ONE_HALF = -Math.log10(2.0);
|
private static final double LOG_ONE_HALF = -Math.log10(2.0);
|
||||||
private static final double BEST_LIKELIHOOD_THRESHOLD = 0.1;
|
|
||||||
private final byte constantGCP;
|
private final byte constantGCP;
|
||||||
private final boolean DEBUG;
|
private final boolean DEBUG;
|
||||||
private final PairHMM pairHMM;
|
private final PairHMM pairHMM;
|
||||||
|
|
@ -184,7 +183,7 @@ public class LikelihoodCalculationEngine {
|
||||||
haplotypeLikelihood += readCounts_iii[kkk] * ( MathUtils.approximateLog10SumLog10(readLikelihoods_iii[kkk], readLikelihoods_jjj[kkk]) + LOG_ONE_HALF );
|
haplotypeLikelihood += readCounts_iii[kkk] * ( MathUtils.approximateLog10SumLog10(readLikelihoods_iii[kkk], readLikelihoods_jjj[kkk]) + LOG_ONE_HALF );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
haplotypeLikelihoodMatrix[iii][jjj] = Math.max(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood); // MathUtils.approximateLog10SumLog10(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood); // BUGBUG: max or sum?
|
haplotypeLikelihoodMatrix[iii][jjj] = Math.max(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -323,11 +322,13 @@ public class LikelihoodCalculationEngine {
|
||||||
return bestHaplotypes;
|
return bestHaplotypes;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Map<String, PerReadAlleleLikelihoodMap> partitionReadsBasedOnLikelihoods( final GenomeLocParser parser, final HashMap<String, ArrayList<GATKSAMRecord>> perSampleReadList, final HashMap<String, ArrayList<GATKSAMRecord>> perSampleFilteredReadList, final Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>> call) {
|
public static Map<String, PerReadAlleleLikelihoodMap> partitionReadsBasedOnLikelihoods( final GenomeLocParser parser,
|
||||||
|
final HashMap<String, ArrayList<GATKSAMRecord>> perSampleReadList,
|
||||||
|
final HashMap<String, ArrayList<GATKSAMRecord>> perSampleFilteredReadList,
|
||||||
|
final Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>> call) {
|
||||||
final Map<String, PerReadAlleleLikelihoodMap> returnMap = new HashMap<String, PerReadAlleleLikelihoodMap>();
|
final Map<String, PerReadAlleleLikelihoodMap> returnMap = new HashMap<String, PerReadAlleleLikelihoodMap>();
|
||||||
final GenomeLoc callLoc = parser.createGenomeLoc(call.getFirst());
|
final GenomeLoc callLoc = parser.createGenomeLoc(call.getFirst());
|
||||||
for( final Map.Entry<String, ArrayList<GATKSAMRecord>> sample : perSampleReadList.entrySet() ) {
|
for( final Map.Entry<String, ArrayList<GATKSAMRecord>> sample : perSampleReadList.entrySet() ) {
|
||||||
//final Map<Allele, List<GATKSAMRecord>> alleleReadMap = new HashMap<Allele, List<GATKSAMRecord>>();
|
|
||||||
final PerReadAlleleLikelihoodMap likelihoodMap = new PerReadAlleleLikelihoodMap();
|
final PerReadAlleleLikelihoodMap likelihoodMap = new PerReadAlleleLikelihoodMap();
|
||||||
|
|
||||||
final ArrayList<GATKSAMRecord> readsForThisSample = sample.getValue();
|
final ArrayList<GATKSAMRecord> readsForThisSample = sample.getValue();
|
||||||
|
|
@ -352,7 +353,7 @@ public class LikelihoodCalculationEngine {
|
||||||
// only count the read if it overlaps the event, otherwise it is not added to the output read list at all
|
// only count the read if it overlaps the event, otherwise it is not added to the output read list at all
|
||||||
if( callLoc.overlapsP(parser.createGenomeLoc(read)) ) {
|
if( callLoc.overlapsP(parser.createGenomeLoc(read)) ) {
|
||||||
for( final Allele a : call.getFirst().getAlleles() )
|
for( final Allele a : call.getFirst().getAlleles() )
|
||||||
likelihoodMap.add(read,a,0.0);
|
likelihoodMap.add(read, a, 0.0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -42,7 +42,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testHaplotypeCallerMultiSampleComplex() {
|
public void testHaplotypeCallerMultiSampleComplex() {
|
||||||
HCTestComplexVariants(CEUTRIO_BAM, "", "f5a809e3fbd9998f79b75bb2973209e1");
|
HCTestComplexVariants(CEUTRIO_BAM, "", "966da0de8466d21d79f1523488dff6bd");
|
||||||
}
|
}
|
||||||
|
|
||||||
private void HCTestSymbolicVariants(String bam, String args, String md5) {
|
private void HCTestSymbolicVariants(String bam, String args, String md5) {
|
||||||
|
|
|
||||||
|
|
@ -508,10 +508,10 @@ public class UnifiedGenotyperEngine {
|
||||||
|
|
||||||
// if we are subsetting alleles (either because there were too many or because some were not polymorphic)
|
// if we are subsetting alleles (either because there were too many or because some were not polymorphic)
|
||||||
// then we may need to trim the alleles (because the original VariantContext may have had to pad at the end).
|
// then we may need to trim the alleles (because the original VariantContext may have had to pad at the end).
|
||||||
if ( myAlleles.size() != vc.getAlleles().size() && !limitedContext )
|
if ( myAlleles.size() != vc.getAlleles().size() && !limitedContext ) // limitedContext callers need to handle allele trimming on their own to keep their perReadAlleleLikelihoodMap alleles in sync
|
||||||
vcCall = VariantContextUtils.reverseTrimAlleles(vcCall);
|
vcCall = VariantContextUtils.reverseTrimAlleles(vcCall);
|
||||||
|
|
||||||
if ( annotationEngine != null && !limitedContext ) {
|
if ( annotationEngine != null && !limitedContext ) { // limitedContext callers need to handle annotations on their own by calling their own annotationEngine
|
||||||
// Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
|
// Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
|
||||||
final ReadBackedPileup pileup = rawContext.getBasePileup();
|
final ReadBackedPileup pileup = rawContext.getBasePileup();
|
||||||
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);
|
stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup);
|
||||||
|
|
|
||||||
|
|
@ -1340,10 +1340,7 @@ public class VariantContextUtils {
|
||||||
|
|
||||||
public static VariantContext reverseTrimAlleles( final VariantContext inputVC ) {
|
public static VariantContext reverseTrimAlleles( final VariantContext inputVC ) {
|
||||||
|
|
||||||
// TODO - this function doesn't work with mixed records or records that started as mixed and then became non-mixed
|
|
||||||
|
|
||||||
// see whether we need to trim common reference base from all alleles
|
// see whether we need to trim common reference base from all alleles
|
||||||
|
|
||||||
final int trimExtent = computeReverseClipping(inputVC.getAlleles(), inputVC.getReference().getDisplayString().getBytes(), 0, false);
|
final int trimExtent = computeReverseClipping(inputVC.getAlleles(), inputVC.getReference().getDisplayString().getBytes(), 0, false);
|
||||||
if ( trimExtent <= 0 || inputVC.getAlleles().size() <= 1 )
|
if ( trimExtent <= 0 || inputVC.getAlleles().size() <= 1 )
|
||||||
return inputVC;
|
return inputVC;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue