Haplotype/Allele based optimizations for the HaplotypeCaller that knock off nearly 20% of the total runtime (multi-sample).
These 2 changes improve runtime performance almost as much as Ryan's previous attempt (with ID-based comparisons): * Don't unnecessarily overload Allele.getBases() in the Haplotype class. * Haplotype.getBases() was calling clone() on the byte array. * Added a constructor to Allele (and Haplotype) that takes in an Allele as input. * It makes a copy of the given allele without having to go through the validation of the bases (since the Allele has already been validated). * Rev'ed the variant jar accordingly. For the reviewer: all tests passed before rebasing, so this should be good to go as far as correctness.
This commit is contained in:
parent
a954cf3c01
commit
6996a953a8
|
|
@ -368,7 +368,7 @@ public class GenotypingEngine {
|
|||
for( final Map.Entry<GATKSAMRecord, Map<Allele,Double>> readEntry : haplotypeReadMapEntry.getValue().getLikelihoodReadMap().entrySet() ) { // for each read
|
||||
double maxLikelihood = Double.NEGATIVE_INFINITY;
|
||||
for( final Map.Entry<Allele,Double> alleleDoubleEntry : readEntry.getValue().entrySet() ) { // for each input allele
|
||||
if( mappedHaplotypes.contains( new Haplotype(alleleDoubleEntry.getKey().getBases())) ) { // exact match of haplotype base string
|
||||
if( mappedHaplotypes.contains( new Haplotype(alleleDoubleEntry.getKey())) ) { // exact match of haplotype base string
|
||||
maxLikelihood = Math.max( maxLikelihood, alleleDoubleEntry.getValue() );
|
||||
}
|
||||
}
|
||||
|
|
@ -442,7 +442,7 @@ public class GenotypingEngine {
|
|||
}
|
||||
// count up the co-occurrences of the events for the R^2 calculation
|
||||
for( final String sample : samples ) {
|
||||
final double haplotypeLikelihood = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods( Collections.singleton(sample), haplotypeReadMap, Collections.singletonList(Allele.create(h.getBases())) )[0][0];
|
||||
final double haplotypeLikelihood = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods( Collections.singleton(sample), haplotypeReadMap, Collections.singletonList(Allele.create(h, true)) )[0][0];
|
||||
if( thisHapVC == null ) {
|
||||
if( nextHapVC == null ) { x11 = MathUtils.approximateLog10SumLog10(x11, haplotypeLikelihood); }
|
||||
else { x12 = MathUtils.approximateLog10SumLog10(x12, haplotypeLikelihood); }
|
||||
|
|
|
|||
|
|
@ -125,7 +125,7 @@ public class LikelihoodCalculationEngine {
|
|||
final int numHaplotypes = haplotypes.size();
|
||||
final Map<Haplotype, Allele> alleleVersions = new HashMap<Haplotype, Allele>(numHaplotypes);
|
||||
for ( final Haplotype haplotype : haplotypes ) {
|
||||
alleleVersions.put(haplotype, Allele.create(haplotype.getBases()));
|
||||
alleleVersions.put(haplotype, Allele.create(haplotype, true));
|
||||
}
|
||||
|
||||
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap = new PerReadAlleleLikelihoodMap();
|
||||
|
|
@ -232,7 +232,7 @@ public class LikelihoodCalculationEngine {
|
|||
final List<Integer> bestHaplotypesIndexList = new ArrayList<Integer>();
|
||||
bestHaplotypesIndexList.add( findReferenceIndex(haplotypes) ); // always start with the reference haplotype
|
||||
final List<Allele> haplotypesAsAlleles = new ArrayList<Allele>();
|
||||
for( final Haplotype h : haplotypes ) { haplotypesAsAlleles.add(Allele.create(h.getBases())); }
|
||||
for( final Haplotype h : haplotypes ) { haplotypesAsAlleles.add(Allele.create(h, true)); }
|
||||
|
||||
final double[][] haplotypeLikelihoodMatrix = computeDiploidHaplotypeLikelihoods( sampleKeySet, stratifiedReadMap, haplotypesAsAlleles ); // all samples pooled together
|
||||
|
||||
|
|
|
|||
|
|
@ -61,6 +61,15 @@ public class Haplotype extends Allele {
|
|||
this(bases, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Copy constructor. Note the ref state of the provided allele is ignored!
|
||||
*
|
||||
* @param allele allele to copy
|
||||
*/
|
||||
public Haplotype( final Allele allele ) {
|
||||
super(allele, true);
|
||||
}
|
||||
|
||||
protected Haplotype( final byte[] bases, final Event artificialEvent ) {
|
||||
this(bases, false);
|
||||
this.artificialEvent = artificialEvent;
|
||||
|
|
@ -94,10 +103,6 @@ public class Haplotype extends Allele {
|
|||
return getDisplayString();
|
||||
}
|
||||
|
||||
public byte[] getBases() {
|
||||
return super.getBases().clone();
|
||||
}
|
||||
|
||||
public long getStartPosition() {
|
||||
return genomeLocation.getStart();
|
||||
}
|
||||
|
|
@ -150,13 +155,15 @@ public class Haplotype extends Allele {
|
|||
public Haplotype insertAllele( final Allele refAllele, final Allele altAllele, final int refInsertLocation, final int genomicInsertLocation ) {
|
||||
// refInsertLocation is in ref haplotype offset coordinates NOT genomic coordinates
|
||||
final int haplotypeInsertLocation = ReadUtils.getReadCoordinateForReferenceCoordinate(alignmentStartHapwrtRef, cigar, refInsertLocation, ReadUtils.ClippingTail.RIGHT_TAIL, true);
|
||||
if( haplotypeInsertLocation == -1 || haplotypeInsertLocation + refAllele.length() >= getBases().length ) { // desired change falls inside deletion so don't bother creating a new haplotype
|
||||
final byte[] myBases = this.getBases();
|
||||
if( haplotypeInsertLocation == -1 || haplotypeInsertLocation + refAllele.length() >= myBases.length ) { // desired change falls inside deletion so don't bother creating a new haplotype
|
||||
return null;
|
||||
}
|
||||
|
||||
byte[] newHaplotypeBases = new byte[]{};
|
||||
newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(getBases(), 0, haplotypeInsertLocation)); // bases before the variant
|
||||
newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(myBases, 0, haplotypeInsertLocation)); // bases before the variant
|
||||
newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, altAllele.getBases()); // the alt allele of the variant
|
||||
newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(getBases(), haplotypeInsertLocation + refAllele.length(), getBases().length)); // bases after the variant
|
||||
newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(myBases, haplotypeInsertLocation + refAllele.length(), myBases.length)); // bases after the variant
|
||||
return new Haplotype(newHaplotypeBases, new Event(refAllele, altAllele, genomicInsertLocation));
|
||||
}
|
||||
|
||||
|
|
@ -199,7 +206,7 @@ public class Haplotype extends Allele {
|
|||
if (refAllele == null)
|
||||
throw new ReviewedStingException("BUG: no ref alleles in input to makeHaplotypeListfrom Alleles at loc: "+ startPos);
|
||||
|
||||
byte[] refBases = ref.getBases();
|
||||
final byte[] refBases = ref.getBases();
|
||||
|
||||
final int startIdxInReference = 1 + startPos - numPrefBases - ref.getWindow().getStart();
|
||||
final String basesBeforeVariant = new String(Arrays.copyOfRange(refBases, startIdxInReference, startIdxInReference + numPrefBases));
|
||||
|
|
|
|||
Binary file not shown.
|
|
@ -1,3 +1,3 @@
|
|||
<ivy-module version="1.0">
|
||||
<info organisation="org.broadinstitute" module="variant" revision="1.84.1338" status="integration" />
|
||||
<info organisation="org.broadinstitute" module="variant" revision="1.85.1357" status="integration" />
|
||||
</ivy-module>
|
||||
Loading…
Reference in New Issue