Haplotype/Allele based optimizations for the HaplotypeCaller that knock off nearly 20% of the total runtime (multi-sample).

These 2 changes improve runtime performance almost as much as Ryan's previous attempt (with ID-based comparisons):
* Don't unnecessarily overload Allele.getBases() in the Haplotype class.
  * Haplotype.getBases() was calling clone() on the byte array.
* Added a constructor to Allele (and Haplotype) that takes in an Allele as input.
  * It makes a copy of he given allele without having to go through the validation of the bases (since the Allele has already been validated).
  * Rev'ed the variant jar accordingly.

For the reviewer: all tests passed before rebasing, so this should be good to go as far as correctness.
This commit is contained in:
Eric Banks 2013-02-20 13:52:55 -05:00
parent a954cf3c01
commit 6996a953a8
5 changed files with 20 additions and 13 deletions

View File

@ -368,7 +368,7 @@ public class GenotypingEngine {
for( final Map.Entry<GATKSAMRecord, Map<Allele,Double>> readEntry : haplotypeReadMapEntry.getValue().getLikelihoodReadMap().entrySet() ) { // for each read
double maxLikelihood = Double.NEGATIVE_INFINITY;
for( final Map.Entry<Allele,Double> alleleDoubleEntry : readEntry.getValue().entrySet() ) { // for each input allele
if( mappedHaplotypes.contains( new Haplotype(alleleDoubleEntry.getKey().getBases())) ) { // exact match of haplotype base string
if( mappedHaplotypes.contains( new Haplotype(alleleDoubleEntry.getKey())) ) { // exact match of haplotype base string
maxLikelihood = Math.max( maxLikelihood, alleleDoubleEntry.getValue() );
}
}
@ -442,7 +442,7 @@ public class GenotypingEngine {
}
// count up the co-occurrences of the events for the R^2 calculation
for( final String sample : samples ) {
final double haplotypeLikelihood = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods( Collections.singleton(sample), haplotypeReadMap, Collections.singletonList(Allele.create(h.getBases())) )[0][0];
final double haplotypeLikelihood = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods( Collections.singleton(sample), haplotypeReadMap, Collections.singletonList(Allele.create(h, true)) )[0][0];
if( thisHapVC == null ) {
if( nextHapVC == null ) { x11 = MathUtils.approximateLog10SumLog10(x11, haplotypeLikelihood); }
else { x12 = MathUtils.approximateLog10SumLog10(x12, haplotypeLikelihood); }

View File

@ -125,7 +125,7 @@ public class LikelihoodCalculationEngine {
final int numHaplotypes = haplotypes.size();
final Map<Haplotype, Allele> alleleVersions = new HashMap<Haplotype, Allele>(numHaplotypes);
for ( final Haplotype haplotype : haplotypes ) {
alleleVersions.put(haplotype, Allele.create(haplotype.getBases()));
alleleVersions.put(haplotype, Allele.create(haplotype, true));
}
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap = new PerReadAlleleLikelihoodMap();
@ -232,7 +232,7 @@ public class LikelihoodCalculationEngine {
final List<Integer> bestHaplotypesIndexList = new ArrayList<Integer>();
bestHaplotypesIndexList.add( findReferenceIndex(haplotypes) ); // always start with the reference haplotype
final List<Allele> haplotypesAsAlleles = new ArrayList<Allele>();
for( final Haplotype h : haplotypes ) { haplotypesAsAlleles.add(Allele.create(h.getBases())); }
for( final Haplotype h : haplotypes ) { haplotypesAsAlleles.add(Allele.create(h, true)); }
final double[][] haplotypeLikelihoodMatrix = computeDiploidHaplotypeLikelihoods( sampleKeySet, stratifiedReadMap, haplotypesAsAlleles ); // all samples pooled together

View File

@ -61,6 +61,15 @@ public class Haplotype extends Allele {
this(bases, false);
}
/**
* Copy constructor. Note the ref state of the provided allele is ignored!
*
* @param allele allele to copy
*/
public Haplotype( final Allele allele ) {
super(allele, true);
}
protected Haplotype( final byte[] bases, final Event artificialEvent ) {
this(bases, false);
this.artificialEvent = artificialEvent;
@ -94,10 +103,6 @@ public class Haplotype extends Allele {
return getDisplayString();
}
public byte[] getBases() {
return super.getBases().clone();
}
public long getStartPosition() {
return genomeLocation.getStart();
}
@ -150,13 +155,15 @@ public class Haplotype extends Allele {
public Haplotype insertAllele( final Allele refAllele, final Allele altAllele, final int refInsertLocation, final int genomicInsertLocation ) {
// refInsertLocation is in ref haplotype offset coordinates NOT genomic coordinates
final int haplotypeInsertLocation = ReadUtils.getReadCoordinateForReferenceCoordinate(alignmentStartHapwrtRef, cigar, refInsertLocation, ReadUtils.ClippingTail.RIGHT_TAIL, true);
if( haplotypeInsertLocation == -1 || haplotypeInsertLocation + refAllele.length() >= getBases().length ) { // desired change falls inside deletion so don't bother creating a new haplotype
final byte[] myBases = this.getBases();
if( haplotypeInsertLocation == -1 || haplotypeInsertLocation + refAllele.length() >= myBases.length ) { // desired change falls inside deletion so don't bother creating a new haplotype
return null;
}
byte[] newHaplotypeBases = new byte[]{};
newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(getBases(), 0, haplotypeInsertLocation)); // bases before the variant
newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(myBases, 0, haplotypeInsertLocation)); // bases before the variant
newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, altAllele.getBases()); // the alt allele of the variant
newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(getBases(), haplotypeInsertLocation + refAllele.length(), getBases().length)); // bases after the variant
newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(myBases, haplotypeInsertLocation + refAllele.length(), myBases.length)); // bases after the variant
return new Haplotype(newHaplotypeBases, new Event(refAllele, altAllele, genomicInsertLocation));
}
@ -199,7 +206,7 @@ public class Haplotype extends Allele {
if (refAllele == null)
throw new ReviewedStingException("BUG: no ref alleles in input to makeHaplotypeListfrom Alleles at loc: "+ startPos);
byte[] refBases = ref.getBases();
final byte[] refBases = ref.getBases();
final int startIdxInReference = 1 + startPos - numPrefBases - ref.getWindow().getStart();
final String basesBeforeVariant = new String(Arrays.copyOfRange(refBases, startIdxInReference, startIdxInReference + numPrefBases));

View File

@ -1,3 +1,3 @@
<ivy-module version="1.0">
<info organisation="org.broadinstitute" module="variant" revision="1.84.1338" status="integration" />
<info organisation="org.broadinstitute" module="variant" revision="1.85.1357" status="integration" />
</ivy-module>