Major restructuring of HaplotypeCaller's LikelihoodCalculationEngine and GenotypingEngine. We no longer create an ugly event dictionary and genotype events found on haplotypes independently by finding the haplotype with the max likelihood. Lots of code has been rewritten to be much cleaner.
This commit is contained in:
parent
47e5a80d0f
commit
78a4e7e45e
|
|
@ -32,16 +32,13 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.*;
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.LinkedHashMap;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
public class Haplotype {
|
public class Haplotype {
|
||||||
protected final byte[] bases;
|
protected final byte[] bases;
|
||||||
protected final double[] quals;
|
protected final double[] quals;
|
||||||
private GenomeLoc genomeLocation = null;
|
private GenomeLoc genomeLocation = null;
|
||||||
private boolean isReference = false;
|
private HashMap<String, double[]> readLikelihoodsPerSample = null;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a simple consensus sequence with provided bases and a uniform quality over all bases of qual
|
* Create a simple consensus sequence with provided bases and a uniform quality over all bases of qual
|
||||||
|
|
@ -69,16 +66,26 @@ public class Haplotype {
|
||||||
this.genomeLocation = loc;
|
this.genomeLocation = loc;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Haplotype(byte[] bases, GenomeLoc loc, boolean isRef) {
|
|
||||||
this(bases, loc);
|
|
||||||
this.isReference = isRef;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean equals( Object h ) {
|
public boolean equals( Object h ) {
|
||||||
return h instanceof Haplotype && Arrays.equals(bases, ((Haplotype) h).bases);
|
return h instanceof Haplotype && Arrays.equals(bases, ((Haplotype) h).bases);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void addReadLikelihoods( final String sample, final double[] readLikelihoods ) {
|
||||||
|
if( readLikelihoodsPerSample == null ) {
|
||||||
|
readLikelihoodsPerSample = new HashMap<String, double[]>();
|
||||||
|
}
|
||||||
|
readLikelihoodsPerSample.put(sample, readLikelihoods);
|
||||||
|
}
|
||||||
|
|
||||||
|
public double[] getReadLikelihoods( final String sample ) {
|
||||||
|
return readLikelihoodsPerSample.get(sample);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Set<String> getSampleKeySet() {
|
||||||
|
return readLikelihoodsPerSample.keySet();
|
||||||
|
}
|
||||||
|
|
||||||
public double getQualitySum() {
|
public double getQualitySum() {
|
||||||
double s = 0;
|
double s = 0;
|
||||||
for (int k=0; k < bases.length; k++) {
|
for (int k=0; k < bases.length; k++) {
|
||||||
|
|
@ -87,6 +94,7 @@ public class Haplotype {
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
String returnString = "";
|
String returnString = "";
|
||||||
for(int iii = 0; iii < bases.length; iii++) {
|
for(int iii = 0; iii < bases.length; iii++) {
|
||||||
|
|
@ -110,10 +118,6 @@ public class Haplotype {
|
||||||
return genomeLocation.getStop();
|
return genomeLocation.getStop();
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean isReference() {
|
|
||||||
return isReference;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Requires({"refInsertLocation >= 0", "hapStartInRefCoords >= 0"})
|
@Requires({"refInsertLocation >= 0", "hapStartInRefCoords >= 0"})
|
||||||
public byte[] insertAllele( final Allele refAllele, final Allele altAllele, int refInsertLocation, final int hapStartInRefCoords, final Cigar haplotypeCigar ) {
|
public byte[] insertAllele( final Allele refAllele, final Allele altAllele, int refInsertLocation, final int hapStartInRefCoords, final Cigar haplotypeCigar ) {
|
||||||
|
|
||||||
|
|
@ -208,13 +212,14 @@ public class Haplotype {
|
||||||
String haplotypeString = new String(basesBeforeVariant) + new String(alleleBases) + new String(basesAfterVariant);
|
String haplotypeString = new String(basesBeforeVariant) + new String(alleleBases) + new String(basesAfterVariant);
|
||||||
haplotypeString = haplotypeString.substring(0,haplotypeSize);
|
haplotypeString = haplotypeString.substring(0,haplotypeSize);
|
||||||
|
|
||||||
haplotypeMap.put(a,new Haplotype(haplotypeString.getBytes(), locus, a.isReference()));
|
haplotypeMap.put(a,new Haplotype(haplotypeString.getBytes(), locus));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return haplotypeMap;
|
return haplotypeMap;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// BUGBUG: copied from ReadClipper and slightly modified since we don't have the data in a GATKSAMRecord
|
||||||
private static Integer getHaplotypeCoordinateForReferenceCoordinate( final int haplotypeStart, final Cigar haplotypeCigar, final int refCoord ) {
|
private static Integer getHaplotypeCoordinateForReferenceCoordinate( final int haplotypeStart, final Cigar haplotypeCigar, final int refCoord ) {
|
||||||
int readBases = 0;
|
int readBases = 0;
|
||||||
int refBases = 0;
|
int refBases = 0;
|
||||||
|
|
|
||||||
|
|
@ -28,6 +28,7 @@ import com.google.java.contract.*;
|
||||||
import org.broad.tribble.Feature;
|
import org.broad.tribble.Feature;
|
||||||
import org.broad.tribble.TribbleException;
|
import org.broad.tribble.TribbleException;
|
||||||
import org.broad.tribble.util.ParsingUtils;
|
import org.broad.tribble.util.ParsingUtils;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
|
|
@ -344,6 +345,21 @@ public class VariantContextBuilder {
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tells us that the resulting VariantContext should have the specified location
|
||||||
|
* @param loc
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
@Requires({"loc.getContig() != null", "loc.getStart() >= 0", "loc.getStop() >= 0"})
|
||||||
|
public VariantContextBuilder loc(final GenomeLoc loc) {
|
||||||
|
this.contig = loc.getContig();
|
||||||
|
this.start = loc.getStart();
|
||||||
|
this.stop = loc.getStop();
|
||||||
|
toValidate.add(VariantContext.Validation.ALLELES);
|
||||||
|
toValidate.add(VariantContext.Validation.REF_PADDING);
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tells us that the resulting VariantContext should have the specified contig chr
|
* Tells us that the resulting VariantContext should have the specified contig chr
|
||||||
* @param contig
|
* @param contig
|
||||||
|
|
|
||||||
|
|
@ -458,7 +458,7 @@ public class VariantContextUtils {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Merges VariantContexts into a single hybrid. Takes genotypes for common samples in priority order, if provided.
|
* Merges VariantContexts into a single hybrid. Takes genotypes for common samples in priority order, if provided.
|
||||||
* If uniqifySamples is true, the priority order is ignored and names are created by concatenating the VC name with
|
* If uniquifySamples is true, the priority order is ignored and names are created by concatenating the VC name with
|
||||||
* the sample name
|
* the sample name
|
||||||
*
|
*
|
||||||
* @param genomeLocParser loc parser
|
* @param genomeLocParser loc parser
|
||||||
|
|
@ -492,11 +492,11 @@ public class VariantContextUtils {
|
||||||
if ( genotypeMergeOptions == GenotypeMergeType.REQUIRE_UNIQUE )
|
if ( genotypeMergeOptions == GenotypeMergeType.REQUIRE_UNIQUE )
|
||||||
verifyUniqueSampleNames(unsortedVCs);
|
verifyUniqueSampleNames(unsortedVCs);
|
||||||
|
|
||||||
List<VariantContext> prepaddedVCs = sortVariantContextsByPriority(unsortedVCs, priorityListOfVCs, genotypeMergeOptions);
|
final List<VariantContext> prepaddedVCs = sortVariantContextsByPriority(unsortedVCs, priorityListOfVCs, genotypeMergeOptions);
|
||||||
// Make sure all variant contexts are padded with reference base in case of indels if necessary
|
// Make sure all variant contexts are padded with reference base in case of indels if necessary
|
||||||
List<VariantContext> VCs = new ArrayList<VariantContext>();
|
final List<VariantContext> VCs = new ArrayList<VariantContext>();
|
||||||
|
|
||||||
for (VariantContext vc : prepaddedVCs) {
|
for (final VariantContext vc : prepaddedVCs) {
|
||||||
// also a reasonable place to remove filtered calls, if needed
|
// also a reasonable place to remove filtered calls, if needed
|
||||||
if ( ! filteredAreUncalled || vc.isNotFiltered() )
|
if ( ! filteredAreUncalled || vc.isNotFiltered() )
|
||||||
VCs.add(createVariantContextWithPaddedAlleles(vc, false));
|
VCs.add(createVariantContextWithPaddedAlleles(vc, false));
|
||||||
|
|
@ -531,7 +531,7 @@ public class VariantContextUtils {
|
||||||
|
|
||||||
// cycle through and add info from the other VCs, making sure the loc/reference matches
|
// cycle through and add info from the other VCs, making sure the loc/reference matches
|
||||||
|
|
||||||
for ( VariantContext vc : VCs ) {
|
for ( final VariantContext vc : VCs ) {
|
||||||
if ( loc.getStart() != vc.getStart() ) // || !first.getReference().equals(vc.getReference()) )
|
if ( loc.getStart() != vc.getStart() ) // || !first.getReference().equals(vc.getReference()) )
|
||||||
throw new ReviewedStingException("BUG: attempting to merge VariantContexts with different start sites: first="+ first.toString() + " second=" + vc.toString());
|
throw new ReviewedStingException("BUG: attempting to merge VariantContexts with different start sites: first="+ first.toString() + " second=" + vc.toString());
|
||||||
|
|
||||||
|
|
@ -581,13 +581,13 @@ public class VariantContextUtils {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (Map.Entry<String, Object> p : vc.getAttributes().entrySet()) {
|
for (final Map.Entry<String, Object> p : vc.getAttributes().entrySet()) {
|
||||||
String key = p.getKey();
|
String key = p.getKey();
|
||||||
// if we don't like the key already, don't go anywhere
|
// if we don't like the key already, don't go anywhere
|
||||||
if ( ! inconsistentAttributes.contains(key) ) {
|
if ( ! inconsistentAttributes.contains(key) ) {
|
||||||
boolean alreadyFound = attributes.containsKey(key);
|
boolean alreadyFound = attributes.containsKey(key);
|
||||||
Object boundValue = attributes.get(key);
|
Object boundValue = attributes.get(key);
|
||||||
boolean boundIsMissingValue = alreadyFound && boundValue.equals(VCFConstants.MISSING_VALUE_v4);
|
final boolean boundIsMissingValue = alreadyFound && boundValue.equals(VCFConstants.MISSING_VALUE_v4);
|
||||||
|
|
||||||
if ( alreadyFound && ! boundValue.equals(p.getValue()) && ! boundIsMissingValue ) {
|
if ( alreadyFound && ! boundValue.equals(p.getValue()) && ! boundIsMissingValue ) {
|
||||||
// we found the value but we're inconsistent, put it in the exclude list
|
// we found the value but we're inconsistent, put it in the exclude list
|
||||||
|
|
@ -604,7 +604,7 @@ public class VariantContextUtils {
|
||||||
|
|
||||||
// if we have more alternate alleles in the merged VC than in one or more of the
|
// if we have more alternate alleles in the merged VC than in one or more of the
|
||||||
// original VCs, we need to strip out the GL/PLs (because they are no longer accurate), as well as allele-dependent attributes like AC,AF
|
// original VCs, we need to strip out the GL/PLs (because they are no longer accurate), as well as allele-dependent attributes like AC,AF
|
||||||
for ( VariantContext vc : VCs ) {
|
for ( final VariantContext vc : VCs ) {
|
||||||
if (vc.alleles.size() == 1)
|
if (vc.alleles.size() == 1)
|
||||||
continue;
|
continue;
|
||||||
if ( hasPLIncompatibleAlleles(alleles, vc.alleles)) {
|
if ( hasPLIncompatibleAlleles(alleles, vc.alleles)) {
|
||||||
|
|
@ -634,11 +634,11 @@ public class VariantContextUtils {
|
||||||
setValue = MERGE_INTERSECTION;
|
setValue = MERGE_INTERSECTION;
|
||||||
else if ( nFiltered == VCs.size() ) // everything was filtered out
|
else if ( nFiltered == VCs.size() ) // everything was filtered out
|
||||||
setValue = MERGE_FILTER_IN_ALL;
|
setValue = MERGE_FILTER_IN_ALL;
|
||||||
else if ( variantSources.isEmpty() ) // everyone was reference
|
else if ( variantSources.isEmpty() ) // everyone was reference
|
||||||
setValue = MERGE_REF_IN_ALL;
|
setValue = MERGE_REF_IN_ALL;
|
||||||
else {
|
else {
|
||||||
LinkedHashSet<String> s = new LinkedHashSet<String>();
|
final LinkedHashSet<String> s = new LinkedHashSet<String>();
|
||||||
for ( VariantContext vc : VCs )
|
for ( final VariantContext vc : VCs )
|
||||||
if ( vc.isVariant() )
|
if ( vc.isVariant() )
|
||||||
s.add( vc.isFiltered() ? MERGE_FILTER_PREFIX + vc.getSource() : vc.getSource() );
|
s.add( vc.isFiltered() ? MERGE_FILTER_PREFIX + vc.getSource() : vc.getSource() );
|
||||||
setValue = Utils.join("-", s);
|
setValue = Utils.join("-", s);
|
||||||
|
|
@ -663,7 +663,7 @@ public class VariantContextUtils {
|
||||||
builder.filters(filters).attributes(mergeInfoWithMaxAC ? attributesWithMaxAC : attributes);
|
builder.filters(filters).attributes(mergeInfoWithMaxAC ? attributesWithMaxAC : attributes);
|
||||||
|
|
||||||
// Trim the padded bases of all alleles if necessary
|
// Trim the padded bases of all alleles if necessary
|
||||||
VariantContext merged = createVariantContextWithTrimmedAlleles(builder.make());
|
final VariantContext merged = createVariantContextWithTrimmedAlleles(builder.make());
|
||||||
if ( printMessages && remapped ) System.out.printf("Remapped => %s%n", merged);
|
if ( printMessages && remapped ) System.out.printf("Remapped => %s%n", merged);
|
||||||
return merged;
|
return merged;
|
||||||
}
|
}
|
||||||
|
|
@ -724,7 +724,7 @@ public class VariantContextUtils {
|
||||||
|
|
||||||
Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>();
|
Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>();
|
||||||
|
|
||||||
for (Allele a : inputVC.getAlleles()) {
|
for (final Allele a : inputVC.getAlleles()) {
|
||||||
if (a.isSymbolic()) {
|
if (a.isSymbolic()) {
|
||||||
alleles.add(a);
|
alleles.add(a);
|
||||||
originalToTrimmedAlleleMap.put(a, a);
|
originalToTrimmedAlleleMap.put(a, a);
|
||||||
|
|
@ -741,11 +741,9 @@ public class VariantContextUtils {
|
||||||
// example: mixed records such as {TA*,TGA,TG}
|
// example: mixed records such as {TA*,TGA,TG}
|
||||||
boolean hasNullAlleles = false;
|
boolean hasNullAlleles = false;
|
||||||
|
|
||||||
for (Allele a: originalToTrimmedAlleleMap.values()) {
|
for (final Allele a: originalToTrimmedAlleleMap.values()) {
|
||||||
if (a.isNull())
|
if (a.isNull())
|
||||||
hasNullAlleles = true;
|
hasNullAlleles = true;
|
||||||
if (a.isReference())
|
|
||||||
refAllele = a;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!hasNullAlleles)
|
if (!hasNullAlleles)
|
||||||
|
|
@ -755,7 +753,7 @@ public class VariantContextUtils {
|
||||||
|
|
||||||
List<Allele> originalAlleles = genotype.getAlleles();
|
List<Allele> originalAlleles = genotype.getAlleles();
|
||||||
List<Allele> trimmedAlleles = new ArrayList<Allele>();
|
List<Allele> trimmedAlleles = new ArrayList<Allele>();
|
||||||
for ( Allele a : originalAlleles ) {
|
for ( final Allele a : originalAlleles ) {
|
||||||
if ( a.isCalled() )
|
if ( a.isCalled() )
|
||||||
trimmedAlleles.add(originalToTrimmedAlleleMap.get(a));
|
trimmedAlleles.add(originalToTrimmedAlleleMap.get(a));
|
||||||
else
|
else
|
||||||
|
|
@ -837,7 +835,6 @@ public class VariantContextUtils {
|
||||||
public AlleleMapper(Map<Allele, Allele> map) { this.map = map; }
|
public AlleleMapper(Map<Allele, Allele> map) { this.map = map; }
|
||||||
public boolean needsRemapping() { return this.map != null; }
|
public boolean needsRemapping() { return this.map != null; }
|
||||||
public Collection<Allele> values() { return map != null ? map.values() : vc.getAlleles(); }
|
public Collection<Allele> values() { return map != null ? map.values() : vc.getAlleles(); }
|
||||||
|
|
||||||
public Allele remap(Allele a) { return map != null && map.containsKey(a) ? map.get(a) : a; }
|
public Allele remap(Allele a) { return map != null && map.containsKey(a) ? map.get(a) : a; }
|
||||||
|
|
||||||
public List<Allele> remap(List<Allele> as) {
|
public List<Allele> remap(List<Allele> as) {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue