Lots more GGA fixes for the HC now that I understand what's going on internally. Integration tests pass except for the GGA test which I believe now produces better results.
This commit is contained in:
parent
4f243acaa6
commit
937ac7290f
|
|
@ -62,6 +62,7 @@ public class GenotypingEngine {
|
|||
final List<VariantContext> activeAllelesToGenotype ) {
|
||||
|
||||
final ArrayList<Pair<VariantContext, Map<Allele, List<Haplotype>>>> returnCalls = new ArrayList<Pair<VariantContext, Map<Allele, List<Haplotype>>>>();
|
||||
final boolean in_GGA_mode = !activeAllelesToGenotype.isEmpty();
|
||||
|
||||
// Using the cigar from each called haplotype figure out what events need to be written out in a VCF file
|
||||
final TreeSet<Integer> startPosKeySet = new TreeSet<Integer>();
|
||||
|
|
@ -70,7 +71,7 @@ public class GenotypingEngine {
|
|||
for( final Haplotype h : haplotypes ) {
|
||||
// Walk along the alignment and turn any difference from the reference into an event
|
||||
h.setEventMap( generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), ref, h.getBases(), refLoc, "HC" + count++ ) );
|
||||
if( activeAllelesToGenotype.isEmpty() ) { startPosKeySet.addAll(h.getEventMap().keySet()); }
|
||||
if( !in_GGA_mode ) { startPosKeySet.addAll(h.getEventMap().keySet()); }
|
||||
if( DEBUG ) {
|
||||
System.out.println( h.toString() );
|
||||
System.out.println( "> Cigar = " + h.getCigar() );
|
||||
|
|
@ -80,10 +81,10 @@ public class GenotypingEngine {
|
|||
}
|
||||
|
||||
cleanUpSymbolicUnassembledEvents( haplotypes );
|
||||
if( activeAllelesToGenotype.isEmpty() && haplotypes.get(0).getSampleKeySet().size() >= 10 ) { // if not in GGA mode and have at least 10 samples try to create MNP and complex events by looking at LD structure
|
||||
if( !in_GGA_mode && haplotypes.get(0).getSampleKeySet().size() >= 10 ) { // if not in GGA mode and have at least 10 samples try to create MNP and complex events by looking at LD structure
|
||||
mergeConsecutiveEventsBasedOnLD( haplotypes, startPosKeySet, ref, refLoc );
|
||||
}
|
||||
if( !activeAllelesToGenotype.isEmpty() ) { // we are in GGA mode!
|
||||
if( in_GGA_mode ) {
|
||||
for( final VariantContext compVC : activeAllelesToGenotype ) {
|
||||
startPosKeySet.add( compVC.getStart() );
|
||||
}
|
||||
|
|
@ -95,7 +96,7 @@ public class GenotypingEngine {
|
|||
final ArrayList<VariantContext> eventsAtThisLoc = new ArrayList<VariantContext>(); // the overlapping events to merge into a common reference view
|
||||
final ArrayList<String> priorityList = new ArrayList<String>(); // used to merge overlapping events into common reference view
|
||||
|
||||
if( activeAllelesToGenotype.isEmpty() ) {
|
||||
if( !in_GGA_mode ) {
|
||||
for( final Haplotype h : haplotypes ) {
|
||||
final HashMap<Integer,VariantContext> eventMap = h.getEventMap();
|
||||
final VariantContext vc = eventMap.get(loc);
|
||||
|
|
@ -129,9 +130,8 @@ public class GenotypingEngine {
|
|||
final Allele refAllele = eventsAtThisLoc.get(0).getReference();
|
||||
final ArrayList<Allele> alleleOrdering = new ArrayList<Allele>(alleleMapper.size());
|
||||
alleleOrdering.add(refAllele);
|
||||
for ( final Allele allele : alleleMapper.keySet() ) {
|
||||
if ( !refAllele.equals(allele) )
|
||||
alleleOrdering.add(allele);
|
||||
for( final VariantContext vc : eventsAtThisLoc ) {
|
||||
alleleOrdering.add(vc.getAlternateAllele(0));
|
||||
}
|
||||
|
||||
// Sanity check the priority list
|
||||
|
|
@ -154,6 +154,16 @@ public class GenotypingEngine {
|
|||
final VariantContext mergedVC = VariantContextUtils.simpleMerge(genomeLocParser, eventsAtThisLoc, priorityList, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false);
|
||||
if( mergedVC == null ) { continue; }
|
||||
|
||||
// let's update the Allele keys in the mapper because they can change after merging when there are complex events
|
||||
Map<Allele, List<Haplotype>> updatedAlleleMapper = new HashMap<Allele, List<Haplotype>>(alleleMapper.size());
|
||||
for ( int i = 0; i < mergedVC.getNAlleles(); i++ ) {
|
||||
final Allele oldAllele = alleleOrdering.get(i);
|
||||
final Allele newAllele = mergedVC.getAlleles().get(i);
|
||||
updatedAlleleMapper.put(newAllele, alleleMapper.get(oldAllele));
|
||||
alleleOrdering.set(i, newAllele);
|
||||
}
|
||||
alleleMapper = updatedAlleleMapper;
|
||||
|
||||
if( DEBUG ) {
|
||||
System.out.println("Genotyping event at " + loc + " with alleles = " + mergedVC.getAlleles());
|
||||
//System.out.println("Event/haplotype allele mapping = " + alleleMapper);
|
||||
|
|
@ -358,48 +368,48 @@ public class GenotypingEngine {
|
|||
@Ensures({"result.size() == eventsAtThisLoc.size() + 1"})
|
||||
protected static Map<Allele, List<Haplotype>> createAlleleMapper( final int loc, final List<VariantContext> eventsAtThisLoc, final List<Haplotype> haplotypes ) {
|
||||
|
||||
final Allele refAllele = eventsAtThisLoc.get(0).getReference();
|
||||
|
||||
final Map<Allele, List<Haplotype>> alleleMapper = new HashMap<Allele, List<Haplotype>>(eventsAtThisLoc.size()+1);
|
||||
final Allele refAllele = eventsAtThisLoc.get(0).getReference();
|
||||
alleleMapper.put(refAllele, new ArrayList<Haplotype>());
|
||||
for( final VariantContext vc : eventsAtThisLoc )
|
||||
alleleMapper.put(vc.getAlternateAllele(0), new ArrayList<Haplotype>());
|
||||
|
||||
final ArrayList<Haplotype> undeterminedHaplotypes = new ArrayList<Haplotype>(haplotypes.size());
|
||||
for( final Haplotype h : haplotypes ) {
|
||||
if( h.getEventMap().get(loc) == null ) { // no event at this location so this is a reference-supporting haplotype
|
||||
if ( !alleleMapper.containsKey(refAllele) )
|
||||
alleleMapper.put(refAllele, new ArrayList<Haplotype>());
|
||||
alleleMapper.get(refAllele).add(h);
|
||||
} else if ( h.isArtificialHaplotype() ) {
|
||||
if ( !alleleMapper.containsKey(h.getArtificialAllele()) )
|
||||
alleleMapper.put(h.getArtificialAllele(), new ArrayList<Haplotype>());
|
||||
} else if( h.isArtificialHaplotype() && loc == h.getArtificialAllelePosition() && alleleMapper.containsKey(h.getArtificialAllele()) ) {
|
||||
alleleMapper.get(h.getArtificialAllele()).add(h);
|
||||
} else {
|
||||
boolean haplotypeIsDetermined = false;
|
||||
for( final VariantContext vcAtThisLoc : eventsAtThisLoc ) {
|
||||
if( h.getEventMap().get(loc).hasSameAllelesAs(vcAtThisLoc) ) {
|
||||
final Allele altAllele = vcAtThisLoc.getAlternateAllele(0);
|
||||
if ( !alleleMapper.containsKey(altAllele) )
|
||||
alleleMapper.put(altAllele, new ArrayList<Haplotype>());
|
||||
alleleMapper.get(altAllele).add(h);
|
||||
alleleMapper.get(vcAtThisLoc.getAlternateAllele(0)).add(h);
|
||||
haplotypeIsDetermined = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if( !haplotypeIsDetermined )
|
||||
undeterminedHaplotypes.add(h);
|
||||
}
|
||||
}
|
||||
|
||||
for( final Haplotype h : haplotypes ) {
|
||||
if ( h.getEventMap().get(loc) == null || h.isArtificialHaplotype() )
|
||||
continue;
|
||||
|
||||
for( final Haplotype h : undeterminedHaplotypes ) {
|
||||
Allele matchingAllele = null;
|
||||
for ( final Map.Entry<Allele, List<Haplotype>> alleleToTest : alleleMapper.entrySet() ) {
|
||||
if ( alleleToTest.getKey().equals(refAllele) )
|
||||
for( final Map.Entry<Allele, List<Haplotype>> alleleToTest : alleleMapper.entrySet() ) {
|
||||
// don't test against the reference allele
|
||||
if( alleleToTest.getKey().equals(refAllele) )
|
||||
continue;
|
||||
|
||||
final Haplotype artificialHaplotype = alleleToTest.getValue().get(0);
|
||||
if ( isSubSetOf(artificialHaplotype.getEventMap(), h.getEventMap()) ) {
|
||||
if( isSubSetOf(artificialHaplotype.getEventMap(), h.getEventMap(), true) ) {
|
||||
matchingAllele = alleleToTest.getKey();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if ( matchingAllele == null )
|
||||
if( matchingAllele == null )
|
||||
matchingAllele = refAllele;
|
||||
alleleMapper.get(matchingAllele).add(h);
|
||||
}
|
||||
|
|
@ -407,20 +417,36 @@ public class GenotypingEngine {
|
|||
return alleleMapper;
|
||||
}
|
||||
|
||||
protected static boolean isSubSetOf(final Map<Integer, VariantContext> subset, final Map<Integer, VariantContext> superset) {
|
||||
protected static boolean isSubSetOf(final Map<Integer, VariantContext> subset, final Map<Integer, VariantContext> superset, final boolean resolveSupersetToSubset) {
|
||||
|
||||
for ( final Map.Entry<Integer, VariantContext> fromSubset : subset.entrySet() ) {
|
||||
final VariantContext fromSuperset = superset.get(fromSubset.getKey());
|
||||
if ( fromSuperset == null )
|
||||
return false;
|
||||
|
||||
if ( !fromSuperset.hasAlternateAllele(fromSubset.getValue().getAlternateAllele(0)) )
|
||||
List<Allele> supersetAlleles = fromSuperset.getAlternateAlleles();
|
||||
if ( resolveSupersetToSubset )
|
||||
supersetAlleles = resolveAlternateAlleles(fromSubset.getValue().getReference(), fromSuperset.getReference(), supersetAlleles);
|
||||
|
||||
if ( !supersetAlleles.contains(fromSubset.getValue().getAlternateAllele(0)) )
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
private static List<Allele> resolveAlternateAlleles(final Allele targetReference, final Allele actualReference, final List<Allele> currentAlleles) {
|
||||
if ( targetReference.length() <= actualReference.length() )
|
||||
return currentAlleles;
|
||||
|
||||
final List<Allele> newAlleles = new ArrayList<Allele>(currentAlleles.size());
|
||||
final byte[] extraBases = Arrays.copyOfRange(targetReference.getBases(), actualReference.length(), targetReference.length());
|
||||
for ( final Allele a : currentAlleles ) {
|
||||
newAlleles.add(Allele.extend(a, extraBases));
|
||||
}
|
||||
return newAlleles;
|
||||
}
|
||||
|
||||
@Ensures({"result.size() == haplotypeAllelesForSample.size()"})
|
||||
protected static List<Allele> findEventAllelesInSample( final List<Allele> eventAlleles, final List<Allele> haplotypeAlleles, final List<Allele> haplotypeAllelesForSample, final ArrayList<ArrayList<Haplotype>> alleleMapper, final ArrayList<Haplotype> haplotypes ) {
|
||||
if( haplotypeAllelesForSample.contains(Allele.NO_CALL) ) { return noCall; }
|
||||
|
|
|
|||
|
|
@ -196,8 +196,8 @@ public class LikelihoodCalculationEngine {
|
|||
}
|
||||
|
||||
// This function takes a set of samples to pool over and a haplotypeMapping
|
||||
@Requires({"haplotypeMapping.size() > 0"})
|
||||
@Ensures({"result.length == result[0].length", "result.length == haplotypeMapping.size()"})
|
||||
@Requires({"haplotypeList.size() > 0"})
|
||||
@Ensures({"result.length == result[0].length", "result.length == haplotypeList.size()"})
|
||||
public static double[][] computeDiploidHaplotypeLikelihoods( final Set<String> samples, final List<Haplotype> haplotypeList ) {
|
||||
|
||||
final int numHaplotypes = haplotypeList.size();
|
||||
|
|
|
|||
|
|
@ -278,9 +278,10 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
|
|||
final int activeRegionStart = refHaplotype.getAlignmentStartHapwrtRef();
|
||||
final int activeRegionStop = refHaplotype.getAlignmentStartHapwrtRef() + refHaplotype.getCigar().getReferenceLength();
|
||||
|
||||
for( final VariantContext compVC : activeAllelesToGenotype ) { // for GGA mode, add the desired allele into the haplotype
|
||||
// for GGA mode, add the desired allele into the haplotype
|
||||
for( final VariantContext compVC : activeAllelesToGenotype ) {
|
||||
for( final Allele compAltAllele : compVC.getAlternateAlleles() ) {
|
||||
final Haplotype insertedRefHaplotype = refHaplotype.insertAllele(compVC.getReference(), compAltAllele, activeRegionStart + compVC.getStart() - activeRegionWindow.getStart());
|
||||
final Haplotype insertedRefHaplotype = refHaplotype.insertAllele(compVC.getReference(), compAltAllele, activeRegionStart + compVC.getStart() - activeRegionWindow.getStart(), compVC.getStart());
|
||||
if( !addHaplotype( insertedRefHaplotype, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop ) ) {
|
||||
return returnHaplotypes;
|
||||
//throw new ReviewedStingException("Unable to add reference+allele haplotype during GGA-enabled assembly: " + insertedRefHaplotype);
|
||||
|
|
@ -290,15 +291,24 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
|
|||
|
||||
for( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph : graphs ) {
|
||||
for ( final KBestPaths.Path path : KBestPaths.getKBestPaths(graph, NUM_BEST_PATHS_PER_KMER_GRAPH) ) {
|
||||
|
||||
final Haplotype h = new Haplotype( path.getBases( graph ), path.getScore() );
|
||||
if( addHaplotype( h, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop ) ) {
|
||||
if( !activeAllelesToGenotype.isEmpty() ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present
|
||||
|
||||
// for GGA mode, add the desired allele into the haplotype if it isn't already present
|
||||
if( !activeAllelesToGenotype.isEmpty() ) {
|
||||
final HashMap<Integer,VariantContext> eventMap = GenotypingEngine.generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), fullReferenceWithPadding, h.getBases(), refLoc, "HCassembly" ); // BUGBUG: need to put this function in a shared place
|
||||
for( final VariantContext compVC : activeAllelesToGenotype ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present
|
||||
final VariantContext vcOnHaplotype = eventMap.get(compVC.getStart());
|
||||
if( vcOnHaplotype == null || !vcOnHaplotype.hasSameAllelesAs(compVC) ) {
|
||||
|
||||
// This if statement used to additionally have:
|
||||
// "|| !vcOnHaplotype.hasSameAllelesAs(compVC)"
|
||||
// but that can lead to problems downstream when e.g. you are injecting a 1bp deletion onto
|
||||
// a haplotype that already contains a 1bp insertion (so practically it is reference but
|
||||
// falls into the bin for the 1bp deletion because we keep track of the artificial alleles).
|
||||
if( vcOnHaplotype == null ) {
|
||||
for( final Allele compAltAllele : compVC.getAlternateAlleles() ) {
|
||||
addHaplotype( h.insertAllele(compVC.getReference(), compAltAllele, activeRegionStart + compVC.getStart() - activeRegionWindow.getStart()), fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop );
|
||||
addHaplotype( h.insertAllele(compVC.getReference(), compAltAllele, activeRegionStart + compVC.getStart() - activeRegionWindow.getStart(), compVC.getStart()), fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -370,7 +380,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
|
|||
h.setAlignmentStartHapwrtRef( swConsensus2.getAlignmentStart2wrt1() );
|
||||
h.setCigar( AlignmentUtils.leftAlignIndel(swConsensus2.getCigar(), ref, h.getBases(), swConsensus2.getAlignmentStart2wrt1(), 0) );
|
||||
if ( haplotype.isArtificialHaplotype() )
|
||||
h.setArtificialAllele(haplotype.getArtificialAllele());
|
||||
h.setArtificialAllele(haplotype.getArtificialAllele(), haplotype.getArtificialAllelePosition());
|
||||
h.leftBreakPoint = leftBreakPoint;
|
||||
h.rightBreakPoint = rightBreakPoint;
|
||||
if( swConsensus2.getCigar().toString().contains("S") || swConsensus2.getCigar().getReferenceLength() != activeRegionStop - activeRegionStart ) { // protect against SW failures
|
||||
|
|
|
|||
|
|
@ -29,9 +29,10 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
HCTest(NA12878_BAM, "", "baabae06c85d416920be434939124d7f");
|
||||
}
|
||||
|
||||
// TODO -- add more tests for GGA mode, especially with input alleles that are complex variants and/or not trimmed
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGA() {
|
||||
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "39da622b309597d7a0b082c8aa1748c9");
|
||||
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "f2d0309fdf50d5827e9c60ed0dd07e3f");
|
||||
}
|
||||
|
||||
private void HCTestComplexVariants(String bam, String args, String md5) {
|
||||
|
|
|
|||
|
|
@ -50,7 +50,8 @@ public class Haplotype {
|
|||
public int leftBreakPoint = 0;
|
||||
public int rightBreakPoint = 0;
|
||||
private Allele artificialAllele = null;
|
||||
|
||||
private int artificialAllelePosition = -1;
|
||||
|
||||
/**
|
||||
* Create a simple consensus sequence with provided bases and a uniform quality over all bases of qual
|
||||
*
|
||||
|
|
@ -72,9 +73,10 @@ public class Haplotype {
|
|||
this(bases, 0);
|
||||
}
|
||||
|
||||
public Haplotype( final byte[] bases, final Allele artificialAllele ) {
|
||||
protected Haplotype( final byte[] bases, final Allele artificialAllele, final int artificialAllelePosition ) {
|
||||
this(bases, 0);
|
||||
this.artificialAllele = artificialAllele;
|
||||
this.artificialAllelePosition = artificialAllelePosition;
|
||||
}
|
||||
|
||||
public Haplotype( final byte[] bases, final GenomeLoc loc ) {
|
||||
|
|
@ -185,12 +187,17 @@ public class Haplotype {
|
|||
return artificialAllele;
|
||||
}
|
||||
|
||||
public void setArtificialAllele(final Allele artificialAllele) {
|
||||
public int getArtificialAllelePosition() {
|
||||
return artificialAllelePosition;
|
||||
}
|
||||
|
||||
public void setArtificialAllele(final Allele artificialAllele, final int artificialAllelePosition) {
|
||||
this.artificialAllele = artificialAllele;
|
||||
this.artificialAllelePosition = artificialAllelePosition;
|
||||
}
|
||||
|
||||
@Requires({"refInsertLocation >= 0"})
|
||||
public Haplotype insertAllele( final Allele refAllele, final Allele altAllele, final int refInsertLocation ) {
|
||||
public Haplotype insertAllele( final Allele refAllele, final Allele altAllele, final int refInsertLocation, final int genomicInsertLocation ) {
|
||||
// refInsertLocation is in ref haplotype offset coordinates NOT genomic coordinates
|
||||
final int haplotypeInsertLocation = ReadUtils.getReadCoordinateForReferenceCoordinate(alignmentStartHapwrtRef, cigar, refInsertLocation, ReadUtils.ClippingTail.RIGHT_TAIL, true);
|
||||
if( haplotypeInsertLocation == -1 || haplotypeInsertLocation + refAllele.length() >= bases.length ) { // desired change falls inside deletion so don't bother creating a new haplotype
|
||||
|
|
@ -200,7 +207,7 @@ public class Haplotype {
|
|||
newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(bases, 0, haplotypeInsertLocation)); // bases before the variant
|
||||
newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, altAllele.getBases()); // the alt allele of the variant
|
||||
newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(bases, haplotypeInsertLocation + refAllele.length(), bases.length)); // bases after the variant
|
||||
return new Haplotype(newHaplotypeBases, altAllele);
|
||||
return new Haplotype(newHaplotypeBases, altAllele, genomicInsertLocation);
|
||||
}
|
||||
|
||||
public static class HaplotypeBaseComparator implements Comparator<Haplotype>, Serializable {
|
||||
|
|
|
|||
|
|
@ -159,7 +159,7 @@ public class HaplotypeUnitTest extends BaseTest {
|
|||
final VariantContext vc = new VariantContextBuilder().alleles(alleles).loc("1", loc, loc + h1refAllele.getBases().length - 1).make();
|
||||
h.setAlignmentStartHapwrtRef(0);
|
||||
h.setCigar(cigar);
|
||||
final Haplotype h1 = h.insertAllele(vc.getReference(), vc.getAlternateAllele(0), loc);
|
||||
final Haplotype h1 = h.insertAllele(vc.getReference(), vc.getAlternateAllele(0), loc, vc.getStart());
|
||||
final Haplotype h1expected = new Haplotype(newHap.getBytes());
|
||||
Assert.assertEquals(h1, h1expected);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue