From c87ad8c0ef9868a86463e58343322261b0e4fe77 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Wed, 9 Jan 2013 10:00:46 -0500 Subject: [PATCH 1/2] Bug fixes related to HC's GGA mode. Tracking just the artificial allele isn't sufficient when there are multiple GGA records that change the reference basis. Also, duplicated records screw up the tracking of merged alleles. --- .../haplotypecaller/GenotypingEngine.java | 33 +++++++++----- .../SimpleDeBruijnAssembler.java | 5 ++- .../broadinstitute/sting/utils/Haplotype.java | 43 +++++++++++++------ 3 files changed, 55 insertions(+), 26 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java index 0c7e43525..7c25ab551 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java @@ -115,19 +115,31 @@ public class GenotypingEngine { } } } else { // we are in GGA mode! + int compCount = 0; for( final VariantContext compVC : activeAllelesToGenotype ) { if( compVC.getStart() == loc ) { - priorityList.clear(); int alleleCount = 0; for( final Allele compAltAllele : compVC.getAlternateAlleles() ) { ArrayList alleleSet = new ArrayList(2); alleleSet.add(compVC.getReference()); alleleSet.add(compAltAllele); - priorityList.add("Allele" + alleleCount); - eventsAtThisLoc.add(new VariantContextBuilder(compVC).alleles(alleleSet).source("Allele"+alleleCount).make()); + final String vcSourceName = "Comp" + compCount + "Allele" + alleleCount; + // check if this event is already in the list of events due to a repeat in the input alleles track + final VariantContext candidateEventToAdd = new VariantContextBuilder(compVC).alleles(alleleSet).source(vcSourceName).make(); + boolean alreadyExists = false; + for( final VariantContext eventToTest : eventsAtThisLoc ) { + if( eventToTest.hasSameAllelesAs(candidateEventToAdd) ) { + alreadyExists = true; + } + } + if( !alreadyExists ) { + priorityList.add(vcSourceName); + eventsAtThisLoc.add(candidateEventToAdd); + } alleleCount++; } } + compCount++; } } @@ -137,14 +149,14 @@ public class GenotypingEngine { final Map> eventMapper = createEventMapper(loc, eventsAtThisLoc, haplotypes); // Sanity check the priority list for mistakes - sanityCheckPriorityList( priorityList, eventsAtThisLoc ); + validatePriorityList( priorityList, eventsAtThisLoc ); // Merge the event to find a common reference representation final VariantContext mergedVC = VariantContextUtils.simpleMerge(eventsAtThisLoc, priorityList, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false); if( mergedVC == null ) { continue; } if( eventsAtThisLoc.size() != mergedVC.getAlternateAlleles().size() ) { - throw new ReviewedStingException("Something went wrong in the merging of alleles."); + throw new ReviewedStingException("Record size mismatch! Something went wrong in the merging of alleles."); } final HashMap mergeMap = new HashMap(); mergeMap.put(null, mergedVC.getReference()); // the reference event (null) --> the reference allele @@ -198,7 +210,7 @@ public class GenotypingEngine { return genotypes; } - private void sanityCheckPriorityList( final ArrayList priorityList, final ArrayList eventsAtThisLoc ) { + private void validatePriorityList( final ArrayList priorityList, final ArrayList eventsAtThisLoc ) { for( final VariantContext vc : eventsAtThisLoc ) { if( !priorityList.contains(vc.getSource()) ) { throw new ReviewedStingException("Event found on haplotype that wasn't added to priority list. Something went wrong in the merging of alleles."); @@ -453,8 +465,7 @@ public class GenotypingEngine { protected static Map> createEventMapper( final int loc, final List eventsAtThisLoc, final List haplotypes ) { final Map> eventMapper = new HashMap>(eventsAtThisLoc.size()+1); - final Allele refAllele = eventsAtThisLoc.get(0).getReference(); - VariantContext refVC = eventsAtThisLoc.get(0); + VariantContext refVC = eventsAtThisLoc.get(0); // the genome loc is the only safe thing to pull out of this VC because ref/alt pairs might change reference basis eventMapper.put(new Event(null), new ArrayList()); for( final VariantContext vc : eventsAtThisLoc ) { eventMapper.put(new Event(vc), new ArrayList()); @@ -464,11 +475,11 @@ public class GenotypingEngine { for( final Haplotype h : haplotypes ) { if( h.isArtificialHaplotype() && loc == h.getArtificialAllelePosition() ) { final ArrayList alleles = new ArrayList(2); - alleles.add(refAllele); - alleles.add(h.getArtificialAllele()); + alleles.add(h.getArtificialRefAllele()); + alleles.add(h.getArtificialAltAllele()); final Event artificialVC = new Event( (new VariantContextBuilder()).source("artificialHaplotype") .alleles(alleles) - .loc(refVC.getChr(), refVC.getStart(), refVC.getStart() + refAllele.length() - 1).make() ); + .loc(refVC.getChr(), refVC.getStart(), refVC.getStart() + h.getArtificialRefAllele().length() - 1).make() ); if( eventMapper.containsKey(artificialVC) ) { eventMapper.get(artificialVC).add(h); } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java index 0a98f54e9..87db5c9f7 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java @@ -379,8 +379,9 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine { h.setAlignmentStartHapwrtRef( swConsensus2.getAlignmentStart2wrt1() ); h.setCigar( AlignmentUtils.leftAlignIndel(swConsensus2.getCigar(), ref, h.getBases(), swConsensus2.getAlignmentStart2wrt1(), 0) ); - if ( haplotype.isArtificialHaplotype() ) - h.setArtificialAllele(haplotype.getArtificialAllele(), haplotype.getArtificialAllelePosition()); + if ( haplotype.isArtificialHaplotype() ) { + h.setArtificialEvent(haplotype.getArtificialEvent()); + } h.leftBreakPoint = leftBreakPoint; h.rightBreakPoint = rightBreakPoint; if( swConsensus2.getCigar().toString().contains("S") || swConsensus2.getCigar().getReferenceLength() != activeRegionStop - activeRegionStart ) { // protect against SW failures diff --git a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java index 2476a666e..dfca05fcd 100755 --- a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java @@ -46,8 +46,7 @@ public class Haplotype { private int alignmentStartHapwrtRef; public int leftBreakPoint = 0; public int rightBreakPoint = 0; - private Allele artificialAllele = null; - private int artificialAllelePosition = -1; + private Event artificialEvent = null; /** * Create a simple consensus sequence with provided bases and a uniform quality over all bases of qual @@ -70,10 +69,9 @@ public class Haplotype { this(bases, 0); } - protected Haplotype( final byte[] bases, final Allele artificialAllele, final int artificialAllelePosition ) { + protected Haplotype( final byte[] bases, final Event artificialEvent ) { this(bases, 0); - this.artificialAllele = artificialAllele; - this.artificialAllelePosition = artificialAllelePosition; + this.artificialEvent = artificialEvent; } public Haplotype( final byte[] bases, final GenomeLoc loc ) { @@ -152,20 +150,27 @@ public class Haplotype { } public boolean isArtificialHaplotype() { - return artificialAllele != null; + return artificialEvent != null; } - public Allele getArtificialAllele() { - return artificialAllele; + public Event getArtificialEvent() { + return artificialEvent; + } + + public Allele getArtificialRefAllele() { + return artificialEvent.ref; + } + + public Allele getArtificialAltAllele() { + return artificialEvent.alt; } public int getArtificialAllelePosition() { - return artificialAllelePosition; + return artificialEvent.pos; } - public void setArtificialAllele(final Allele artificialAllele, final int artificialAllelePosition) { - this.artificialAllele = artificialAllele; - this.artificialAllelePosition = artificialAllelePosition; + public void setArtificialEvent( final Event artificialEvent ) { + this.artificialEvent = artificialEvent; } @Requires({"refInsertLocation >= 0"}) @@ -179,7 +184,7 @@ public class Haplotype { newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(bases, 0, haplotypeInsertLocation)); // bases before the variant newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, altAllele.getBases()); // the alt allele of the variant newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(bases, haplotypeInsertLocation + refAllele.length(), bases.length)); // bases after the variant - return new Haplotype(newHaplotypeBases, altAllele, genomicInsertLocation); + return new Haplotype(newHaplotypeBases, new Event(refAllele, altAllele, genomicInsertLocation)); } public static class HaplotypeBaseComparator implements Comparator, Serializable { @@ -244,4 +249,16 @@ public class Haplotype { return haplotypeMap; } + + private static class Event { + public Allele ref; + public Allele alt; + public int pos; + + public Event( final Allele ref, final Allele alt, final int pos ) { + this.ref = ref; + this.alt = alt; + this.pos = pos; + } + } } From 396bce1f28ee622f3ebfd1237e975a3cd30e16ba Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Wed, 9 Jan 2013 10:51:30 -0500 Subject: [PATCH 2/2] Reverting this change until we can figure out the right thing to do here. --- .../walkers/haplotypecaller/LikelihoodCalculationEngine.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java index 8e90285d9..e416b489b 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java @@ -124,8 +124,8 @@ public class LikelihoodCalculationEngine { final Haplotype haplotype = haplotypes.get(jjj); // TODO -- need to test against a reference/position with non-standard bases - if ( !Allele.acceptableAlleleBases(haplotype.getBases(), false) ) - continue; + //if ( !Allele.acceptableAlleleBases(haplotype.getBases(), false) ) + // continue; final int haplotypeStart = ( previousHaplotypeSeen == null ? 0 : computeFirstDifferingPosition(haplotype.getBases(), previousHaplotypeSeen.getBases()) ); previousHaplotypeSeen = haplotype;