diff --git a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java index 1820ddbc9..a8c622a96 100755 --- a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java @@ -31,6 +31,7 @@ import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.variantcontext.Allele; import java.util.*; @@ -41,6 +42,8 @@ public class Haplotype { private GenomeLoc genomeLocation = null; private HashMap readLikelihoodsPerSample = null; private boolean isRef = false; + private Cigar cigar; + private int alignmentStartHapwrtRef; /** * Create a simple consensus sequence with provided bases and a uniform quality over all bases of qual @@ -112,11 +115,7 @@ public class Haplotype { @Override public String toString() { - String returnString = ""; - for(int iii = 0; iii < bases.length; iii++) { - returnString += (char) bases[iii]; - } - return returnString; + return new String(bases); } public double[] getQuals() { @@ -134,11 +133,27 @@ public class Haplotype { return genomeLocation.getStop(); } - @Requires({"refInsertLocation >= 0", "hapStartInRefCoords >= 0"}) - public byte[] insertAllele( final Allele refAllele, final Allele altAllele, int refInsertLocation, final int hapStartInRefCoords, final Cigar haplotypeCigar ) { + public int getAlignmentStartHapwrtRef() { + return alignmentStartHapwrtRef; + } + + public void setAlignmentStartHapwrtRef( final int alignmentStartHapwrtRef ) { + this.alignmentStartHapwrtRef = alignmentStartHapwrtRef; + } + + public Cigar getCigar() { + return cigar; + } + + public void setCigar( final Cigar cigar ) { + this.cigar = cigar; + } + + @Requires({"refInsertLocation >= 0"}) + public byte[] insertAllele( final Allele refAllele, final Allele altAllele, int refInsertLocation ) { if( refAllele.length() != altAllele.length() ) { refInsertLocation++; } - int haplotypeInsertLocation = getHaplotypeCoordinateForReferenceCoordinate(hapStartInRefCoords, haplotypeCigar, refInsertLocation); + int haplotypeInsertLocation = ReadUtils.getReadCoordinateForReferenceCoordinate(alignmentStartHapwrtRef, cigar, refInsertLocation, ReadUtils.ClippingTail.RIGHT_TAIL, true); if( haplotypeInsertLocation == -1 ) { // desired change falls inside deletion so don't bother creating a new haplotype return bases.clone(); } @@ -233,85 +248,4 @@ public class Haplotype { return haplotypeMap; } - - // BUGBUG: copied from ReadClipper and slightly modified since we don't have the data in a GATKSAMRecord - private static Integer getHaplotypeCoordinateForReferenceCoordinate( final int haplotypeStart, final Cigar haplotypeCigar, final int refCoord ) { - int readBases = 0; - int refBases = 0; - boolean fallsInsideDeletion = false; - - int goal = refCoord - haplotypeStart; // The goal is to move this many reference bases - boolean goalReached = refBases == goal; - - Iterator cigarElementIterator = haplotypeCigar.getCigarElements().iterator(); - while (!goalReached && cigarElementIterator.hasNext()) { - CigarElement cigarElement = cigarElementIterator.next(); - int shift = 0; - - if (cigarElement.getOperator().consumesReferenceBases() || cigarElement.getOperator() == CigarOperator.SOFT_CLIP) { - if (refBases + cigarElement.getLength() < goal) - shift = cigarElement.getLength(); - else - shift = goal - refBases; - - refBases += shift; - } - goalReached = refBases == goal; - - if (!goalReached && cigarElement.getOperator().consumesReadBases()) - readBases += cigarElement.getLength(); - - if (goalReached) { - // Is this base's reference position within this cigar element? Or did we use it all? - boolean endsWithinCigar = shift < cigarElement.getLength(); - - // If it isn't, we need to check the next one. There should *ALWAYS* be a next one - // since we checked if the goal coordinate is within the read length, so this is just a sanity check. - if (!endsWithinCigar && !cigarElementIterator.hasNext()) - return -1; - - CigarElement nextCigarElement; - - // if we end inside the current cigar element, we just have to check if it is a deletion - if (endsWithinCigar) - fallsInsideDeletion = cigarElement.getOperator() == CigarOperator.DELETION; - - // if we end outside the current cigar element, we need to check if the next element is an insertion or deletion. - else { - nextCigarElement = cigarElementIterator.next(); - - // if it's an insertion, we need to clip the whole insertion before looking at the next element - if (nextCigarElement.getOperator() == CigarOperator.INSERTION) { - readBases += nextCigarElement.getLength(); - if (!cigarElementIterator.hasNext()) - return -1; - - nextCigarElement = cigarElementIterator.next(); - } - - // if it's a deletion, we will pass the information on to be handled downstream. - fallsInsideDeletion = nextCigarElement.getOperator() == CigarOperator.DELETION; - } - - // If we reached our goal outside a deletion, add the shift - if (!fallsInsideDeletion && cigarElement.getOperator().consumesReadBases()) - readBases += shift; - - // If we reached our goal inside a deletion, but the deletion is the next cigar element then we need - // to add the shift of the current cigar element but go back to it's last element to return the last - // base before the deletion (see warning in function contracts) - else if (fallsInsideDeletion && !endsWithinCigar) - readBases += shift - 1; - - // If we reached our goal inside a deletion then we must backtrack to the last base before the deletion - else if (fallsInsideDeletion && endsWithinCigar) - readBases--; - } - } - - if (!goalReached) - return -1; - - return (fallsInsideDeletion ? -1 : readBases); - } } diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index cbb4120dd..f9975148a 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -49,6 +49,7 @@ public class ReadUtils { } private static int DEFAULT_ADAPTOR_SIZE = 100; + public static int CLIPPING_GOAL_NOT_REACHED = -1; /** * A marker to tell which end of the read has been clipped @@ -362,7 +363,11 @@ public class ReadUtils { @Requires({"refCoord >= read.getUnclippedStart()", "refCoord <= read.getUnclippedEnd() || (read.getUnclippedEnd() < read.getUnclippedStart())"}) @Ensures({"result >= 0", "result < read.getReadLength()"}) public static int getReadCoordinateForReferenceCoordinate(GATKSAMRecord read, int refCoord, ClippingTail tail) { - Pair result = getReadCoordinateForReferenceCoordinate(read, refCoord); + return getReadCoordinateForReferenceCoordinate(read.getSoftStart(), read.getCigar(), refCoord, tail, false); + } + + public static int getReadCoordinateForReferenceCoordinate(final int alignmentStart, final Cigar cigar, final int refCoord, final ClippingTail tail, final boolean allowGoalNotReached) { + Pair result = getReadCoordinateForReferenceCoordinate(alignmentStart, cigar, refCoord, allowGoalNotReached); int readCoord = result.getFirst(); // Corner case one: clipping the right tail and falls on deletion, move to the next @@ -374,9 +379,9 @@ public class ReadUtils { // clipping the left tail and first base is insertion, go to the next read coordinate // with the same reference coordinate. Advance to the next cigar element, or to the // end of the read if there is no next element. - Pair firstElementIsInsertion = readStartsWithInsertion(read); + Pair firstElementIsInsertion = readStartsWithInsertion(cigar); if (readCoord == 0 && tail == ClippingTail.LEFT_TAIL && firstElementIsInsertion.getFirst()) - readCoord = Math.min(firstElementIsInsertion.getSecond().getLength(), read.getReadLength() - 1); + readCoord = Math.min(firstElementIsInsertion.getSecond().getLength(), cigar.getReadLength() - 1); return readCoord; } @@ -400,14 +405,25 @@ public class ReadUtils { @Requires({"refCoord >= read.getSoftStart()", "refCoord <= read.getSoftEnd()"}) @Ensures({"result.getFirst() >= 0", "result.getFirst() < read.getReadLength()"}) public static Pair getReadCoordinateForReferenceCoordinate(GATKSAMRecord read, int refCoord) { + return getReadCoordinateForReferenceCoordinate(read.getSoftStart(), read.getCigar(), refCoord, false); + } + + public static Pair getReadCoordinateForReferenceCoordinate(final int alignmentStart, final Cigar cigar, final int refCoord, final boolean allowGoalNotReached) { int readBases = 0; int refBases = 0; boolean fallsInsideDeletion = false; - int goal = refCoord - read.getSoftStart(); // The goal is to move this many reference bases + int goal = refCoord - alignmentStart; // The goal is to move this many reference bases + if (goal < 0) { + if (allowGoalNotReached) { + return new Pair(CLIPPING_GOAL_NOT_REACHED, false); + } else { + throw new ReviewedStingException("Somehow the requested coordinate is not covered by the read. Too many deletions?"); + } + } boolean goalReached = refBases == goal; - Iterator cigarElementIterator = read.getCigar().getCigarElements().iterator(); + Iterator cigarElementIterator = cigar.getCigarElements().iterator(); while (!goalReached && cigarElementIterator.hasNext()) { CigarElement cigarElement = cigarElementIterator.next(); int shift = 0; @@ -431,8 +447,13 @@ public class ReadUtils { // If it isn't, we need to check the next one. There should *ALWAYS* be a next one // since we checked if the goal coordinate is within the read length, so this is just a sanity check. - if (!endsWithinCigar && !cigarElementIterator.hasNext()) - throw new ReviewedStingException("Reference coordinate corresponds to a non-existent base in the read. This should never happen -- call Mauricio"); + if (!endsWithinCigar && !cigarElementIterator.hasNext()) { + if (allowGoalNotReached) { + return new Pair(CLIPPING_GOAL_NOT_REACHED, false); + } else { + throw new ReviewedStingException("Reference coordinate corresponds to a non-existent base in the read. This should never happen -- call Mauricio"); + } + } CigarElement nextCigarElement; @@ -447,8 +468,13 @@ public class ReadUtils { // if it's an insertion, we need to clip the whole insertion before looking at the next element if (nextCigarElement.getOperator() == CigarOperator.INSERTION) { readBases += nextCigarElement.getLength(); - if (!cigarElementIterator.hasNext()) - throw new ReviewedStingException("Reference coordinate corresponds to a non-existent base in the read. This should never happen -- call Mauricio"); + if (!cigarElementIterator.hasNext()) { + if (allowGoalNotReached) { + return new Pair(CLIPPING_GOAL_NOT_REACHED, false); + } else { + throw new ReviewedStingException("Reference coordinate corresponds to a non-existent base in the read. This should never happen -- call Mauricio"); + } + } nextCigarElement = cigarElementIterator.next(); } @@ -473,8 +499,13 @@ public class ReadUtils { } } - if (!goalReached) - throw new ReviewedStingException("Somehow the requested coordinate is not covered by the read. Too many deletions?"); + if (!goalReached) { + if (allowGoalNotReached) { + return new Pair(CLIPPING_GOAL_NOT_REACHED, false); + } else { + throw new ReviewedStingException("Somehow the requested coordinate is not covered by the read. Too many deletions?"); + } + } return new Pair(readBases, fallsInsideDeletion); } @@ -527,7 +558,11 @@ public class ReadUtils { * @return A pair with the answer (true/false) and the element or null if it doesn't exist */ public static Pair readStartsWithInsertion(GATKSAMRecord read) { - for (CigarElement cigarElement : read.getCigar().getCigarElements()) { + return readStartsWithInsertion(read.getCigar()); + } + + public static Pair readStartsWithInsertion(final Cigar cigar) { + for (CigarElement cigarElement : cigar.getCigarElements()) { if (cigarElement.getOperator() == CigarOperator.INSERTION) return new Pair(true, cigarElement); diff --git a/public/java/test/org/broadinstitute/sting/utils/HaplotypeUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/HaplotypeUnitTest.java index 86bc2d59b..87852f9ca 100644 --- a/public/java/test/org/broadinstitute/sting/utils/HaplotypeUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/HaplotypeUnitTest.java @@ -152,7 +152,9 @@ public class HaplotypeUnitTest extends BaseTest { final Haplotype h = new Haplotype(hap.getBytes()); final Allele h1refAllele = Allele.create(ref, true); final Allele h1altAllele = Allele.create(alt, false); - final Haplotype h1 = new Haplotype( h.insertAllele(h1refAllele, h1altAllele, loc - INDEL_PADDING_BASE, 0, cigar) ); + h.setAlignmentStartHapwrtRef(0); + h.setCigar(cigar); + final Haplotype h1 = new Haplotype( h.insertAllele(h1refAllele, h1altAllele, loc - INDEL_PADDING_BASE) ); final Haplotype h1expected = new Haplotype(newHap.getBytes()); Assert.assertEquals(h1, h1expected); }