From 2bcbdd469f1c7a48a0c933e652d1c488274280b0 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 19 Apr 2013 16:11:29 -0400 Subject: [PATCH] leftAlignCigarSequentially now supports haplotypes with insertions and deletions where the deletion allele was previously removed by the leftAlignSingleIndel during it's cleanup phase. --- .../haplotypecaller/DeBruijnAssembler.java | 14 +++++++------- .../DeBruijnAssemblerUnitTest.java | 18 +++++++++++++----- .../sting/utils/sam/AlignmentUtils.java | 7 ++++--- 3 files changed, 24 insertions(+), 15 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java index 12a4841bf..5a5946183 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java @@ -410,9 +410,6 @@ public class DeBruijnAssembler extends LocalAssemblyEngine { // extend partial haplotypes which are anchored in the reference to include the full active region h = extendPartialHaplotype(h, activeRegionStart, refWithPadding); final Cigar leftAlignedCigar = leftAlignCigarSequentially(AlignmentUtils.consolidateCigar(h.getCigar()), refWithPadding, h.getBases(), activeRegionStart, 0); - if( leftAlignedCigar.getReferenceLength() != refHaplotype.getCigar().getReferenceLength() ) { // left alignment failure - continue; - } if( !returnHaplotypes.contains(h) ) { h.setAlignmentStartHapwrtRef(activeRegionStart); h.setCigar(leftAlignedCigar); @@ -548,9 +545,8 @@ public class DeBruijnAssembler extends LocalAssemblyEngine { final CigarElement ce = cigar.getCigarElement(i); if (ce.getOperator() == CigarOperator.D || ce.getOperator() == CigarOperator.I) { cigarToAlign.add(ce); - for( final CigarElement toAdd : AlignmentUtils.leftAlignIndel(cigarToAlign, refSeq, readSeq, refIndex, readIndex, false).getCigarElements() ) { - cigarToReturn.add(toAdd); - } + final Cigar leftAligned = AlignmentUtils.leftAlignSingleIndel(cigarToAlign, refSeq, readSeq, refIndex, readIndex, false); + for ( final CigarElement toAdd : leftAligned.getCigarElements() ) { cigarToReturn.add(toAdd); } refIndex += cigarToAlign.getReferenceLength(); readIndex += cigarToAlign.getReadLength(); cigarToAlign = new Cigar(); @@ -563,7 +559,11 @@ public class DeBruijnAssembler extends LocalAssemblyEngine { cigarToReturn.add(toAdd); } } - return cigarToReturn; + + final Cigar result = AlignmentUtils.consolidateCigar(cigarToReturn); + if( result.getReferenceLength() != cigar.getReferenceLength() ) + throw new IllegalStateException("leftAlignCigarSequentially failed to produce a valid CIGAR. Reference lengths differ. Initial cigar " + cigar + " left aligned into " + result); + return result; } /** diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssemblerUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssemblerUnitTest.java index e6dea4d11..e1559a13a 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssemblerUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssemblerUnitTest.java @@ -52,10 +52,7 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller; * Date: 3/27/12 */ -import net.sf.samtools.Cigar; -import net.sf.samtools.CigarElement; -import net.sf.samtools.CigarOperator; -import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.*; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.DeBruijnGraph; import org.broadinstitute.sting.utils.haplotype.Haplotype; @@ -125,6 +122,17 @@ public class DeBruijnAssemblerUnitTest extends BaseTest { } } + @Test(enabled = true) + public void testLeftAlignCigarSequentiallyAdjacentID() { + final String ref = "GTCTCTCTCTCTCTCTCTATATATATATATATATTT"; + final String hap = "GTCTCTCTCTCTCTCTCTCTCTATATATATATATTT"; + final Cigar originalCigar = TextCigarCodec.getSingleton().decode("18M4I12M4D2M"); + + final Cigar result = new DeBruijnAssembler().leftAlignCigarSequentially(originalCigar, ref.getBytes(), hap.getBytes(), 0, 0); + logger.warn("Result is " + result); + Assert.assertEquals(originalCigar.getReferenceLength(), result.getReferenceLength(), "Reference lengths are different"); + } + private static class MockBuilder extends DeBruijnGraphBuilder { public final List addedPairs = new LinkedList(); @@ -165,7 +173,7 @@ public class DeBruijnAssemblerUnitTest extends BaseTest { return tests.toArray(new Object[][]{}); } - @Test(dataProvider = "AddReadKmersToGraph") + @Test(dataProvider = "AddReadKmersToGraph", enabled = ! DEBUG) public void testAddReadKmersToGraph(final String bases, final int kmerSize, final List badQualsSites) { final int readLen = bases.length(); final DeBruijnAssembler assembler = new DeBruijnAssembler(); diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java index e48d1ca4c..59a22c550 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java @@ -664,7 +664,7 @@ public final class AlignmentUtils { if ( numIndels == 0 ) return cigar; if ( numIndels == 1 ) - return leftAlignSingleIndel(cigar, refSeq, readSeq, refIndex, readIndex); + return leftAlignSingleIndel(cigar, refSeq, readSeq, refIndex, readIndex, true); // if we got here then there is more than 1 indel in the alignment if ( doNotThrowExceptionForMultipleIndels ) @@ -709,10 +709,11 @@ public final class AlignmentUtils { * @param readSeq read sequence * @param refIndex 0-based alignment start position on ref * @param readIndex 0-based alignment start position on read + * @param cleanupCigar if true, we'll cleanup the resulting cigar element, removing 0 length elements and deletions from the first cigar position * @return a non-null cigar, in which the single indel is guaranteed to be placed at the leftmost possible position across a repeat (if any) */ @Ensures("result != null") - public static Cigar leftAlignSingleIndel(Cigar cigar, final byte[] refSeq, final byte[] readSeq, final int refIndex, final int readIndex) { + public static Cigar leftAlignSingleIndel(Cigar cigar, final byte[] refSeq, final byte[] readSeq, final int refIndex, final int readIndex, final boolean cleanupCigar) { ensureLeftAlignmentHasGoodArguments(cigar, refSeq, readSeq, refIndex, readIndex); int indexOfIndel = -1; @@ -751,7 +752,7 @@ public final class AlignmentUtils { cigar = newCigar; i = -1; if (reachedEndOfRead) - cigar = cleanUpCigar(cigar); + cigar = cleanupCigar ? cleanUpCigar(cigar) : cigar; } if (reachedEndOfRead)