leftAlignCigarSequentially now supports haplotypes with insertions and deletions where the deletion allele was previously removed by the leftAlignSingleIndel during it's cleanup phase.

This commit is contained in:
Mark DePristo 2013-04-19 16:11:29 -04:00
parent aefddaa219
commit 2bcbdd469f
3 changed files with 24 additions and 15 deletions

View File

@ -410,9 +410,6 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
// extend partial haplotypes which are anchored in the reference to include the full active region // extend partial haplotypes which are anchored in the reference to include the full active region
h = extendPartialHaplotype(h, activeRegionStart, refWithPadding); h = extendPartialHaplotype(h, activeRegionStart, refWithPadding);
final Cigar leftAlignedCigar = leftAlignCigarSequentially(AlignmentUtils.consolidateCigar(h.getCigar()), refWithPadding, h.getBases(), activeRegionStart, 0); final Cigar leftAlignedCigar = leftAlignCigarSequentially(AlignmentUtils.consolidateCigar(h.getCigar()), refWithPadding, h.getBases(), activeRegionStart, 0);
if( leftAlignedCigar.getReferenceLength() != refHaplotype.getCigar().getReferenceLength() ) { // left alignment failure
continue;
}
if( !returnHaplotypes.contains(h) ) { if( !returnHaplotypes.contains(h) ) {
h.setAlignmentStartHapwrtRef(activeRegionStart); h.setAlignmentStartHapwrtRef(activeRegionStart);
h.setCigar(leftAlignedCigar); h.setCigar(leftAlignedCigar);
@ -548,9 +545,8 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
final CigarElement ce = cigar.getCigarElement(i); final CigarElement ce = cigar.getCigarElement(i);
if (ce.getOperator() == CigarOperator.D || ce.getOperator() == CigarOperator.I) { if (ce.getOperator() == CigarOperator.D || ce.getOperator() == CigarOperator.I) {
cigarToAlign.add(ce); cigarToAlign.add(ce);
for( final CigarElement toAdd : AlignmentUtils.leftAlignIndel(cigarToAlign, refSeq, readSeq, refIndex, readIndex, false).getCigarElements() ) { final Cigar leftAligned = AlignmentUtils.leftAlignSingleIndel(cigarToAlign, refSeq, readSeq, refIndex, readIndex, false);
cigarToReturn.add(toAdd); for ( final CigarElement toAdd : leftAligned.getCigarElements() ) { cigarToReturn.add(toAdd); }
}
refIndex += cigarToAlign.getReferenceLength(); refIndex += cigarToAlign.getReferenceLength();
readIndex += cigarToAlign.getReadLength(); readIndex += cigarToAlign.getReadLength();
cigarToAlign = new Cigar(); cigarToAlign = new Cigar();
@ -563,7 +559,11 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
cigarToReturn.add(toAdd); cigarToReturn.add(toAdd);
} }
} }
return cigarToReturn;
final Cigar result = AlignmentUtils.consolidateCigar(cigarToReturn);
if( result.getReferenceLength() != cigar.getReferenceLength() )
throw new IllegalStateException("leftAlignCigarSequentially failed to produce a valid CIGAR. Reference lengths differ. Initial cigar " + cigar + " left aligned into " + result);
return result;
} }
/** /**

View File

@ -52,10 +52,7 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
* Date: 3/27/12 * Date: 3/27/12
*/ */
import net.sf.samtools.Cigar; import net.sf.samtools.*;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.DeBruijnGraph; import org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.DeBruijnGraph;
import org.broadinstitute.sting.utils.haplotype.Haplotype; import org.broadinstitute.sting.utils.haplotype.Haplotype;
@ -125,6 +122,17 @@ public class DeBruijnAssemblerUnitTest extends BaseTest {
} }
} }
@Test(enabled = true)
public void testLeftAlignCigarSequentiallyAdjacentID() {
final String ref = "GTCTCTCTCTCTCTCTCTATATATATATATATATTT";
final String hap = "GTCTCTCTCTCTCTCTCTCTCTATATATATATATTT";
final Cigar originalCigar = TextCigarCodec.getSingleton().decode("18M4I12M4D2M");
final Cigar result = new DeBruijnAssembler().leftAlignCigarSequentially(originalCigar, ref.getBytes(), hap.getBytes(), 0, 0);
logger.warn("Result is " + result);
Assert.assertEquals(originalCigar.getReferenceLength(), result.getReferenceLength(), "Reference lengths are different");
}
private static class MockBuilder extends DeBruijnGraphBuilder { private static class MockBuilder extends DeBruijnGraphBuilder {
public final List<Kmer> addedPairs = new LinkedList<Kmer>(); public final List<Kmer> addedPairs = new LinkedList<Kmer>();
@ -165,7 +173,7 @@ public class DeBruijnAssemblerUnitTest extends BaseTest {
return tests.toArray(new Object[][]{}); return tests.toArray(new Object[][]{});
} }
@Test(dataProvider = "AddReadKmersToGraph") @Test(dataProvider = "AddReadKmersToGraph", enabled = ! DEBUG)
public void testAddReadKmersToGraph(final String bases, final int kmerSize, final List<Integer> badQualsSites) { public void testAddReadKmersToGraph(final String bases, final int kmerSize, final List<Integer> badQualsSites) {
final int readLen = bases.length(); final int readLen = bases.length();
final DeBruijnAssembler assembler = new DeBruijnAssembler(); final DeBruijnAssembler assembler = new DeBruijnAssembler();

View File

@ -664,7 +664,7 @@ public final class AlignmentUtils {
if ( numIndels == 0 ) if ( numIndels == 0 )
return cigar; return cigar;
if ( numIndels == 1 ) if ( numIndels == 1 )
return leftAlignSingleIndel(cigar, refSeq, readSeq, refIndex, readIndex); return leftAlignSingleIndel(cigar, refSeq, readSeq, refIndex, readIndex, true);
// if we got here then there is more than 1 indel in the alignment // if we got here then there is more than 1 indel in the alignment
if ( doNotThrowExceptionForMultipleIndels ) if ( doNotThrowExceptionForMultipleIndels )
@ -709,10 +709,11 @@ public final class AlignmentUtils {
* @param readSeq read sequence * @param readSeq read sequence
* @param refIndex 0-based alignment start position on ref * @param refIndex 0-based alignment start position on ref
* @param readIndex 0-based alignment start position on read * @param readIndex 0-based alignment start position on read
* @param cleanupCigar if true, we'll cleanup the resulting cigar element, removing 0 length elements and deletions from the first cigar position
* @return a non-null cigar, in which the single indel is guaranteed to be placed at the leftmost possible position across a repeat (if any) * @return a non-null cigar, in which the single indel is guaranteed to be placed at the leftmost possible position across a repeat (if any)
*/ */
@Ensures("result != null") @Ensures("result != null")
public static Cigar leftAlignSingleIndel(Cigar cigar, final byte[] refSeq, final byte[] readSeq, final int refIndex, final int readIndex) { public static Cigar leftAlignSingleIndel(Cigar cigar, final byte[] refSeq, final byte[] readSeq, final int refIndex, final int readIndex, final boolean cleanupCigar) {
ensureLeftAlignmentHasGoodArguments(cigar, refSeq, readSeq, refIndex, readIndex); ensureLeftAlignmentHasGoodArguments(cigar, refSeq, readSeq, refIndex, readIndex);
int indexOfIndel = -1; int indexOfIndel = -1;
@ -751,7 +752,7 @@ public final class AlignmentUtils {
cigar = newCigar; cigar = newCigar;
i = -1; i = -1;
if (reachedEndOfRead) if (reachedEndOfRead)
cigar = cleanUpCigar(cigar); cigar = cleanupCigar ? cleanUpCigar(cigar) : cigar;
} }
if (reachedEndOfRead) if (reachedEndOfRead)