leftAlignCigarSequentially now supports haplotypes with insertions and deletions where the deletion allele was previously removed by the leftAlignSingleIndel during it's cleanup phase.

This commit is contained in:
Mark DePristo 2013-04-19 16:11:29 -04:00
parent aefddaa219
commit 2bcbdd469f
3 changed files with 24 additions and 15 deletions

View File

@ -410,9 +410,6 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
// extend partial haplotypes which are anchored in the reference to include the full active region
h = extendPartialHaplotype(h, activeRegionStart, refWithPadding);
final Cigar leftAlignedCigar = leftAlignCigarSequentially(AlignmentUtils.consolidateCigar(h.getCigar()), refWithPadding, h.getBases(), activeRegionStart, 0);
if( leftAlignedCigar.getReferenceLength() != refHaplotype.getCigar().getReferenceLength() ) { // left alignment failure
continue;
}
if( !returnHaplotypes.contains(h) ) {
h.setAlignmentStartHapwrtRef(activeRegionStart);
h.setCigar(leftAlignedCigar);
@ -548,9 +545,8 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
final CigarElement ce = cigar.getCigarElement(i);
if (ce.getOperator() == CigarOperator.D || ce.getOperator() == CigarOperator.I) {
cigarToAlign.add(ce);
for( final CigarElement toAdd : AlignmentUtils.leftAlignIndel(cigarToAlign, refSeq, readSeq, refIndex, readIndex, false).getCigarElements() ) {
cigarToReturn.add(toAdd);
}
final Cigar leftAligned = AlignmentUtils.leftAlignSingleIndel(cigarToAlign, refSeq, readSeq, refIndex, readIndex, false);
for ( final CigarElement toAdd : leftAligned.getCigarElements() ) { cigarToReturn.add(toAdd); }
refIndex += cigarToAlign.getReferenceLength();
readIndex += cigarToAlign.getReadLength();
cigarToAlign = new Cigar();
@ -563,7 +559,11 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
cigarToReturn.add(toAdd);
}
}
return cigarToReturn;
final Cigar result = AlignmentUtils.consolidateCigar(cigarToReturn);
if( result.getReferenceLength() != cigar.getReferenceLength() )
throw new IllegalStateException("leftAlignCigarSequentially failed to produce a valid CIGAR. Reference lengths differ. Initial cigar " + cigar + " left aligned into " + result);
return result;
}
/**

View File

@ -52,10 +52,7 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
* Date: 3/27/12
*/
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.*;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.DeBruijnGraph;
import org.broadinstitute.sting.utils.haplotype.Haplotype;
@ -125,6 +122,17 @@ public class DeBruijnAssemblerUnitTest extends BaseTest {
}
}
@Test(enabled = true)
public void testLeftAlignCigarSequentiallyAdjacentID() {
final String ref = "GTCTCTCTCTCTCTCTCTATATATATATATATATTT";
final String hap = "GTCTCTCTCTCTCTCTCTCTCTATATATATATATTT";
final Cigar originalCigar = TextCigarCodec.getSingleton().decode("18M4I12M4D2M");
final Cigar result = new DeBruijnAssembler().leftAlignCigarSequentially(originalCigar, ref.getBytes(), hap.getBytes(), 0, 0);
logger.warn("Result is " + result);
Assert.assertEquals(originalCigar.getReferenceLength(), result.getReferenceLength(), "Reference lengths are different");
}
private static class MockBuilder extends DeBruijnGraphBuilder {
public final List<Kmer> addedPairs = new LinkedList<Kmer>();
@ -165,7 +173,7 @@ public class DeBruijnAssemblerUnitTest extends BaseTest {
return tests.toArray(new Object[][]{});
}
@Test(dataProvider = "AddReadKmersToGraph")
@Test(dataProvider = "AddReadKmersToGraph", enabled = ! DEBUG)
public void testAddReadKmersToGraph(final String bases, final int kmerSize, final List<Integer> badQualsSites) {
final int readLen = bases.length();
final DeBruijnAssembler assembler = new DeBruijnAssembler();

View File

@ -664,7 +664,7 @@ public final class AlignmentUtils {
if ( numIndels == 0 )
return cigar;
if ( numIndels == 1 )
return leftAlignSingleIndel(cigar, refSeq, readSeq, refIndex, readIndex);
return leftAlignSingleIndel(cigar, refSeq, readSeq, refIndex, readIndex, true);
// if we got here then there is more than 1 indel in the alignment
if ( doNotThrowExceptionForMultipleIndels )
@ -709,10 +709,11 @@ public final class AlignmentUtils {
* @param readSeq read sequence
* @param refIndex 0-based alignment start position on ref
* @param readIndex 0-based alignment start position on read
* @param cleanupCigar if true, we'll cleanup the resulting cigar element, removing 0 length elements and deletions from the first cigar position
* @return a non-null cigar, in which the single indel is guaranteed to be placed at the leftmost possible position across a repeat (if any)
*/
@Ensures("result != null")
public static Cigar leftAlignSingleIndel(Cigar cigar, final byte[] refSeq, final byte[] readSeq, final int refIndex, final int readIndex) {
public static Cigar leftAlignSingleIndel(Cigar cigar, final byte[] refSeq, final byte[] readSeq, final int refIndex, final int readIndex, final boolean cleanupCigar) {
ensureLeftAlignmentHasGoodArguments(cigar, refSeq, readSeq, refIndex, readIndex);
int indexOfIndel = -1;
@ -751,7 +752,7 @@ public final class AlignmentUtils {
cigar = newCigar;
i = -1;
if (reachedEndOfRead)
cigar = cleanUpCigar(cigar);
cigar = cleanupCigar ? cleanUpCigar(cigar) : cigar;
}
if (reachedEndOfRead)