Bug fixes related to the changes in allele padding. If a haplotype started with an insertion it led to array index out of bounds. Haplotype allele insert function is now very simple because all alleles are treated the same way. HaplotypeUnitTest now uses a variant context instead of creating Allele objects directly.

This commit is contained in:
Ryan Poplin 2012-08-05 12:29:10 -04:00
parent c3b6e2b143
commit b7eec2fd0e
5 changed files with 47 additions and 81 deletions

View File

@ -533,27 +533,18 @@ public class GenotypingEngine {
final int elementLength = ce.getLength(); final int elementLength = ce.getLength();
switch( ce.getOperator() ) { switch( ce.getOperator() ) {
case I: case I:
final byte[] insertionBases = Arrays.copyOfRange( alignment, alignmentPos - 1, alignmentPos + elementLength ); // add padding base final ArrayList<Allele> insertionAlleles = new ArrayList<Allele>();
boolean allN = true; final int insertionStart = refLoc.getStart() + refPos - 1;
for( int i = 1; i < insertionBases.length; i++ ) { // check all bases except for the padding base insertionAlleles.add( Allele.create(ref[refPos-1], true) );
if( insertionBases[i] != (byte) 'N' ) { if( haplotype != null && (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1 || haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1) ) {
allN = false; insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE );
break; } else {
} byte[] insertionBases = new byte[]{};
} insertionBases = ArrayUtils.add(insertionBases, ref[refPos-1]); // add the padding base
if( !allN ) { insertionBases = ArrayUtils.addAll(insertionBases, Arrays.copyOfRange( alignment, alignmentPos, alignmentPos + elementLength ));
final ArrayList<Allele> insertionAlleles = new ArrayList<Allele>(); insertionAlleles.add( Allele.create(insertionBases, false) );
final int insertionStart = refLoc.getStart() + refPos - 1;
insertionAlleles.add( Allele.create(ref[refPos-1], true) );
if( haplotype != null && (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1 || haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1) ) {
insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE );
vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make());
} else {
insertionAlleles.add( Allele.create(insertionBases, false) );
vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make());
}
} }
vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make());
alignmentPos += elementLength; alignmentPos += elementLength;
break; break;
case S: case S:

View File

@ -281,7 +281,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
final Haplotype h = new Haplotype( path.getBases( graph ), path.getScore() ); final Haplotype h = new Haplotype( path.getBases( graph ), path.getScore() );
if( addHaplotype( h, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop ) ) { if( addHaplotype( h, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop ) ) {
if( !activeAllelesToGenotype.isEmpty() ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present if( !activeAllelesToGenotype.isEmpty() ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present
final HashMap<Integer,VariantContext> eventMap = GenotypingEngine.generateVCsFromAlignment( h.getAlignmentStartHapwrtRef(), h.getCigar(), fullReferenceWithPadding, h.getBases(), refLoc, "HCassembly", 0 ); // BUGBUG: need to put this function in a shared place final HashMap<Integer,VariantContext> eventMap = GenotypingEngine.generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), fullReferenceWithPadding, h.getBases(), refLoc, "HCassembly", 0 ); // BUGBUG: need to put this function in a shared place
for( final VariantContext compVC : activeAllelesToGenotype ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present for( final VariantContext compVC : activeAllelesToGenotype ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present
final VariantContext vcOnHaplotype = eventMap.get(compVC.getStart()); final VariantContext vcOnHaplotype = eventMap.get(compVC.getStart());
if( vcOnHaplotype == null || !vcOnHaplotype.hasSameAllelesAs(compVC) ) { if( vcOnHaplotype == null || !vcOnHaplotype.hasSameAllelesAs(compVC) ) {
@ -311,7 +311,8 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
} }
private boolean addHaplotype( final Haplotype haplotype, final byte[] ref, final ArrayList<Haplotype> haplotypeList, final int activeRegionStart, final int activeRegionStop ) { private boolean addHaplotype( final Haplotype haplotype, final byte[] ref, final ArrayList<Haplotype> haplotypeList, final int activeRegionStart, final int activeRegionStop ) {
//final int sizeOfActiveRegion = activeRegionStop - activeRegionStart; if( haplotype == null ) { return false; }
final SWPairwiseAlignment swConsensus = new SWPairwiseAlignment( ref, haplotype.getBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND ); final SWPairwiseAlignment swConsensus = new SWPairwiseAlignment( ref, haplotype.getBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND );
haplotype.setAlignmentStartHapwrtRef( swConsensus.getAlignmentStart2wrt1() ); haplotype.setAlignmentStartHapwrtRef( swConsensus.getAlignmentStart2wrt1() );
haplotype.setCigar( AlignmentUtils.leftAlignIndel(swConsensus.getCigar(), ref, haplotype.getBases(), swConsensus.getAlignmentStart2wrt1(), 0) ); haplotype.setCigar( AlignmentUtils.leftAlignIndel(swConsensus.getCigar(), ref, haplotype.getBases(), swConsensus.getAlignmentStart2wrt1(), 0) );

View File

@ -331,7 +331,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
// Find the filtering lodCutoff for display on the model PDFs. Red variants are those which were below the cutoff and filtered out of the final callset. // Find the filtering lodCutoff for display on the model PDFs. Red variants are those which were below the cutoff and filtered out of the final callset.
double lodCutoff = 0.0; double lodCutoff = 0.0;
for( final Tranche tranche : tranches ) { for( final Tranche tranche : tranches ) {
if( MathUtils.compareDoubles(tranche.ts, TS_FILTER_LEVEL, 0.0001)==0 ) { if( MathUtils.compareDoubles(tranche.ts, TS_FILTER_LEVEL, 0.0001) == 0 ) {
lodCutoff = tranche.minVQSLod; lodCutoff = tranche.minVQSLod;
} }
} }

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils;
import com.google.java.contract.Ensures; import com.google.java.contract.Ensures;
import com.google.java.contract.Requires; import com.google.java.contract.Requires;
import net.sf.samtools.Cigar; import net.sf.samtools.Cigar;
import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.sam.ReadUtils;
@ -160,49 +161,17 @@ public class Haplotype {
} }
@Requires({"refInsertLocation >= 0"}) @Requires({"refInsertLocation >= 0"})
public Haplotype insertAllele( final Allele refAllele, final Allele altAllele, int refInsertLocation ) { public Haplotype insertAllele( final Allele refAllele, final Allele altAllele, final int refInsertLocation ) {
// refInsertLocation is in ref haplotype offset coordinates NOT genomic coordinates
if( refAllele.length() != altAllele.length() ) { refInsertLocation++; }
final int haplotypeInsertLocation = ReadUtils.getReadCoordinateForReferenceCoordinate(alignmentStartHapwrtRef, cigar, refInsertLocation, ReadUtils.ClippingTail.RIGHT_TAIL, true); final int haplotypeInsertLocation = ReadUtils.getReadCoordinateForReferenceCoordinate(alignmentStartHapwrtRef, cigar, refInsertLocation, ReadUtils.ClippingTail.RIGHT_TAIL, true);
if( haplotypeInsertLocation == -1 ) { // desired change falls inside deletion so don't bother creating a new haplotype if( haplotypeInsertLocation == -1 || haplotypeInsertLocation + refAllele.length() >= bases.length ) { // desired change falls inside deletion so don't bother creating a new haplotype
return new Haplotype(bases.clone()); return null;
} }
byte[] newHaplotype; byte[] newHaplotypeBases = new byte[]{};
newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(bases, 0, haplotypeInsertLocation)); // bases before the variant
try { newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, altAllele.getBases()); // the alt allele of the variant
if( refAllele.length() == altAllele.length() ) { // SNP or MNP newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(bases, haplotypeInsertLocation + refAllele.length(), bases.length)); // bases after the variant
newHaplotype = bases.clone(); return new Haplotype(newHaplotypeBases);
for( int iii = 0; iii < altAllele.length(); iii++ ) {
newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii];
}
} else if( refAllele.length() < altAllele.length() ) { // insertion
final int altAlleleLength = altAllele.length() - 1;
newHaplotype = new byte[bases.length + altAlleleLength];
for( int iii = 0; iii < bases.length; iii++ ) {
newHaplotype[iii] = bases[iii];
}
for( int iii = newHaplotype.length - 1; iii > haplotypeInsertLocation + altAlleleLength - 1; iii-- ) {
newHaplotype[iii] = newHaplotype[iii-altAlleleLength];
}
for( int iii = 0; iii < altAlleleLength; iii++ ) {
newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii+1];
}
} else { // deletion
final int shift = refAllele.length() - altAllele.length();
final int altAlleleLength = altAllele.length() - 1;
newHaplotype = new byte[bases.length - shift];
for( int iii = 0; iii < haplotypeInsertLocation + altAlleleLength; iii++ ) {
newHaplotype[iii] = bases[iii];
}
for( int iii = haplotypeInsertLocation + altAlleleLength; iii < newHaplotype.length; iii++ ) {
newHaplotype[iii] = bases[iii+shift];
}
}
} catch (Exception e) { // event already on haplotype is too large/complex to insert another allele, most likely because of not enough reference padding
return new Haplotype(bases.clone());
}
return new Haplotype(newHaplotype);
} }
public static LinkedHashMap<Allele,Haplotype> makeHaplotypeListFromAlleles(final List<Allele> alleleList, public static LinkedHashMap<Allele,Haplotype> makeHaplotypeListFromAlleles(final List<Allele> alleleList,

View File

@ -31,6 +31,8 @@ import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator; import net.sf.samtools.CigarOperator;
import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
import org.testng.Assert; import org.testng.Assert;
import org.testng.annotations.BeforeClass; import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test; import org.testng.annotations.Test;
@ -53,11 +55,11 @@ public class HaplotypeUnitTest extends BaseTest {
h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M)); h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M));
final Cigar h1Cigar = new Cigar(h1CigarList); final Cigar h1Cigar = new Cigar(h1CigarList);
String h1bases = "AACTTCTGGTCAACTGGTCAACTGGTCAACTGGTCA"; String h1bases = "AACTTCTGGTCAACTGGTCAACTGGTCAACTGGTCA";
basicInsertTest("A", "AACTT", 1, h1Cigar, bases, h1bases); basicInsertTest("A", "AACTT", 0, h1Cigar, bases, h1bases);
h1bases = "ACTGGTCACTTAACTGGTCAACTGGTCAACTGGTCA"; h1bases = "ACTGGTCAACTTACTGGTCAACTGGTCAACTGGTCA";
basicInsertTest("A", "AACTT", 7, h1Cigar, bases, h1bases); basicInsertTest("A", "AACTT", 7, h1Cigar, bases, h1bases);
h1bases = "ACTGGTCAACTGGTCAAACTTCTGGTCAACTGGTCA"; h1bases = "ACTGGTCAACTGGTCAAACTTCTGGTCAACTGGTCA";
basicInsertTest("A", "AACTT", 17, h1Cigar, bases, h1bases); basicInsertTest("A", "AACTT", 16, h1Cigar, bases, h1bases);
} }
@Test @Test
@ -68,11 +70,11 @@ public class HaplotypeUnitTest extends BaseTest {
h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M)); h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M));
final Cigar h1Cigar = new Cigar(h1CigarList); final Cigar h1Cigar = new Cigar(h1CigarList);
String h1bases = "ATCAACTGGTCAACTGGTCAACTGGTCA"; String h1bases = "ATCAACTGGTCAACTGGTCAACTGGTCA";
basicInsertTest("AACTT", "A", 1, h1Cigar, bases, h1bases); basicInsertTest("ACTGG", "A", 0, h1Cigar, bases, h1bases);
h1bases = "ACTGGTCGGTCAACTGGTCAACTGGTCA"; h1bases = "ACTGGTCAGTCAACTGGTCAACTGGTCA";
basicInsertTest("AACTT", "A", 7, h1Cigar, bases, h1bases); basicInsertTest("AACTG", "A", 7, h1Cigar, bases, h1bases);
h1bases = "ACTGGTCAACTGGTCAATCAACTGGTCA"; h1bases = "ACTGGTCAACTGGTCAATCAACTGGTCA";
basicInsertTest("AACTT", "A", 17, h1Cigar, bases, h1bases); basicInsertTest("ACTGG", "A", 16, h1Cigar, bases, h1bases);
} }
@Test @Test
@ -102,11 +104,11 @@ public class HaplotypeUnitTest extends BaseTest {
h1CigarList.add(new CigarElement(7 + 4, CigarOperator.M)); h1CigarList.add(new CigarElement(7 + 4, CigarOperator.M));
final Cigar h1Cigar = new Cigar(h1CigarList); final Cigar h1Cigar = new Cigar(h1CigarList);
String h1bases = "AACTTTCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC"; String h1bases = "AACTTTCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
basicInsertTest("A", "AACTT", 1, h1Cigar, bases, h1bases); basicInsertTest("A", "AACTT", 0, h1Cigar, bases, h1bases);
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCACTTGATCG" + "AGGGGGA" + "AGGC"; h1bases = "ATCG" + "CCGGCCGGCC" + "ATCACTTGATCG" + "AGGGGGA" + "AGGC";
basicInsertTest("A", "AACTT", 7, h1Cigar, bases, h1bases); basicInsertTest("C", "CACTT", 6, h1Cigar, bases, h1bases);
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGACTTGGGGA" + "AGGC"; h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGACTTGGGGA" + "AGGC";
basicInsertTest("A", "AACTT", 17, h1Cigar, bases, h1bases); basicInsertTest("G", "GACTT", 16, h1Cigar, bases, h1bases);
} }
@Test @Test
@ -120,12 +122,12 @@ public class HaplotypeUnitTest extends BaseTest {
h1CigarList.add(new CigarElement(3, CigarOperator.D)); h1CigarList.add(new CigarElement(3, CigarOperator.D));
h1CigarList.add(new CigarElement(7 + 4, CigarOperator.M)); h1CigarList.add(new CigarElement(7 + 4, CigarOperator.M));
final Cigar h1Cigar = new Cigar(h1CigarList); final Cigar h1Cigar = new Cigar(h1CigarList);
String h1bases = "A" + "CGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC"; String h1bases = "A" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
basicInsertTest("AACTT", "A", 1, h1Cigar, bases, h1bases); basicInsertTest("ATCG", "A", 0, h1Cigar, bases, h1bases);
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCG" + "AGGGGGA" + "AGGC"; h1bases = "ATCG" + "CCGGCCGGCC" + "ATAAAG" + "AGGGGGA" + "AGGC";
basicInsertTest("AACTT", "A", 7, h1Cigar, bases, h1bases); basicInsertTest("CGATC", "AAA", 6, h1Cigar, bases, h1bases);
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGA" + "AGGC"; h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGA" + "AGGC";
basicInsertTest("AACTT", "A", 17, h1Cigar, bases, h1bases); basicInsertTest("GGGGG", "G", 16, h1Cigar, bases, h1bases);
} }
@Test @Test
@ -148,13 +150,16 @@ public class HaplotypeUnitTest extends BaseTest {
} }
private void basicInsertTest(String ref, String alt, int loc, Cigar cigar, String hap, String newHap) { private void basicInsertTest(String ref, String alt, int loc, Cigar cigar, String hap, String newHap) {
final int INDEL_PADDING_BASE = (ref.length() == alt.length() ? 0 : 1);
final Haplotype h = new Haplotype(hap.getBytes()); final Haplotype h = new Haplotype(hap.getBytes());
final Allele h1refAllele = Allele.create(ref, true); final Allele h1refAllele = Allele.create(ref, true);
final Allele h1altAllele = Allele.create(alt, false); final Allele h1altAllele = Allele.create(alt, false);
final ArrayList<Allele> alleles = new ArrayList<Allele>();
alleles.add(h1refAllele);
alleles.add(h1altAllele);
final VariantContext vc = new VariantContextBuilder().alleles(alleles).loc("1", loc, loc + h1refAllele.getBases().length - 1).make();
h.setAlignmentStartHapwrtRef(0); h.setAlignmentStartHapwrtRef(0);
h.setCigar(cigar); h.setCigar(cigar);
final Haplotype h1 = h.insertAllele(h1refAllele, h1altAllele, loc - INDEL_PADDING_BASE); final Haplotype h1 = h.insertAllele(vc.getReference(), vc.getAlternateAllele(0), loc);
final Haplotype h1expected = new Haplotype(newHap.getBytes()); final Haplotype h1expected = new Haplotype(newHap.getBytes());
Assert.assertEquals(h1, h1expected); Assert.assertEquals(h1, h1expected);
} }