Bug fixes related to the changes in allele padding. If a haplotype started with an insertion it led to array index out of bounds. Haplotype allele insert function is now very simple because all alleles are treated the same way. HaplotypeUnitTest now uses a variant context instead of creating Allele objects directly.

This commit is contained in:
Ryan Poplin 2012-08-05 12:29:10 -04:00
parent c3b6e2b143
commit b7eec2fd0e
5 changed files with 47 additions and 81 deletions

View File

@ -533,27 +533,18 @@ public class GenotypingEngine {
final int elementLength = ce.getLength();
switch( ce.getOperator() ) {
case I:
final byte[] insertionBases = Arrays.copyOfRange( alignment, alignmentPos - 1, alignmentPos + elementLength ); // add padding base
boolean allN = true;
for( int i = 1; i < insertionBases.length; i++ ) { // check all bases except for the padding base
if( insertionBases[i] != (byte) 'N' ) {
allN = false;
break;
}
}
if( !allN ) {
final ArrayList<Allele> insertionAlleles = new ArrayList<Allele>();
final int insertionStart = refLoc.getStart() + refPos - 1;
insertionAlleles.add( Allele.create(ref[refPos-1], true) );
if( haplotype != null && (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1 || haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1) ) {
insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE );
vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make());
} else {
insertionAlleles.add( Allele.create(insertionBases, false) );
vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make());
}
final ArrayList<Allele> insertionAlleles = new ArrayList<Allele>();
final int insertionStart = refLoc.getStart() + refPos - 1;
insertionAlleles.add( Allele.create(ref[refPos-1], true) );
if( haplotype != null && (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1 || haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1) ) {
insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE );
} else {
byte[] insertionBases = new byte[]{};
insertionBases = ArrayUtils.add(insertionBases, ref[refPos-1]); // add the padding base
insertionBases = ArrayUtils.addAll(insertionBases, Arrays.copyOfRange( alignment, alignmentPos, alignmentPos + elementLength ));
insertionAlleles.add( Allele.create(insertionBases, false) );
}
vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make());
alignmentPos += elementLength;
break;
case S:

View File

@ -281,7 +281,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
final Haplotype h = new Haplotype( path.getBases( graph ), path.getScore() );
if( addHaplotype( h, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop ) ) {
if( !activeAllelesToGenotype.isEmpty() ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present
final HashMap<Integer,VariantContext> eventMap = GenotypingEngine.generateVCsFromAlignment( h.getAlignmentStartHapwrtRef(), h.getCigar(), fullReferenceWithPadding, h.getBases(), refLoc, "HCassembly", 0 ); // BUGBUG: need to put this function in a shared place
final HashMap<Integer,VariantContext> eventMap = GenotypingEngine.generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), fullReferenceWithPadding, h.getBases(), refLoc, "HCassembly", 0 ); // BUGBUG: need to put this function in a shared place
for( final VariantContext compVC : activeAllelesToGenotype ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present
final VariantContext vcOnHaplotype = eventMap.get(compVC.getStart());
if( vcOnHaplotype == null || !vcOnHaplotype.hasSameAllelesAs(compVC) ) {
@ -311,7 +311,8 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
}
private boolean addHaplotype( final Haplotype haplotype, final byte[] ref, final ArrayList<Haplotype> haplotypeList, final int activeRegionStart, final int activeRegionStop ) {
//final int sizeOfActiveRegion = activeRegionStop - activeRegionStart;
if( haplotype == null ) { return false; }
final SWPairwiseAlignment swConsensus = new SWPairwiseAlignment( ref, haplotype.getBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND );
haplotype.setAlignmentStartHapwrtRef( swConsensus.getAlignmentStart2wrt1() );
haplotype.setCigar( AlignmentUtils.leftAlignIndel(swConsensus.getCigar(), ref, haplotype.getBases(), swConsensus.getAlignmentStart2wrt1(), 0) );

View File

@ -331,7 +331,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
// Find the filtering lodCutoff for display on the model PDFs. Red variants are those which were below the cutoff and filtered out of the final callset.
double lodCutoff = 0.0;
for( final Tranche tranche : tranches ) {
if( MathUtils.compareDoubles(tranche.ts, TS_FILTER_LEVEL, 0.0001)==0 ) {
if( MathUtils.compareDoubles(tranche.ts, TS_FILTER_LEVEL, 0.0001) == 0 ) {
lodCutoff = tranche.minVQSLod;
}
}

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.samtools.Cigar;
import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.ReadUtils;
@ -160,49 +161,17 @@ public class Haplotype {
}
@Requires({"refInsertLocation >= 0"})
public Haplotype insertAllele( final Allele refAllele, final Allele altAllele, int refInsertLocation ) {
if( refAllele.length() != altAllele.length() ) { refInsertLocation++; }
public Haplotype insertAllele( final Allele refAllele, final Allele altAllele, final int refInsertLocation ) {
// refInsertLocation is in ref haplotype offset coordinates NOT genomic coordinates
final int haplotypeInsertLocation = ReadUtils.getReadCoordinateForReferenceCoordinate(alignmentStartHapwrtRef, cigar, refInsertLocation, ReadUtils.ClippingTail.RIGHT_TAIL, true);
if( haplotypeInsertLocation == -1 ) { // desired change falls inside deletion so don't bother creating a new haplotype
return new Haplotype(bases.clone());
if( haplotypeInsertLocation == -1 || haplotypeInsertLocation + refAllele.length() >= bases.length ) { // desired change falls inside deletion so don't bother creating a new haplotype
return null;
}
byte[] newHaplotype;
try {
if( refAllele.length() == altAllele.length() ) { // SNP or MNP
newHaplotype = bases.clone();
for( int iii = 0; iii < altAllele.length(); iii++ ) {
newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii];
}
} else if( refAllele.length() < altAllele.length() ) { // insertion
final int altAlleleLength = altAllele.length() - 1;
newHaplotype = new byte[bases.length + altAlleleLength];
for( int iii = 0; iii < bases.length; iii++ ) {
newHaplotype[iii] = bases[iii];
}
for( int iii = newHaplotype.length - 1; iii > haplotypeInsertLocation + altAlleleLength - 1; iii-- ) {
newHaplotype[iii] = newHaplotype[iii-altAlleleLength];
}
for( int iii = 0; iii < altAlleleLength; iii++ ) {
newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii+1];
}
} else { // deletion
final int shift = refAllele.length() - altAllele.length();
final int altAlleleLength = altAllele.length() - 1;
newHaplotype = new byte[bases.length - shift];
for( int iii = 0; iii < haplotypeInsertLocation + altAlleleLength; iii++ ) {
newHaplotype[iii] = bases[iii];
}
for( int iii = haplotypeInsertLocation + altAlleleLength; iii < newHaplotype.length; iii++ ) {
newHaplotype[iii] = bases[iii+shift];
}
}
} catch (Exception e) { // event already on haplotype is too large/complex to insert another allele, most likely because of not enough reference padding
return new Haplotype(bases.clone());
}
return new Haplotype(newHaplotype);
byte[] newHaplotypeBases = new byte[]{};
newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(bases, 0, haplotypeInsertLocation)); // bases before the variant
newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, altAllele.getBases()); // the alt allele of the variant
newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(bases, haplotypeInsertLocation + refAllele.length(), bases.length)); // bases after the variant
return new Haplotype(newHaplotypeBases);
}
public static LinkedHashMap<Allele,Haplotype> makeHaplotypeListFromAlleles(final List<Allele> alleleList,

View File

@ -31,6 +31,8 @@ import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
@ -53,11 +55,11 @@ public class HaplotypeUnitTest extends BaseTest {
h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M));
final Cigar h1Cigar = new Cigar(h1CigarList);
String h1bases = "AACTTCTGGTCAACTGGTCAACTGGTCAACTGGTCA";
basicInsertTest("A", "AACTT", 1, h1Cigar, bases, h1bases);
h1bases = "ACTGGTCACTTAACTGGTCAACTGGTCAACTGGTCA";
basicInsertTest("A", "AACTT", 0, h1Cigar, bases, h1bases);
h1bases = "ACTGGTCAACTTACTGGTCAACTGGTCAACTGGTCA";
basicInsertTest("A", "AACTT", 7, h1Cigar, bases, h1bases);
h1bases = "ACTGGTCAACTGGTCAAACTTCTGGTCAACTGGTCA";
basicInsertTest("A", "AACTT", 17, h1Cigar, bases, h1bases);
basicInsertTest("A", "AACTT", 16, h1Cigar, bases, h1bases);
}
@Test
@ -68,11 +70,11 @@ public class HaplotypeUnitTest extends BaseTest {
h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M));
final Cigar h1Cigar = new Cigar(h1CigarList);
String h1bases = "ATCAACTGGTCAACTGGTCAACTGGTCA";
basicInsertTest("AACTT", "A", 1, h1Cigar, bases, h1bases);
h1bases = "ACTGGTCGGTCAACTGGTCAACTGGTCA";
basicInsertTest("AACTT", "A", 7, h1Cigar, bases, h1bases);
basicInsertTest("ACTGG", "A", 0, h1Cigar, bases, h1bases);
h1bases = "ACTGGTCAGTCAACTGGTCAACTGGTCA";
basicInsertTest("AACTG", "A", 7, h1Cigar, bases, h1bases);
h1bases = "ACTGGTCAACTGGTCAATCAACTGGTCA";
basicInsertTest("AACTT", "A", 17, h1Cigar, bases, h1bases);
basicInsertTest("ACTGG", "A", 16, h1Cigar, bases, h1bases);
}
@Test
@ -102,11 +104,11 @@ public class HaplotypeUnitTest extends BaseTest {
h1CigarList.add(new CigarElement(7 + 4, CigarOperator.M));
final Cigar h1Cigar = new Cigar(h1CigarList);
String h1bases = "AACTTTCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
basicInsertTest("A", "AACTT", 1, h1Cigar, bases, h1bases);
basicInsertTest("A", "AACTT", 0, h1Cigar, bases, h1bases);
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCACTTGATCG" + "AGGGGGA" + "AGGC";
basicInsertTest("A", "AACTT", 7, h1Cigar, bases, h1bases);
basicInsertTest("C", "CACTT", 6, h1Cigar, bases, h1bases);
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGACTTGGGGA" + "AGGC";
basicInsertTest("A", "AACTT", 17, h1Cigar, bases, h1bases);
basicInsertTest("G", "GACTT", 16, h1Cigar, bases, h1bases);
}
@Test
@ -120,12 +122,12 @@ public class HaplotypeUnitTest extends BaseTest {
h1CigarList.add(new CigarElement(3, CigarOperator.D));
h1CigarList.add(new CigarElement(7 + 4, CigarOperator.M));
final Cigar h1Cigar = new Cigar(h1CigarList);
String h1bases = "A" + "CGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
basicInsertTest("AACTT", "A", 1, h1Cigar, bases, h1bases);
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCG" + "AGGGGGA" + "AGGC";
basicInsertTest("AACTT", "A", 7, h1Cigar, bases, h1bases);
String h1bases = "A" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
basicInsertTest("ATCG", "A", 0, h1Cigar, bases, h1bases);
h1bases = "ATCG" + "CCGGCCGGCC" + "ATAAAG" + "AGGGGGA" + "AGGC";
basicInsertTest("CGATC", "AAA", 6, h1Cigar, bases, h1bases);
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGA" + "AGGC";
basicInsertTest("AACTT", "A", 17, h1Cigar, bases, h1bases);
basicInsertTest("GGGGG", "G", 16, h1Cigar, bases, h1bases);
}
@Test
@ -148,13 +150,16 @@ public class HaplotypeUnitTest extends BaseTest {
}
private void basicInsertTest(String ref, String alt, int loc, Cigar cigar, String hap, String newHap) {
final int INDEL_PADDING_BASE = (ref.length() == alt.length() ? 0 : 1);
final Haplotype h = new Haplotype(hap.getBytes());
final Allele h1refAllele = Allele.create(ref, true);
final Allele h1altAllele = Allele.create(alt, false);
final ArrayList<Allele> alleles = new ArrayList<Allele>();
alleles.add(h1refAllele);
alleles.add(h1altAllele);
final VariantContext vc = new VariantContextBuilder().alleles(alleles).loc("1", loc, loc + h1refAllele.getBases().length - 1).make();
h.setAlignmentStartHapwrtRef(0);
h.setCigar(cigar);
final Haplotype h1 = h.insertAllele(h1refAllele, h1altAllele, loc - INDEL_PADDING_BASE);
final Haplotype h1 = h.insertAllele(vc.getReference(), vc.getAlternateAllele(0), loc);
final Haplotype h1expected = new Haplotype(newHap.getBytes());
Assert.assertEquals(h1, h1expected);
}