Bug fixes related to the changes in allele padding. If a haplotype started with an insertion, it led to an array-index-out-of-bounds error. The Haplotype allele insert function is now very simple because all alleles are treated the same way. HaplotypeUnitTest now uses a variant context instead of creating Allele objects directly.
This commit is contained in:
parent
c3b6e2b143
commit
b7eec2fd0e
|
|
@ -533,27 +533,18 @@ public class GenotypingEngine {
|
|||
final int elementLength = ce.getLength();
|
||||
switch( ce.getOperator() ) {
|
||||
case I:
|
||||
final byte[] insertionBases = Arrays.copyOfRange( alignment, alignmentPos - 1, alignmentPos + elementLength ); // add padding base
|
||||
boolean allN = true;
|
||||
for( int i = 1; i < insertionBases.length; i++ ) { // check all bases except for the padding base
|
||||
if( insertionBases[i] != (byte) 'N' ) {
|
||||
allN = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if( !allN ) {
|
||||
final ArrayList<Allele> insertionAlleles = new ArrayList<Allele>();
|
||||
final int insertionStart = refLoc.getStart() + refPos - 1;
|
||||
insertionAlleles.add( Allele.create(ref[refPos-1], true) );
|
||||
if( haplotype != null && (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1 || haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1) ) {
|
||||
insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE );
|
||||
vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make());
|
||||
} else {
|
||||
insertionAlleles.add( Allele.create(insertionBases, false) );
|
||||
vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make());
|
||||
}
|
||||
|
||||
final ArrayList<Allele> insertionAlleles = new ArrayList<Allele>();
|
||||
final int insertionStart = refLoc.getStart() + refPos - 1;
|
||||
insertionAlleles.add( Allele.create(ref[refPos-1], true) );
|
||||
if( haplotype != null && (haplotype.leftBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1 || haplotype.rightBreakPoint + alignmentStartHapwrtRef + refLoc.getStart() - 1 == insertionStart + elementLength + 1) ) {
|
||||
insertionAlleles.add( SYMBOLIC_UNASSEMBLED_EVENT_ALLELE );
|
||||
} else {
|
||||
byte[] insertionBases = new byte[]{};
|
||||
insertionBases = ArrayUtils.add(insertionBases, ref[refPos-1]); // add the padding base
|
||||
insertionBases = ArrayUtils.addAll(insertionBases, Arrays.copyOfRange( alignment, alignmentPos, alignmentPos + elementLength ));
|
||||
insertionAlleles.add( Allele.create(insertionBases, false) );
|
||||
}
|
||||
vcs.put(insertionStart, new VariantContextBuilder(sourceNameToAdd, refLoc.getContig(), insertionStart, insertionStart, insertionAlleles).make());
|
||||
alignmentPos += elementLength;
|
||||
break;
|
||||
case S:
|
||||
|
|
|
|||
|
|
@ -281,7 +281,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
|
|||
final Haplotype h = new Haplotype( path.getBases( graph ), path.getScore() );
|
||||
if( addHaplotype( h, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop ) ) {
|
||||
if( !activeAllelesToGenotype.isEmpty() ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present
|
||||
final HashMap<Integer,VariantContext> eventMap = GenotypingEngine.generateVCsFromAlignment( h.getAlignmentStartHapwrtRef(), h.getCigar(), fullReferenceWithPadding, h.getBases(), refLoc, "HCassembly", 0 ); // BUGBUG: need to put this function in a shared place
|
||||
final HashMap<Integer,VariantContext> eventMap = GenotypingEngine.generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), fullReferenceWithPadding, h.getBases(), refLoc, "HCassembly", 0 ); // BUGBUG: need to put this function in a shared place
|
||||
for( final VariantContext compVC : activeAllelesToGenotype ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present
|
||||
final VariantContext vcOnHaplotype = eventMap.get(compVC.getStart());
|
||||
if( vcOnHaplotype == null || !vcOnHaplotype.hasSameAllelesAs(compVC) ) {
|
||||
|
|
@ -311,7 +311,8 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
|
|||
}
|
||||
|
||||
private boolean addHaplotype( final Haplotype haplotype, final byte[] ref, final ArrayList<Haplotype> haplotypeList, final int activeRegionStart, final int activeRegionStop ) {
|
||||
//final int sizeOfActiveRegion = activeRegionStop - activeRegionStart;
|
||||
if( haplotype == null ) { return false; }
|
||||
|
||||
final SWPairwiseAlignment swConsensus = new SWPairwiseAlignment( ref, haplotype.getBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND );
|
||||
haplotype.setAlignmentStartHapwrtRef( swConsensus.getAlignmentStart2wrt1() );
|
||||
haplotype.setCigar( AlignmentUtils.leftAlignIndel(swConsensus.getCigar(), ref, haplotype.getBases(), swConsensus.getAlignmentStart2wrt1(), 0) );
|
||||
|
|
|
|||
|
|
@ -331,7 +331,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
// Find the filtering lodCutoff for display on the model PDFs. Red variants are those which were below the cutoff and filtered out of the final callset.
|
||||
double lodCutoff = 0.0;
|
||||
for( final Tranche tranche : tranches ) {
|
||||
if( MathUtils.compareDoubles(tranche.ts, TS_FILTER_LEVEL, 0.0001)==0 ) {
|
||||
if( MathUtils.compareDoubles(tranche.ts, TS_FILTER_LEVEL, 0.0001) == 0 ) {
|
||||
lodCutoff = tranche.minVQSLod;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils;
|
|||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.Cigar;
|
||||
import org.apache.commons.lang.ArrayUtils;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
|
@ -160,49 +161,17 @@ public class Haplotype {
|
|||
}
|
||||
|
||||
@Requires({"refInsertLocation >= 0"})
|
||||
public Haplotype insertAllele( final Allele refAllele, final Allele altAllele, int refInsertLocation ) {
|
||||
|
||||
if( refAllele.length() != altAllele.length() ) { refInsertLocation++; }
|
||||
public Haplotype insertAllele( final Allele refAllele, final Allele altAllele, final int refInsertLocation ) {
|
||||
// refInsertLocation is in ref haplotype offset coordinates NOT genomic coordinates
|
||||
final int haplotypeInsertLocation = ReadUtils.getReadCoordinateForReferenceCoordinate(alignmentStartHapwrtRef, cigar, refInsertLocation, ReadUtils.ClippingTail.RIGHT_TAIL, true);
|
||||
if( haplotypeInsertLocation == -1 ) { // desired change falls inside deletion so don't bother creating a new haplotype
|
||||
return new Haplotype(bases.clone());
|
||||
if( haplotypeInsertLocation == -1 || haplotypeInsertLocation + refAllele.length() >= bases.length ) { // desired change falls inside deletion so don't bother creating a new haplotype
|
||||
return null;
|
||||
}
|
||||
byte[] newHaplotype;
|
||||
|
||||
try {
|
||||
if( refAllele.length() == altAllele.length() ) { // SNP or MNP
|
||||
newHaplotype = bases.clone();
|
||||
for( int iii = 0; iii < altAllele.length(); iii++ ) {
|
||||
newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii];
|
||||
}
|
||||
} else if( refAllele.length() < altAllele.length() ) { // insertion
|
||||
final int altAlleleLength = altAllele.length() - 1;
|
||||
newHaplotype = new byte[bases.length + altAlleleLength];
|
||||
for( int iii = 0; iii < bases.length; iii++ ) {
|
||||
newHaplotype[iii] = bases[iii];
|
||||
}
|
||||
for( int iii = newHaplotype.length - 1; iii > haplotypeInsertLocation + altAlleleLength - 1; iii-- ) {
|
||||
newHaplotype[iii] = newHaplotype[iii-altAlleleLength];
|
||||
}
|
||||
for( int iii = 0; iii < altAlleleLength; iii++ ) {
|
||||
newHaplotype[haplotypeInsertLocation+iii] = altAllele.getBases()[iii+1];
|
||||
}
|
||||
} else { // deletion
|
||||
final int shift = refAllele.length() - altAllele.length();
|
||||
final int altAlleleLength = altAllele.length() - 1;
|
||||
newHaplotype = new byte[bases.length - shift];
|
||||
for( int iii = 0; iii < haplotypeInsertLocation + altAlleleLength; iii++ ) {
|
||||
newHaplotype[iii] = bases[iii];
|
||||
}
|
||||
for( int iii = haplotypeInsertLocation + altAlleleLength; iii < newHaplotype.length; iii++ ) {
|
||||
newHaplotype[iii] = bases[iii+shift];
|
||||
}
|
||||
}
|
||||
} catch (Exception e) { // event already on haplotype is too large/complex to insert another allele, most likely because of not enough reference padding
|
||||
return new Haplotype(bases.clone());
|
||||
}
|
||||
|
||||
return new Haplotype(newHaplotype);
|
||||
byte[] newHaplotypeBases = new byte[]{};
|
||||
newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(bases, 0, haplotypeInsertLocation)); // bases before the variant
|
||||
newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, altAllele.getBases()); // the alt allele of the variant
|
||||
newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(bases, haplotypeInsertLocation + refAllele.length(), bases.length)); // bases after the variant
|
||||
return new Haplotype(newHaplotypeBases);
|
||||
}
|
||||
|
||||
public static LinkedHashMap<Allele,Haplotype> makeHaplotypeListFromAlleles(final List<Allele> alleleList,
|
||||
|
|
|
|||
|
|
@ -31,6 +31,8 @@ import net.sf.samtools.CigarElement;
|
|||
import net.sf.samtools.CigarOperator;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.Test;
|
||||
|
|
@ -53,11 +55,11 @@ public class HaplotypeUnitTest extends BaseTest {
|
|||
h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M));
|
||||
final Cigar h1Cigar = new Cigar(h1CigarList);
|
||||
String h1bases = "AACTTCTGGTCAACTGGTCAACTGGTCAACTGGTCA";
|
||||
basicInsertTest("A", "AACTT", 1, h1Cigar, bases, h1bases);
|
||||
h1bases = "ACTGGTCACTTAACTGGTCAACTGGTCAACTGGTCA";
|
||||
basicInsertTest("A", "AACTT", 0, h1Cigar, bases, h1bases);
|
||||
h1bases = "ACTGGTCAACTTACTGGTCAACTGGTCAACTGGTCA";
|
||||
basicInsertTest("A", "AACTT", 7, h1Cigar, bases, h1bases);
|
||||
h1bases = "ACTGGTCAACTGGTCAAACTTCTGGTCAACTGGTCA";
|
||||
basicInsertTest("A", "AACTT", 17, h1Cigar, bases, h1bases);
|
||||
basicInsertTest("A", "AACTT", 16, h1Cigar, bases, h1bases);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -68,11 +70,11 @@ public class HaplotypeUnitTest extends BaseTest {
|
|||
h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M));
|
||||
final Cigar h1Cigar = new Cigar(h1CigarList);
|
||||
String h1bases = "ATCAACTGGTCAACTGGTCAACTGGTCA";
|
||||
basicInsertTest("AACTT", "A", 1, h1Cigar, bases, h1bases);
|
||||
h1bases = "ACTGGTCGGTCAACTGGTCAACTGGTCA";
|
||||
basicInsertTest("AACTT", "A", 7, h1Cigar, bases, h1bases);
|
||||
basicInsertTest("ACTGG", "A", 0, h1Cigar, bases, h1bases);
|
||||
h1bases = "ACTGGTCAGTCAACTGGTCAACTGGTCA";
|
||||
basicInsertTest("AACTG", "A", 7, h1Cigar, bases, h1bases);
|
||||
h1bases = "ACTGGTCAACTGGTCAATCAACTGGTCA";
|
||||
basicInsertTest("AACTT", "A", 17, h1Cigar, bases, h1bases);
|
||||
basicInsertTest("ACTGG", "A", 16, h1Cigar, bases, h1bases);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -102,11 +104,11 @@ public class HaplotypeUnitTest extends BaseTest {
|
|||
h1CigarList.add(new CigarElement(7 + 4, CigarOperator.M));
|
||||
final Cigar h1Cigar = new Cigar(h1CigarList);
|
||||
String h1bases = "AACTTTCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
|
||||
basicInsertTest("A", "AACTT", 1, h1Cigar, bases, h1bases);
|
||||
basicInsertTest("A", "AACTT", 0, h1Cigar, bases, h1bases);
|
||||
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCACTTGATCG" + "AGGGGGA" + "AGGC";
|
||||
basicInsertTest("A", "AACTT", 7, h1Cigar, bases, h1bases);
|
||||
basicInsertTest("C", "CACTT", 6, h1Cigar, bases, h1bases);
|
||||
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGACTTGGGGA" + "AGGC";
|
||||
basicInsertTest("A", "AACTT", 17, h1Cigar, bases, h1bases);
|
||||
basicInsertTest("G", "GACTT", 16, h1Cigar, bases, h1bases);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -120,12 +122,12 @@ public class HaplotypeUnitTest extends BaseTest {
|
|||
h1CigarList.add(new CigarElement(3, CigarOperator.D));
|
||||
h1CigarList.add(new CigarElement(7 + 4, CigarOperator.M));
|
||||
final Cigar h1Cigar = new Cigar(h1CigarList);
|
||||
String h1bases = "A" + "CGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
|
||||
basicInsertTest("AACTT", "A", 1, h1Cigar, bases, h1bases);
|
||||
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCG" + "AGGGGGA" + "AGGC";
|
||||
basicInsertTest("AACTT", "A", 7, h1Cigar, bases, h1bases);
|
||||
String h1bases = "A" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
|
||||
basicInsertTest("ATCG", "A", 0, h1Cigar, bases, h1bases);
|
||||
h1bases = "ATCG" + "CCGGCCGGCC" + "ATAAAG" + "AGGGGGA" + "AGGC";
|
||||
basicInsertTest("CGATC", "AAA", 6, h1Cigar, bases, h1bases);
|
||||
h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGA" + "AGGC";
|
||||
basicInsertTest("AACTT", "A", 17, h1Cigar, bases, h1bases);
|
||||
basicInsertTest("GGGGG", "G", 16, h1Cigar, bases, h1bases);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -148,13 +150,16 @@ public class HaplotypeUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
private void basicInsertTest(String ref, String alt, int loc, Cigar cigar, String hap, String newHap) {
|
||||
final int INDEL_PADDING_BASE = (ref.length() == alt.length() ? 0 : 1);
|
||||
final Haplotype h = new Haplotype(hap.getBytes());
|
||||
final Allele h1refAllele = Allele.create(ref, true);
|
||||
final Allele h1altAllele = Allele.create(alt, false);
|
||||
final ArrayList<Allele> alleles = new ArrayList<Allele>();
|
||||
alleles.add(h1refAllele);
|
||||
alleles.add(h1altAllele);
|
||||
final VariantContext vc = new VariantContextBuilder().alleles(alleles).loc("1", loc, loc + h1refAllele.getBases().length - 1).make();
|
||||
h.setAlignmentStartHapwrtRef(0);
|
||||
h.setCigar(cigar);
|
||||
final Haplotype h1 = h.insertAllele(h1refAllele, h1altAllele, loc - INDEL_PADDING_BASE);
|
||||
final Haplotype h1 = h.insertAllele(vc.getReference(), vc.getAlternateAllele(0), loc);
|
||||
final Haplotype h1expected = new Haplotype(newHap.getBytes());
|
||||
Assert.assertEquals(h1, h1expected);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue