Allele refactoring checkpoint 2: all code finally compiles, AD and STR annotations are fixed, and most of the UG integration tests pass.

This commit is contained in:
Eric Banks 2012-07-26 23:27:11 -04:00
parent a694d1b5de
commit baf3e33730
13 changed files with 83 additions and 333 deletions

View File

@ -290,16 +290,16 @@ public class PoolGenotypeLikelihoodsUnitTest {
final byte minQ = 5;
final byte maxQ = 40;
final byte refByte = refPileupTestProvider.getRefByte();
final String altBases = "TCA";
final String altBases = refByte + "TCA";
final String refSampleName = refPileupTestProvider.getSampleNames().get(0);
final List<Allele> trueAlleles = new ArrayList<Allele>();
trueAlleles.add(Allele.create(Allele.NULL_ALLELE_STRING, true));
trueAlleles.add(Allele.create("TC", false));
trueAlleles.add(Allele.create(refByte, true));
trueAlleles.add(Allele.create(refByte + "TC", false));
final String fw = new String(refPileupTestProvider.getReferenceContext().getForwardBases());
final VariantContext refInsertionVC = new VariantContextBuilder("test","chr1",refPileupTestProvider.getReferenceContext().getLocus().getStart(),
refPileupTestProvider.getReferenceContext().getLocus().getStart(), trueAlleles).
genotypes(GenotypeBuilder.create(refSampleName, trueAlleles)).referenceBaseForIndel(refByte).make();
genotypes(GenotypeBuilder.create(refSampleName, trueAlleles)).make();
final int[] matchArray = {95, 995, 9995, 10000};
@ -329,12 +329,12 @@ public class PoolGenotypeLikelihoodsUnitTest {
// create deletion VC
final int delLength = 4;
final List<Allele> delAlleles = new ArrayList<Allele>();
delAlleles.add(Allele.create(fw.substring(1,delLength+1), true));
delAlleles.add(Allele.create(Allele.NULL_ALLELE_STRING, false));
delAlleles.add(Allele.create(fw.substring(0,delLength+1), true));
delAlleles.add(Allele.create(refByte, false));
final VariantContext refDeletionVC = new VariantContextBuilder("test","chr1",refPileupTestProvider.getReferenceContext().getLocus().getStart(),
refPileupTestProvider.getReferenceContext().getLocus().getStart()+delLength, delAlleles).
genotypes(GenotypeBuilder.create(refSampleName, delAlleles)).referenceBaseForIndel(refByte).make();
genotypes(GenotypeBuilder.create(refSampleName, delAlleles)).make();
for (int matches: matchArray) {
for (int mismatches: mismatchArray) {

View File

@ -88,13 +88,13 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
for ( PileupElement p : pileup ) {
if ( p.isBeforeInsertion() ) {
final Allele insertion = Allele.create(refBase + p.getEventBases(), false);
final Allele insertion = Allele.create((char)refBase + p.getEventBases(), false);
if ( alleleCounts.containsKey(insertion) ) {
alleleCounts.put(insertion, alleleCounts.get(insertion)+1);
}
} else if ( p.isBeforeDeletionStart() ) {
if ( p.getEventLength() == refAllele.length() + 1 ) {
if ( p.getEventLength() == refAllele.length() - 1 ) {
// this is indeed the deletion allele recorded in VC
final Allele deletion = Allele.create(refBase);
if ( alleleCounts.containsKey(deletion) ) {

View File

@ -255,7 +255,7 @@ public class ConsensusAlleleCounter {
else continue; // don't go on with this allele if refBases are non-standard
} else {
// insertion case
final String insertionBases = ref.getBase() + s; // add reference padding
final String insertionBases = (char)ref.getBase() + s; // add reference padding
if (Allele.acceptableAlleleBases(insertionBases, false)) { // don't allow N's in insertions
refAllele = Allele.create(ref.getBase(), true);
altAllele = Allele.create(insertionBases, false);

View File

@ -35,7 +35,6 @@ import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.*;
@ -48,8 +47,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
private boolean DEBUG = false;
private boolean ignoreSNPAllelesWhenGenotypingIndels = false;
private PairHMMIndelErrorModel pairModel;
private boolean allelesArePadded;
private static ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>> indelLikelihoodMap =
new ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>>() {
protected synchronized HashMap<PileupElement, LinkedHashMap<Allele, Double>> initialValue() {
@ -105,22 +103,18 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
indelLikelihoodMap.set(new HashMap<PileupElement, LinkedHashMap<Allele, Double>>());
haplotypeMap.clear();
Pair<List<Allele>,Boolean> pair = getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC, ignoreSNPAllelesWhenGenotypingIndels);
alleleList = pair.first;
allelesArePadded = pair.second;
alleleList = getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC, ignoreSNPAllelesWhenGenotypingIndels);
if (alleleList.isEmpty())
return null;
}
getHaplotypeMapFromAlleles(alleleList, ref, loc, haplotypeMap); // will update haplotypeMap adding elements
if (haplotypeMap == null || haplotypeMap.isEmpty())
return null;
// start making the VariantContext
// For all non-snp VC types, VC end location is just startLocation + length of ref allele including padding base.
final int endLoc = computeEndLocation(alleleList, loc,allelesArePadded);
final int endLoc = loc.getStart() + alleleList.get(0).length() - 1;
final int eventLength = getEventLength(alleleList);
final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleleList);
@ -160,15 +154,6 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
return indelLikelihoodMap.get();
}
/**
 * Computes the end coordinate of an event that starts at {@code loc}.
 *
 * The first allele in {@code alleles} is used as the reference allele. The
 * result is {@code loc.getStart() + <reference allele length> - 1}, moved one
 * position further when {@code allelesArePadded} is true.
 *
 * @param alleles          allele list whose first element is used as the reference allele
 * @param loc              location supplying the start coordinate
 * @param allelesArePadded when true, the computed end is incremented by one
 * @return the computed end coordinate
 */
public static int computeEndLocation(final List<Allele> alleles, final GenomeLoc loc, final boolean allelesArePadded) {
    final Allele reference = alleles.get(0);
    final int lastReferenceBase = loc.getStart() + reference.length() - 1;
    return allelesArePadded ? lastReferenceBase + 1 : lastReferenceBase;
}
public static void getHaplotypeMapFromAlleles(final List<Allele> alleleList,
final ReferenceContext ref,
final GenomeLoc loc,
@ -213,16 +198,15 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
}
public static Pair<List<Allele>,Boolean> getInitialAlleleList(final RefMetaDataTracker tracker,
public static List<Allele> getInitialAlleleList(final RefMetaDataTracker tracker,
final ReferenceContext ref,
final Map<String, AlignmentContext> contexts,
final AlignmentContextUtils.ReadOrientation contextType,
final GenomeLocParser locParser,
final UnifiedArgumentCollection UAC,
final boolean ignoreSNPAllelesWhenGenotypingIndels) {
List<Allele> alleles = new ArrayList<Allele>();
boolean allelesArePadded = true;
if (UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) {
VariantContext vc = null;
for (final VariantContext vc_input : tracker.getValues(UAC.alleles, ref.getLocus())) {
@ -235,7 +219,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
}
// ignore places where we don't have a variant
if (vc == null)
return new Pair<List<Allele>,Boolean>(alleles,false);
return alleles;
if (ignoreSNPAllelesWhenGenotypingIndels) {
// if there's an allele that has same length as the reference (i.e. a SNP or MNP), ignore it and don't genotype it
@ -248,15 +232,11 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
} else {
alleles.addAll(vc.getAlleles());
}
if ( vc.getReference().getBases().length == vc.getEnd()-vc.getStart()+1)
allelesArePadded = false;
} else {
alleles = IndelGenotypeLikelihoodsCalculationModel.computeConsensusAlleles(ref, contexts, contextType, locParser, UAC);
alleles = computeConsensusAlleles(ref, contexts, contextType, locParser, UAC);
}
return new Pair<List<Allele>,Boolean> (alleles,allelesArePadded);
return alleles;
}
// Overload function in GenotypeLikelihoodsCalculationModel so that, for an indel case, we consider a deletion as part of the pileup,

View File

@ -204,8 +204,11 @@ public class Haplotype {
return new Haplotype(newHaplotype);
}
public static LinkedHashMap<Allele,Haplotype> makeHaplotypeListFromAlleles(List<Allele> alleleList, int startPos, ReferenceContext ref,
final int haplotypeSize, final int numPrefBases) {
public static LinkedHashMap<Allele,Haplotype> makeHaplotypeListFromAlleles(final List<Allele> alleleList,
final int startPos,
final ReferenceContext ref,
final int haplotypeSize,
final int numPrefBases) {
LinkedHashMap<Allele,Haplotype> haplotypeMap = new LinkedHashMap<Allele,Haplotype>();
@ -216,7 +219,6 @@ public class Haplotype {
refAllele = a;
break;
}
}
if (refAllele == null)
@ -224,19 +226,12 @@ public class Haplotype {
byte[] refBases = ref.getBases();
final int startIdxInReference = 1 + startPos - numPrefBases - ref.getWindow().getStart();
final String basesBeforeVariant = new String(Arrays.copyOfRange(refBases, startIdxInReference, startIdxInReference + numPrefBases));
int startIdxInReference = (int)(1+startPos-numPrefBases-ref.getWindow().getStart());
//int numPrefBases = (int)(vc.getStart()-ref.getWindow().getStart()+1); // indel vc starts one before event
byte[] basesBeforeVariant = Arrays.copyOfRange(refBases,startIdxInReference,startIdxInReference+numPrefBases);
int startAfter = startIdxInReference+numPrefBases+ refAllele.getBases().length;
// protect against long events that overrun available reference context
if (startAfter > refBases.length)
startAfter = refBases.length;
byte[] basesAfterVariant = Arrays.copyOfRange(refBases,
startAfter, refBases.length);
final int startAfter = Math.min(startIdxInReference + numPrefBases + refAllele.getBases().length - 1, refBases.length);
final String basesAfterVariant = new String(Arrays.copyOfRange(refBases, startAfter, refBases.length));
// Create location for all haplotypes
final int startLoc = ref.getWindow().getStart() + startIdxInReference;
@ -244,16 +239,14 @@ public class Haplotype {
final GenomeLoc locus = ref.getGenomeLocParser().createGenomeLoc(ref.getLocus().getContig(),startLoc,stopLoc);
for (final Allele a : alleleList) {
byte[] alleleBases = a.getBases();
final byte[] alleleBases = a.getBases();
// use string concatenation
String haplotypeString = new String(basesBeforeVariant) + new String(alleleBases) + new String(basesAfterVariant);
String haplotypeString = basesBeforeVariant + new String(Arrays.copyOfRange(alleleBases, 1, alleleBases.length)) + basesAfterVariant;
haplotypeString = haplotypeString.substring(0,haplotypeSize);
haplotypeMap.put(a,new Haplotype(haplotypeString.getBytes(), locus));
haplotypeMap.put(a,new Haplotype(haplotypeString.getBytes(), locus));
}
return haplotypeMap;

View File

@ -90,16 +90,23 @@ public class Allele implements Comparable<Allele> {
// null alleles are no longer allowed
if ( wouldBeNullAllele(bases) ) {
throw new IllegalArgumentException("Null alleles are not supported");
} else if ( wouldBeNoCallAllele(bases) ) {
bases = EMPTY_ALLELE_BASES;
}
// no-calls are represented as no bases
if ( wouldBeNoCallAllele(bases) ) {
this.bases = EMPTY_ALLELE_BASES;
isNoCall = true;
if ( isRef ) throw new IllegalArgumentException("Cannot tag a NoCall allele as the reference allele");
} else if ( wouldBeSymbolicAllele(bases) ) {
return;
}
if ( wouldBeSymbolicAllele(bases) ) {
isSymbolic = true;
if ( isRef ) throw new IllegalArgumentException("Cannot tag a symbolic allele as the reference allele");
}
else
else {
bases = BaseUtils.convertToUpperCase(bases);
}
this.isRef = isRef;
this.bases = bases;

View File

@ -1163,13 +1163,14 @@ public class VariantContextUtils {
if ( ! vc.isIndel() ) // only indels are tandem repeats
return null;
final Allele ref = vc.getReference();
final Allele refAllele = vc.getReference();
final byte[] refAlleleBases = Arrays.copyOfRange(refAllele.getBases(), 1, refAllele.length());
byte[] repeatUnit = null;
final ArrayList<Integer> lengths = new ArrayList<Integer>();
for ( final Allele allele : vc.getAlternateAlleles() ) {
Pair<int[],byte[]> result = getNumTandemRepeatUnits(ref.getBases(), allele.getBases(), refBasesStartingAtVCWithoutPad.getBytes());
Pair<int[],byte[]> result = getNumTandemRepeatUnits(refAlleleBases, Arrays.copyOfRange(allele.getBases(), 1, allele.length()), refBasesStartingAtVCWithoutPad.getBytes());
final int[] repetitionCount = result.first;
// repetition count = 0 means allele is not a tandem expansion of context
@ -1184,7 +1185,7 @@ public class VariantContextUtils {
repeatUnit = result.second;
if (VERBOSE) {
System.out.println("RefContext:"+refBasesStartingAtVCWithoutPad);
System.out.println("Ref:"+ref.toString()+" Count:" + String.valueOf(repetitionCount[0]));
System.out.println("Ref:"+refAllele.toString()+" Count:" + String.valueOf(repetitionCount[0]));
System.out.println("Allele:"+allele.toString()+" Count:" + String.valueOf(repetitionCount[1]));
System.out.println("RU:"+new String(repeatUnit));
}

View File

@ -103,22 +103,23 @@ public class ArtificialReadPileupTestProvider {
boolean addBaseErrors, int phredScaledBaseErrorRate) {
// RefMetaDataTracker tracker = new RefMetaDataTracker(null,referenceContext);
ArrayList<Allele> vcAlleles = new ArrayList<Allele>();
String refBase = refBases.substring(offset,offset+1); // referenceContext.getBase()?
Allele refAllele, altAllele;
if (eventLength == 0) {// SNP case
refAllele =Allele.create(refBases.substring(offset,offset+1),true);
if (eventLength == 0) {
// SNP case
refAllele = Allele.create(refBase,true);
altAllele = Allele.create(altBases.substring(0,1), false);
} else if (eventLength>0){
// insertion
refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
altAllele = Allele.create(altBases.substring(0,eventLength), false);
refAllele = Allele.create(refBase,true);
altAllele = Allele.create(refBase + altBases.substring(0,eventLength), false);
}
else {
// deletion
refAllele =Allele.create(refBases.substring(offset,offset+Math.abs(eventLength)),true);
altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
refAllele = Allele.create(refBases.substring(offset,offset+Math.abs(eventLength)),true);
altAllele = Allele.create(refBase, false);
}
int stop = loc.getStart();
vcAlleles.add(refAllele);
@ -127,7 +128,6 @@ public class ArtificialReadPileupTestProvider {
final VariantContextBuilder builder = new VariantContextBuilder().source("");
builder.loc(loc.getContig(), loc.getStart(), stop);
builder.alleles(vcAlleles);
builder.referenceBaseForIndel(referenceContext.getBase());
builder.noGenotypes();
final VariantContext vc = builder.make();

View File

@ -1,226 +0,0 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.codecs.vcf;
import com.google.java.contract.Requires;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.variantcontext.*;
import org.testng.Assert;
import org.testng.SkipException;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.*;
/**
 * Unit tests for {@code VCFAlleleClipper}. The tests below exercise
 * {@code clipAlleles}, {@code needsPadding},
 * {@code createVariantContextWithPaddedAlleles} and {@code computeReverseClipping}.
 */
public class VCFAlleleClipperUnitTest extends BaseTest {
// --------------------------------------------------------------------------------
//
// Test allele clipping
//
// --------------------------------------------------------------------------------
/**
 * One allele-clipping test case: a start/stop position, the unclipped input
 * alleles and the alleles expected after clipping.
 */
private class ClipAllelesTest extends TestDataProvider {
final int position;
final int stop;
// String form of the first input allele (arg[0]); passed to clipAlleles as the reference.
final String ref;
List<Allele> inputs;
List<Allele> expected;
/**
 * @param position start position handed to the clipper
 * @param stop     stop position handed to the clipper, also the expected stop after clipping
 * @param arg      an even number of allele strings: the first half are the input alleles,
 *                 the second half are the expected clipped alleles, in the same order
 */
@Requires("arg.length % 2 == 0")
private ClipAllelesTest(final int position, final int stop, final String ... arg) {
// presumably registers this instance so getTests(ClipAllelesTest.class) can return it -- TestDataProvider is not visible here
super(ClipAllelesTest.class);
this.position = position;
this.stop = stop;
this.ref = arg[0];
int n = arg.length / 2;
inputs = new ArrayList<Allele>(n);
expected = new ArrayList<Allele>(n);
for ( int i = 0; i < n; i++ ) {
// i < n here, so only i == 0 (the first input allele) is created as the reference;
// note this local shadows the 'ref' field
final boolean ref = i % n == 0;
inputs.add(Allele.create(arg[i], ref));
}
for ( int i = n; i < arg.length; i++ ) {
// n <= i < 2n here, so only i == n (the first expected allele) is created as the reference
final boolean ref = i % n == 0;
expected.add(Allele.create(arg[i], ref));
}
}
/**
 * @return true if any input allele differs in length from the expected allele at the same index
 */
public boolean isClipped() {
for ( int i = 0; i < inputs.size(); i++ ) {
if ( inputs.get(i).length() != expected.get(i).length() )
return true;
}
return false;
}
public String toString() {
return String.format("ClipAllelesTest input=%s expected=%s", inputs, expected);
}
}
/**
 * Builds the clipping test cases. Each constructor call lists the input alleles
 * followed by the expected clipped alleles; the instances themselves are not kept
 * locally, the returned data comes from {@code getTests}.
 */
@DataProvider(name = "ClipAllelesTest")
public Object[][] makeClipAllelesTest() {
// do no harm
new ClipAllelesTest(10, 10, "A", "A");
new ClipAllelesTest(10, 10, "A", "C", "A", "C");
new ClipAllelesTest(10, 10, "A", "C", "G", "A", "C", "G");
// insertions
new ClipAllelesTest(10, 10, "A", "AA", "-", "A");
new ClipAllelesTest(10, 10, "A", "AAA", "-", "AA");
new ClipAllelesTest(10, 10, "A", "AG", "-", "G");
// deletions
new ClipAllelesTest(10, 11, "AA", "A", "A", "-");
new ClipAllelesTest(10, 12, "AAA", "A", "AA", "-");
new ClipAllelesTest(10, 11, "AG", "A", "G", "-");
new ClipAllelesTest(10, 12, "AGG", "A", "GG", "-");
// multi-allelic insertion and deletions
new ClipAllelesTest(10, 11, "AA", "A", "AAA", "A", "-", "AA");
new ClipAllelesTest(10, 11, "AA", "A", "AAG", "A", "-", "AG");
new ClipAllelesTest(10, 10, "A", "AA", "AAA", "-", "A", "AA");
new ClipAllelesTest(10, 10, "A", "AA", "ACA", "-", "A", "CA");
new ClipAllelesTest(10, 12, "ACG", "ATC", "AGG", "CG", "TC", "GG");
new ClipAllelesTest(10, 11, "AC", "AT", "AG", "C", "T", "G");
// cannot be clipped
new ClipAllelesTest(10, 11, "AC", "CT", "AG", "AC", "CT", "AG");
new ClipAllelesTest(10, 11, "AC", "CT", "GG", "AC", "CT", "GG");
// symbolic
new ClipAllelesTest(10, 100, "A", "<DEL>", "A", "<DEL>");
new ClipAllelesTest(50, 50, "G", "G]22:60]", "G", "G]22:60]");
new ClipAllelesTest(51, 51, "T", "]22:55]T", "T", "]22:55]T");
new ClipAllelesTest(52, 52, "C", "C[22:51[", "C", "C[22:51[");
new ClipAllelesTest(60, 60, "A", "A]22:50]", "A", "A]22:50]");
// symbolic with alleles that should be clipped
new ClipAllelesTest(10, 100, "A", "<DEL>", "AA", "-", "<DEL>", "A");
new ClipAllelesTest(10, 100, "AA", "<DEL>", "A", "A", "<DEL>", "-");
new ClipAllelesTest(10, 100, "AA", "<DEL>", "A", "AAA", "A", "<DEL>", "-", "AA");
new ClipAllelesTest(10, 100, "AG", "<DEL>", "A", "AGA", "G", "<DEL>", "-", "GA");
new ClipAllelesTest(10, 100, "G", "<DEL>", "A", "G", "<DEL>", "A");
// clipping from both ends
//
// TODO -- THIS CODE IS BROKEN BECAUSE CLIPPING DOES WORK WITH ALLELES CLIPPED FROM THE END
// NOTE(review): the TODO above presumably means "DOES NOT WORK" -- confirm with the author
//
// new ClipAllelesTest(10, 10, "ATA", "ATTA", "-", "T");
// new ClipAllelesTest(10, 10, "ATAA", "ATTAA", "-", "T");
// new ClipAllelesTest(10, 10, "ATAAG", "ATTAAG", "-", "T");
// new ClipAllelesTest(10, 11, "GTA", "ATTA", "G", "AT");
// new ClipAllelesTest(10, 11, "GTAA", "ATTAA", "G", "AT");
// new ClipAllelesTest(10, 11, "GTAAG", "ATTAAG", "G", "AT");
// complex substitutions
new ClipAllelesTest(10, 10, "A", "GA", "A", "GA");
return ClipAllelesTest.getTests(ClipAllelesTest.class);
}
/**
 * Clips the input alleles of each case and checks that no error is reported,
 * that the reported stop equals the case's stop, and that the clipped alleles
 * equal the expected alleles.
 */
@Test(dataProvider = "ClipAllelesTest")
public void testClipAllelesTest(ClipAllelesTest cfg) {
final VCFAlleleClipper.ClippedAlleles clipped = VCFAlleleClipper.clipAlleles(cfg.position, cfg.ref, cfg.inputs, cfg.stop);
Assert.assertNull(clipped.getError(), "Unexpected error occurred");
Assert.assertEquals(clipped.getStop(), cfg.stop, "Clipped alleles stop");
Assert.assertEquals(clipped.getClippedAlleles(), cfg.expected, "Clipped alleles");
}
/**
 * Round-trip check: builds a VariantContext from the clipped alleles, verifies that
 * needsPadding agrees with whether the case was clipped, and, for clipped cases,
 * that re-padding restores the original input alleles.
 */
@Test(dataProvider = "ClipAllelesTest", dependsOnMethods = "testClipAllelesTest")
public void testPaddingAllelesInVC(final ClipAllelesTest cfg) {
final VCFAlleleClipper.ClippedAlleles clipped = VCFAlleleClipper.clipAlleles(cfg.position, cfg.ref, cfg.inputs, cfg.stop);
final VariantContext vc = new VariantContextBuilder("x", "1", cfg.position, cfg.stop, clipped.getClippedAlleles())
.referenceBaseForIndel(clipped.getRefBaseForIndel()).make();
// mixed contexts that also carry symbolic alleles are skipped rather than failed
if ( vc.isMixed() && vc.hasSymbolicAlleles() )
throw new SkipException("GATK cannot handle mixed variant contexts with symbolic and concrete alleles. Remove this check when allele clipping and padding is generalized");
Assert.assertEquals(VCFAlleleClipper.needsPadding(vc), cfg.isClipped(), "needPadding method");
if ( cfg.isClipped() ) {
// TODO
// TODO note that the GATK currently uses a broken approach to the clipped alleles, so the expected stop is
// TODO actually the original stop, as the original stop is +1 its true size.
// TODO
final int expectedStop = vc.getEnd(); // + (vc.hasSymbolicAlleles() ? 0 : 1);
final VariantContext padded = VCFAlleleClipper.createVariantContextWithPaddedAlleles(vc);
Assert.assertEquals(padded.getStart(), vc.getStart(), "padded VC start");
Assert.assertEquals(padded.getAlleles(), cfg.inputs, "padded VC alleles == original unclipped alleles");
// the assertion message below mentions "+ 1", but expectedStop is vc.getEnd() unchanged (the +1 is commented out above)
Assert.assertEquals(padded.getEnd(), expectedStop, "padded VC end should be clipped VC + 1 (added a base to ref allele)");
Assert.assertFalse(VCFAlleleClipper.needsPadding(padded), "padded VC shouldn't need padding again");
}
}
// --------------------------------------------------------------------------------
//
// basic allele clipping test
//
// --------------------------------------------------------------------------------
/**
 * One reverse-clipping test case: a reference string, a list of alleles and the
 * value computeReverseClipping is expected to return for them.
 */
private class ReverseClippingPositionTestProvider extends TestDataProvider {
final String ref;
final List<Allele> alleles = new ArrayList<Allele>();
final int expectedClip;
/**
 * @param expectedClip value expected back from computeReverseClipping
 * @param ref          reference bases as a string
 * @param alleles      allele strings, each turned into an Allele via the single-argument Allele.create
 */
private ReverseClippingPositionTestProvider(final int expectedClip, final String ref, final String... alleles) {
super(ReverseClippingPositionTestProvider.class);
this.ref = ref;
for ( final String allele : alleles )
this.alleles.add(Allele.create(allele));
this.expectedClip = expectedClip;
}
@Override
public String toString() {
return String.format("ref=%s allele=%s reverse clip %d", ref, alleles, expectedClip);
}
}
/**
 * Builds the reverse-clipping test cases; the returned data comes from {@code getTests}.
 */
@DataProvider(name = "ReverseClippingPositionTestProvider")
public Object[][] makeReverseClippingPositionTestProvider() {
// pair clipping
new ReverseClippingPositionTestProvider(0, "ATT", "CCG");
new ReverseClippingPositionTestProvider(1, "ATT", "CCT");
new ReverseClippingPositionTestProvider(2, "ATT", "CTT");
new ReverseClippingPositionTestProvider(2, "ATT", "ATT"); // cannot completely clip allele
// triplets
new ReverseClippingPositionTestProvider(0, "ATT", "CTT", "CGG");
new ReverseClippingPositionTestProvider(1, "ATT", "CTT", "CGT"); // the T can go
new ReverseClippingPositionTestProvider(2, "ATT", "CTT", "CTT"); // both Ts can go
return ReverseClippingPositionTestProvider.getTests(ReverseClippingPositionTestProvider.class);
}
/**
 * Calls computeReverseClipping with the case's alleles and reference bytes and
 * checks the result against the expected clip value.
 */
@Test(dataProvider = "ReverseClippingPositionTestProvider")
public void testReverseClippingPositionTestProvider(ReverseClippingPositionTestProvider cfg) {
int result = VCFAlleleClipper.computeReverseClipping(cfg.alleles, cfg.ref.getBytes(), 0, false);
Assert.assertEquals(result, cfg.expectedClip);
}
}

View File

@ -225,10 +225,10 @@ public class VariantContextTestProvider {
add(builder());
add(builder().alleles("A"));
add(builder().alleles("A", "C", "T"));
add(builder().alleles("-", "C").referenceBaseForIndel("A"));
add(builder().alleles("-", "CAGT").referenceBaseForIndel("A"));
add(builder().loc("1", 10, 11).alleles("C", "-").referenceBaseForIndel("A"));
add(builder().loc("1", 10, 13).alleles("CGT", "-").referenceBaseForIndel("A"));
add(builder().alleles("A", "AC"));
add(builder().alleles("A", "ACAGT"));
add(builder().loc("1", 10, 11).alleles("AC", "A"));
add(builder().loc("1", 10, 13).alleles("ACGT", "A"));
// make sure filters work
add(builder().unfiltered());
@ -302,8 +302,8 @@ public class VariantContextTestProvider {
sites.add(builder().alleles("A").make());
sites.add(builder().alleles("A", "C", "T").make());
sites.add(builder().alleles("-", "C").referenceBaseForIndel("A").make());
sites.add(builder().alleles("-", "CAGT").referenceBaseForIndel("A").make());
sites.add(builder().alleles("A", "AC").make());
sites.add(builder().alleles("A", "ACAGT").make());
for ( VariantContext site : sites ) {
addGenotypes(site);

View File

@ -28,27 +28,22 @@ public class VariantContextUnitTest extends BaseTest {
int snpLocStart = 10;
int snpLocStop = 10;
// - / ATC [ref] from 20-23
// - / ATC [ref] from 20-22
String delLoc = "chr1";
int delLocStart = 20;
int delLocStop = 23;
int delLocStop = 22;
// - [ref] / ATC from 20-20
String insLoc = "chr1";
int insLocStart = 20;
int insLocStop = 20;
// - / A / T / ATC [ref] from 20-23
String mixedLoc = "chr1";
int mixedLocStart = 20;
int mixedLocStop = 23;
VariantContextBuilder basicBuilder, snpBuilder, insBuilder;
@BeforeSuite
public void before() {
del = Allele.create("-");
delRef = Allele.create("-", true);
del = Allele.create("A");
delRef = Allele.create("A", true);
A = Allele.create("A");
C = Allele.create("C");
@ -62,9 +57,9 @@ public class VariantContextUnitTest extends BaseTest {
@BeforeMethod
public void beforeTest() {
basicBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, Arrays.asList(Aref, T)).referenceBaseForIndel((byte)'A');
snpBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, Arrays.asList(Aref, T)).referenceBaseForIndel((byte)'A');
insBuilder = new VariantContextBuilder("test", insLoc, insLocStart, insLocStop, Arrays.asList(delRef, ATC)).referenceBaseForIndel((byte)'A');
basicBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, Arrays.asList(Aref, T));
snpBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, Arrays.asList(Aref, T));
insBuilder = new VariantContextBuilder("test", insLoc, insLocStart, insLocStop, Arrays.asList(delRef, ATC));
}
@Test
@ -213,7 +208,7 @@ public class VariantContextUnitTest extends BaseTest {
@Test
public void testCreatingDeletionVariantContext() {
List<Allele> alleles = Arrays.asList(ATCref, del);
VariantContext vc = new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).referenceBaseForIndel((byte)'A').make();
VariantContext vc = new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).make();
Assert.assertEquals(vc.getChr(), delLoc);
Assert.assertEquals(vc.getStart(), delLocStart);
@ -240,8 +235,8 @@ public class VariantContextUnitTest extends BaseTest {
@Test
public void testMatchingAlleles() {
List<Allele> alleles = Arrays.asList(ATCref, del);
VariantContext vc = new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).referenceBaseForIndel((byte)'A').make();
VariantContext vc2 = new VariantContextBuilder("test2", delLoc, delLocStart+12, delLocStop+12, alleles).referenceBaseForIndel((byte)'A').make();
VariantContext vc = new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).make();
VariantContext vc2 = new VariantContextBuilder("test2", delLoc, delLocStart+12, delLocStop+12, alleles).make();
Assert.assertTrue(vc.hasSameAllelesAs(vc2));
Assert.assertTrue(vc.hasSameAlternateAllelesAs(vc2));
@ -470,15 +465,15 @@ public class VariantContextUnitTest extends BaseTest {
@Test
public void testRepeatAllele() {
Allele nullR = Allele.create(Allele.NULL_ALLELE_STRING, true);
Allele nullA = Allele.create(Allele.NULL_ALLELE_STRING, false);
Allele atc = Allele.create("ATC", false);
Allele atcatc = Allele.create("ATCATC", false);
Allele ccccR = Allele.create("CCCC", true);
Allele cc = Allele.create("CC", false);
Allele cccccc = Allele.create("CCCCCC", false);
Allele gagaR = Allele.create("GAGA", true);
Allele gagagaga = Allele.create("GAGAGAGA", false);
Allele nullR = Allele.create("A", true);
Allele nullA = Allele.create("A", false);
Allele atc = Allele.create("AATC", false);
Allele atcatc = Allele.create("AATCATC", false);
Allele ccccR = Allele.create("ACCCC", true);
Allele cc = Allele.create("ACC", false);
Allele cccccc = Allele.create("ACCCCCC", false);
Allele gagaR = Allele.create("AGAGA", true);
Allele gagagaga = Allele.create("AGAGAGAGA", false);
Pair<List<Integer>,byte[]> result;
byte[] refBytes = "TATCATCATCGGA".getBytes();
@ -678,7 +673,7 @@ public class VariantContextUnitTest extends BaseTest {
@Test(dataProvider = "getAlleles")
public void testMergeAlleles(GetAllelesTest cfg) {
final List<Allele> altAlleles = cfg.alleles.subList(1, cfg.alleles.size());
final VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, cfg.alleles).referenceBaseForIndel((byte)'A').make();
final VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, cfg.alleles).make();
Assert.assertEquals(vc.getAlleles(), cfg.alleles, "VC alleles not the same as input alleles");
Assert.assertEquals(vc.getNAlleles(), cfg.alleles.size(), "VC getNAlleles not the same as input alleles size");

View File

@ -99,7 +99,7 @@ public class VariantContextUtilsUnitTest extends BaseTest {
private VariantContext makeVC(String source, List<Allele> alleles, Collection<Genotype> genotypes, Set<String> filters) {
int start = 10;
int stop = start; // alleles.contains(ATC) ? start + 3 : start;
return new VariantContextBuilder(source, "1", start, stop, alleles).genotypes(genotypes).filters(filters).referenceBaseForIndel(Cref.getBases()[0]).make();
return new VariantContextBuilder(source, "1", start, stop, alleles).genotypes(genotypes).filters(filters).make();
}
// --------------------------------------------------------------------------------

View File

@ -139,8 +139,8 @@ public class VCFWriterUnitTest extends BaseTest {
Map<String, Object> attributes = new HashMap<String,Object>();
GenotypesContext genotypes = GenotypesContext.create(header.getGenotypeSamples().size());
alleles.add(Allele.create("-",true));
alleles.add(Allele.create("CC",false));
alleles.add(Allele.create("A",true));
alleles.add(Allele.create("ACC",false));
attributes.put("DP","50");
for (String name : header.getGenotypeSamples()) {
@ -148,7 +148,7 @@ public class VCFWriterUnitTest extends BaseTest {
genotypes.add(gt);
}
return new VariantContextBuilder("RANDOM", loc.getContig(), loc.getStart(), loc.getStop(), alleles)
.genotypes(genotypes).attributes(attributes).referenceBaseForIndel((byte)'A').make();
.genotypes(genotypes).attributes(attributes).make();
}