Allele refactoring checkpoint 2: all code finally compiles, AD and STR annotations are fixed, and most of the UG integration tests pass.
This commit is contained in:
parent
a694d1b5de
commit
baf3e33730
|
|
@ -290,16 +290,16 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
|||
final byte minQ = 5;
|
||||
final byte maxQ = 40;
|
||||
final byte refByte = refPileupTestProvider.getRefByte();
|
||||
final String altBases = "TCA";
|
||||
final String altBases = refByte + "TCA";
|
||||
final String refSampleName = refPileupTestProvider.getSampleNames().get(0);
|
||||
final List<Allele> trueAlleles = new ArrayList<Allele>();
|
||||
trueAlleles.add(Allele.create(Allele.NULL_ALLELE_STRING, true));
|
||||
trueAlleles.add(Allele.create("TC", false));
|
||||
trueAlleles.add(Allele.create(refByte, true));
|
||||
trueAlleles.add(Allele.create(refByte + "TC", false));
|
||||
|
||||
final String fw = new String(refPileupTestProvider.getReferenceContext().getForwardBases());
|
||||
final VariantContext refInsertionVC = new VariantContextBuilder("test","chr1",refPileupTestProvider.getReferenceContext().getLocus().getStart(),
|
||||
refPileupTestProvider.getReferenceContext().getLocus().getStart(), trueAlleles).
|
||||
genotypes(GenotypeBuilder.create(refSampleName, trueAlleles)).referenceBaseForIndel(refByte).make();
|
||||
genotypes(GenotypeBuilder.create(refSampleName, trueAlleles)).make();
|
||||
|
||||
|
||||
final int[] matchArray = {95, 995, 9995, 10000};
|
||||
|
|
@ -329,12 +329,12 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
|||
// create deletion VC
|
||||
final int delLength = 4;
|
||||
final List<Allele> delAlleles = new ArrayList<Allele>();
|
||||
delAlleles.add(Allele.create(fw.substring(1,delLength+1), true));
|
||||
delAlleles.add(Allele.create(Allele.NULL_ALLELE_STRING, false));
|
||||
delAlleles.add(Allele.create(fw.substring(0,delLength+1), true));
|
||||
delAlleles.add(Allele.create(refByte, false));
|
||||
|
||||
final VariantContext refDeletionVC = new VariantContextBuilder("test","chr1",refPileupTestProvider.getReferenceContext().getLocus().getStart(),
|
||||
refPileupTestProvider.getReferenceContext().getLocus().getStart()+delLength, delAlleles).
|
||||
genotypes(GenotypeBuilder.create(refSampleName, delAlleles)).referenceBaseForIndel(refByte).make();
|
||||
genotypes(GenotypeBuilder.create(refSampleName, delAlleles)).make();
|
||||
|
||||
for (int matches: matchArray) {
|
||||
for (int mismatches: mismatchArray) {
|
||||
|
|
|
|||
|
|
@ -88,13 +88,13 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
|
|||
for ( PileupElement p : pileup ) {
|
||||
if ( p.isBeforeInsertion() ) {
|
||||
|
||||
final Allele insertion = Allele.create(refBase + p.getEventBases(), false);
|
||||
final Allele insertion = Allele.create((char)refBase + p.getEventBases(), false);
|
||||
if ( alleleCounts.containsKey(insertion) ) {
|
||||
alleleCounts.put(insertion, alleleCounts.get(insertion)+1);
|
||||
}
|
||||
|
||||
} else if ( p.isBeforeDeletionStart() ) {
|
||||
if ( p.getEventLength() == refAllele.length() + 1 ) {
|
||||
if ( p.getEventLength() == refAllele.length() - 1 ) {
|
||||
// this is indeed the deletion allele recorded in VC
|
||||
final Allele deletion = Allele.create(refBase);
|
||||
if ( alleleCounts.containsKey(deletion) ) {
|
||||
|
|
|
|||
|
|
@ -255,7 +255,7 @@ public class ConsensusAlleleCounter {
|
|||
else continue; // don't go on with this allele if refBases are non-standard
|
||||
} else {
|
||||
// insertion case
|
||||
final String insertionBases = ref.getBase() + s; // add reference padding
|
||||
final String insertionBases = (char)ref.getBase() + s; // add reference padding
|
||||
if (Allele.acceptableAlleleBases(insertionBases, false)) { // don't allow N's in insertions
|
||||
refAllele = Allele.create(ref.getBase(), true);
|
||||
altAllele = Allele.create(insertionBases, false);
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@ import org.broadinstitute.sting.utils.BaseUtils;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.Haplotype;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
|
@ -48,8 +47,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
private boolean DEBUG = false;
|
||||
private boolean ignoreSNPAllelesWhenGenotypingIndels = false;
|
||||
private PairHMMIndelErrorModel pairModel;
|
||||
private boolean allelesArePadded;
|
||||
|
||||
|
||||
private static ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>> indelLikelihoodMap =
|
||||
new ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>>() {
|
||||
protected synchronized HashMap<PileupElement, LinkedHashMap<Allele, Double>> initialValue() {
|
||||
|
|
@ -105,22 +103,18 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
indelLikelihoodMap.set(new HashMap<PileupElement, LinkedHashMap<Allele, Double>>());
|
||||
haplotypeMap.clear();
|
||||
|
||||
Pair<List<Allele>,Boolean> pair = getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC, ignoreSNPAllelesWhenGenotypingIndels);
|
||||
alleleList = pair.first;
|
||||
allelesArePadded = pair.second;
|
||||
alleleList = getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC, ignoreSNPAllelesWhenGenotypingIndels);
|
||||
if (alleleList.isEmpty())
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
getHaplotypeMapFromAlleles(alleleList, ref, loc, haplotypeMap); // will update haplotypeMap adding elements
|
||||
if (haplotypeMap == null || haplotypeMap.isEmpty())
|
||||
return null;
|
||||
|
||||
// start making the VariantContext
|
||||
// For all non-snp VC types, VC end location is just startLocation + length of ref allele (including padding base) - 1.
|
||||
|
||||
final int endLoc = computeEndLocation(alleleList, loc,allelesArePadded);
|
||||
final int endLoc = loc.getStart() + alleleList.get(0).length() - 1;
|
||||
final int eventLength = getEventLength(alleleList);
|
||||
|
||||
final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleleList);
|
||||
|
|
@ -160,15 +154,6 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
return indelLikelihoodMap.get();
|
||||
}
|
||||
|
||||
public static int computeEndLocation(final List<Allele> alleles, final GenomeLoc loc, final boolean allelesArePadded) {
|
||||
Allele refAllele = alleles.get(0);
|
||||
int endLoc = loc.getStart() + refAllele.length()-1;
|
||||
if (allelesArePadded)
|
||||
endLoc++;
|
||||
|
||||
return endLoc;
|
||||
}
|
||||
|
||||
public static void getHaplotypeMapFromAlleles(final List<Allele> alleleList,
|
||||
final ReferenceContext ref,
|
||||
final GenomeLoc loc,
|
||||
|
|
@ -213,16 +198,15 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
|
||||
}
|
||||
|
||||
public static Pair<List<Allele>,Boolean> getInitialAlleleList(final RefMetaDataTracker tracker,
|
||||
public static List<Allele> getInitialAlleleList(final RefMetaDataTracker tracker,
|
||||
final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> contexts,
|
||||
final AlignmentContextUtils.ReadOrientation contextType,
|
||||
final GenomeLocParser locParser,
|
||||
final UnifiedArgumentCollection UAC,
|
||||
final boolean ignoreSNPAllelesWhenGenotypingIndels) {
|
||||
|
||||
|
||||
List<Allele> alleles = new ArrayList<Allele>();
|
||||
boolean allelesArePadded = true;
|
||||
if (UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) {
|
||||
VariantContext vc = null;
|
||||
for (final VariantContext vc_input : tracker.getValues(UAC.alleles, ref.getLocus())) {
|
||||
|
|
@ -235,7 +219,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
}
|
||||
// ignore places where we don't have a variant
|
||||
if (vc == null)
|
||||
return new Pair<List<Allele>,Boolean>(alleles,false);
|
||||
return alleles;
|
||||
|
||||
if (ignoreSNPAllelesWhenGenotypingIndels) {
|
||||
// if there's an allele that has the same length as the reference (i.e. a SNP or MNP), ignore it and don't genotype it
|
||||
|
|
@ -248,15 +232,11 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
} else {
|
||||
alleles.addAll(vc.getAlleles());
|
||||
}
|
||||
if ( vc.getReference().getBases().length == vc.getEnd()-vc.getStart()+1)
|
||||
allelesArePadded = false;
|
||||
|
||||
|
||||
|
||||
} else {
|
||||
alleles = IndelGenotypeLikelihoodsCalculationModel.computeConsensusAlleles(ref, contexts, contextType, locParser, UAC);
|
||||
alleles = computeConsensusAlleles(ref, contexts, contextType, locParser, UAC);
|
||||
}
|
||||
return new Pair<List<Allele>,Boolean> (alleles,allelesArePadded);
|
||||
return alleles;
|
||||
}
|
||||
|
||||
// Overload function in GenotypeLikelihoodsCalculationModel so that, for an indel case, we consider a deletion as part of the pileup,
|
||||
|
|
|
|||
|
|
@ -204,8 +204,11 @@ public class Haplotype {
|
|||
return new Haplotype(newHaplotype);
|
||||
}
|
||||
|
||||
public static LinkedHashMap<Allele,Haplotype> makeHaplotypeListFromAlleles(List<Allele> alleleList, int startPos, ReferenceContext ref,
|
||||
final int haplotypeSize, final int numPrefBases) {
|
||||
public static LinkedHashMap<Allele,Haplotype> makeHaplotypeListFromAlleles(final List<Allele> alleleList,
|
||||
final int startPos,
|
||||
final ReferenceContext ref,
|
||||
final int haplotypeSize,
|
||||
final int numPrefBases) {
|
||||
|
||||
LinkedHashMap<Allele,Haplotype> haplotypeMap = new LinkedHashMap<Allele,Haplotype>();
|
||||
|
||||
|
|
@ -216,7 +219,6 @@ public class Haplotype {
|
|||
refAllele = a;
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (refAllele == null)
|
||||
|
|
@ -224,19 +226,12 @@ public class Haplotype {
|
|||
|
||||
byte[] refBases = ref.getBases();
|
||||
|
||||
final int startIdxInReference = 1 + startPos - numPrefBases - ref.getWindow().getStart();
|
||||
final String basesBeforeVariant = new String(Arrays.copyOfRange(refBases, startIdxInReference, startIdxInReference + numPrefBases));
|
||||
|
||||
int startIdxInReference = (int)(1+startPos-numPrefBases-ref.getWindow().getStart());
|
||||
//int numPrefBases = (int)(vc.getStart()-ref.getWindow().getStart()+1); // indel vc starts one before event
|
||||
|
||||
|
||||
byte[] basesBeforeVariant = Arrays.copyOfRange(refBases,startIdxInReference,startIdxInReference+numPrefBases);
|
||||
int startAfter = startIdxInReference+numPrefBases+ refAllele.getBases().length;
|
||||
// protect against long events that overrun available reference context
|
||||
if (startAfter > refBases.length)
|
||||
startAfter = refBases.length;
|
||||
byte[] basesAfterVariant = Arrays.copyOfRange(refBases,
|
||||
startAfter, refBases.length);
|
||||
|
||||
final int startAfter = Math.min(startIdxInReference + numPrefBases + refAllele.getBases().length - 1, refBases.length);
|
||||
final String basesAfterVariant = new String(Arrays.copyOfRange(refBases, startAfter, refBases.length));
|
||||
|
||||
// Create location for all haplotypes
|
||||
final int startLoc = ref.getWindow().getStart() + startIdxInReference;
|
||||
|
|
@ -244,16 +239,14 @@ public class Haplotype {
|
|||
|
||||
final GenomeLoc locus = ref.getGenomeLocParser().createGenomeLoc(ref.getLocus().getContig(),startLoc,stopLoc);
|
||||
|
||||
|
||||
for (final Allele a : alleleList) {
|
||||
|
||||
byte[] alleleBases = a.getBases();
|
||||
final byte[] alleleBases = a.getBases();
|
||||
// use string concatenation
|
||||
String haplotypeString = new String(basesBeforeVariant) + new String(alleleBases) + new String(basesAfterVariant);
|
||||
String haplotypeString = basesBeforeVariant + new String(Arrays.copyOfRange(alleleBases, 1, alleleBases.length)) + basesAfterVariant;
|
||||
haplotypeString = haplotypeString.substring(0,haplotypeSize);
|
||||
|
||||
haplotypeMap.put(a,new Haplotype(haplotypeString.getBytes(), locus));
|
||||
|
||||
haplotypeMap.put(a,new Haplotype(haplotypeString.getBytes(), locus));
|
||||
}
|
||||
|
||||
return haplotypeMap;
|
||||
|
|
|
|||
|
|
@ -90,16 +90,23 @@ public class Allele implements Comparable<Allele> {
|
|||
// null alleles are no longer allowed
|
||||
if ( wouldBeNullAllele(bases) ) {
|
||||
throw new IllegalArgumentException("Null alleles are not supported");
|
||||
} else if ( wouldBeNoCallAllele(bases) ) {
|
||||
bases = EMPTY_ALLELE_BASES;
|
||||
}
|
||||
|
||||
// no-calls are represented as no bases
|
||||
if ( wouldBeNoCallAllele(bases) ) {
|
||||
this.bases = EMPTY_ALLELE_BASES;
|
||||
isNoCall = true;
|
||||
if ( isRef ) throw new IllegalArgumentException("Cannot tag a NoCall allele as the reference allele");
|
||||
} else if ( wouldBeSymbolicAllele(bases) ) {
|
||||
return;
|
||||
}
|
||||
|
||||
if ( wouldBeSymbolicAllele(bases) ) {
|
||||
isSymbolic = true;
|
||||
if ( isRef ) throw new IllegalArgumentException("Cannot tag a symbolic allele as the reference allele");
|
||||
}
|
||||
else
|
||||
else {
|
||||
bases = BaseUtils.convertToUpperCase(bases);
|
||||
}
|
||||
|
||||
this.isRef = isRef;
|
||||
this.bases = bases;
|
||||
|
|
|
|||
|
|
@ -1163,13 +1163,14 @@ public class VariantContextUtils {
|
|||
if ( ! vc.isIndel() ) // only indels are tandem repeats
|
||||
return null;
|
||||
|
||||
final Allele ref = vc.getReference();
|
||||
final Allele refAllele = vc.getReference();
|
||||
final byte[] refAlleleBases = Arrays.copyOfRange(refAllele.getBases(), 1, refAllele.length());
|
||||
|
||||
byte[] repeatUnit = null;
|
||||
final ArrayList<Integer> lengths = new ArrayList<Integer>();
|
||||
|
||||
for ( final Allele allele : vc.getAlternateAlleles() ) {
|
||||
Pair<int[],byte[]> result = getNumTandemRepeatUnits(ref.getBases(), allele.getBases(), refBasesStartingAtVCWithoutPad.getBytes());
|
||||
Pair<int[],byte[]> result = getNumTandemRepeatUnits(refAlleleBases, Arrays.copyOfRange(allele.getBases(), 1, allele.length()), refBasesStartingAtVCWithoutPad.getBytes());
|
||||
|
||||
final int[] repetitionCount = result.first;
|
||||
// repetition count = 0 means allele is not a tandem expansion of context
|
||||
|
|
@ -1184,7 +1185,7 @@ public class VariantContextUtils {
|
|||
repeatUnit = result.second;
|
||||
if (VERBOSE) {
|
||||
System.out.println("RefContext:"+refBasesStartingAtVCWithoutPad);
|
||||
System.out.println("Ref:"+ref.toString()+" Count:" + String.valueOf(repetitionCount[0]));
|
||||
System.out.println("Ref:"+refAllele.toString()+" Count:" + String.valueOf(repetitionCount[0]));
|
||||
System.out.println("Allele:"+allele.toString()+" Count:" + String.valueOf(repetitionCount[1]));
|
||||
System.out.println("RU:"+new String(repeatUnit));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -103,22 +103,23 @@ public class ArtificialReadPileupTestProvider {
|
|||
boolean addBaseErrors, int phredScaledBaseErrorRate) {
|
||||
// RefMetaDataTracker tracker = new RefMetaDataTracker(null,referenceContext);
|
||||
|
||||
|
||||
ArrayList<Allele> vcAlleles = new ArrayList<Allele>();
|
||||
String refBase = refBases.substring(offset,offset+1); // referenceContext.getBase()?
|
||||
Allele refAllele, altAllele;
|
||||
if (eventLength == 0) {// SNP case
|
||||
refAllele =Allele.create(refBases.substring(offset,offset+1),true);
|
||||
if (eventLength == 0) {
|
||||
// SNP case
|
||||
refAllele = Allele.create(refBase,true);
|
||||
altAllele = Allele.create(altBases.substring(0,1), false);
|
||||
|
||||
} else if (eventLength>0){
|
||||
// insertion
|
||||
refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
|
||||
altAllele = Allele.create(altBases.substring(0,eventLength), false);
|
||||
refAllele = Allele.create(refBase,true);
|
||||
altAllele = Allele.create(refBase + altBases.substring(0,eventLength), false);
|
||||
}
|
||||
else {
|
||||
// deletion
|
||||
refAllele =Allele.create(refBases.substring(offset,offset+Math.abs(eventLength)),true);
|
||||
altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
|
||||
refAllele = Allele.create(refBases.substring(offset,offset+Math.abs(eventLength)),true);
|
||||
altAllele = Allele.create(refBase, false);
|
||||
}
|
||||
int stop = loc.getStart();
|
||||
vcAlleles.add(refAllele);
|
||||
|
|
@ -127,7 +128,6 @@ public class ArtificialReadPileupTestProvider {
|
|||
final VariantContextBuilder builder = new VariantContextBuilder().source("");
|
||||
builder.loc(loc.getContig(), loc.getStart(), stop);
|
||||
builder.alleles(vcAlleles);
|
||||
builder.referenceBaseForIndel(referenceContext.getBase());
|
||||
builder.noGenotypes();
|
||||
|
||||
final VariantContext vc = builder.make();
|
||||
|
|
|
|||
|
|
@ -1,226 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
package org.broadinstitute.sting.utils.codecs.vcf;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
import org.testng.Assert;
|
||||
import org.testng.SkipException;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
public class VCFAlleleClipperUnitTest extends BaseTest {
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Test allele clipping
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
private class ClipAllelesTest extends TestDataProvider {
|
||||
final int position;
|
||||
final int stop;
|
||||
final String ref;
|
||||
List<Allele> inputs;
|
||||
List<Allele> expected;
|
||||
|
||||
@Requires("arg.length % 2 == 0")
|
||||
private ClipAllelesTest(final int position, final int stop, final String ... arg) {
|
||||
super(ClipAllelesTest.class);
|
||||
this.position = position;
|
||||
this.stop = stop;
|
||||
this.ref = arg[0];
|
||||
|
||||
int n = arg.length / 2;
|
||||
inputs = new ArrayList<Allele>(n);
|
||||
expected = new ArrayList<Allele>(n);
|
||||
|
||||
for ( int i = 0; i < n; i++ ) {
|
||||
final boolean ref = i % n == 0;
|
||||
inputs.add(Allele.create(arg[i], ref));
|
||||
}
|
||||
for ( int i = n; i < arg.length; i++ ) {
|
||||
final boolean ref = i % n == 0;
|
||||
expected.add(Allele.create(arg[i], ref));
|
||||
}
|
||||
}
|
||||
|
||||
public boolean isClipped() {
|
||||
for ( int i = 0; i < inputs.size(); i++ ) {
|
||||
if ( inputs.get(i).length() != expected.get(i).length() )
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("ClipAllelesTest input=%s expected=%s", inputs, expected);
|
||||
}
|
||||
}
|
||||
@DataProvider(name = "ClipAllelesTest")
|
||||
public Object[][] makeClipAllelesTest() {
|
||||
// do no harm
|
||||
new ClipAllelesTest(10, 10, "A", "A");
|
||||
new ClipAllelesTest(10, 10, "A", "C", "A", "C");
|
||||
new ClipAllelesTest(10, 10, "A", "C", "G", "A", "C", "G");
|
||||
|
||||
// insertions
|
||||
new ClipAllelesTest(10, 10, "A", "AA", "-", "A");
|
||||
new ClipAllelesTest(10, 10, "A", "AAA", "-", "AA");
|
||||
new ClipAllelesTest(10, 10, "A", "AG", "-", "G");
|
||||
|
||||
// deletions
|
||||
new ClipAllelesTest(10, 11, "AA", "A", "A", "-");
|
||||
new ClipAllelesTest(10, 12, "AAA", "A", "AA", "-");
|
||||
new ClipAllelesTest(10, 11, "AG", "A", "G", "-");
|
||||
new ClipAllelesTest(10, 12, "AGG", "A", "GG", "-");
|
||||
|
||||
// multi-allelic insertion and deletions
|
||||
new ClipAllelesTest(10, 11, "AA", "A", "AAA", "A", "-", "AA");
|
||||
new ClipAllelesTest(10, 11, "AA", "A", "AAG", "A", "-", "AG");
|
||||
new ClipAllelesTest(10, 10, "A", "AA", "AAA", "-", "A", "AA");
|
||||
new ClipAllelesTest(10, 10, "A", "AA", "ACA", "-", "A", "CA");
|
||||
new ClipAllelesTest(10, 12, "ACG", "ATC", "AGG", "CG", "TC", "GG");
|
||||
new ClipAllelesTest(10, 11, "AC", "AT", "AG", "C", "T", "G");
|
||||
|
||||
// cannot be clipped
|
||||
new ClipAllelesTest(10, 11, "AC", "CT", "AG", "AC", "CT", "AG");
|
||||
new ClipAllelesTest(10, 11, "AC", "CT", "GG", "AC", "CT", "GG");
|
||||
|
||||
// symbolic
|
||||
new ClipAllelesTest(10, 100, "A", "<DEL>", "A", "<DEL>");
|
||||
new ClipAllelesTest(50, 50, "G", "G]22:60]", "G", "G]22:60]");
|
||||
new ClipAllelesTest(51, 51, "T", "]22:55]T", "T", "]22:55]T");
|
||||
new ClipAllelesTest(52, 52, "C", "C[22:51[", "C", "C[22:51[");
|
||||
new ClipAllelesTest(60, 60, "A", "A]22:50]", "A", "A]22:50]");
|
||||
|
||||
// symbolic with alleles that should be clipped
|
||||
new ClipAllelesTest(10, 100, "A", "<DEL>", "AA", "-", "<DEL>", "A");
|
||||
new ClipAllelesTest(10, 100, "AA", "<DEL>", "A", "A", "<DEL>", "-");
|
||||
new ClipAllelesTest(10, 100, "AA", "<DEL>", "A", "AAA", "A", "<DEL>", "-", "AA");
|
||||
new ClipAllelesTest(10, 100, "AG", "<DEL>", "A", "AGA", "G", "<DEL>", "-", "GA");
|
||||
new ClipAllelesTest(10, 100, "G", "<DEL>", "A", "G", "<DEL>", "A");
|
||||
|
||||
// clipping from both ends
|
||||
//
|
||||
// TODO -- THIS CODE IS BROKEN BECAUSE CLIPPING DOES NOT WORK WITH ALLELES CLIPPED FROM THE END
|
||||
//
|
||||
// new ClipAllelesTest(10, 10, "ATA", "ATTA", "-", "T");
|
||||
// new ClipAllelesTest(10, 10, "ATAA", "ATTAA", "-", "T");
|
||||
// new ClipAllelesTest(10, 10, "ATAAG", "ATTAAG", "-", "T");
|
||||
// new ClipAllelesTest(10, 11, "GTA", "ATTA", "G", "AT");
|
||||
// new ClipAllelesTest(10, 11, "GTAA", "ATTAA", "G", "AT");
|
||||
// new ClipAllelesTest(10, 11, "GTAAG", "ATTAAG", "G", "AT");
|
||||
|
||||
// complex substitutions
|
||||
new ClipAllelesTest(10, 10, "A", "GA", "A", "GA");
|
||||
|
||||
return ClipAllelesTest.getTests(ClipAllelesTest.class);
|
||||
}
|
||||
|
||||
@Test(dataProvider = "ClipAllelesTest")
|
||||
public void testClipAllelesTest(ClipAllelesTest cfg) {
|
||||
final VCFAlleleClipper.ClippedAlleles clipped = VCFAlleleClipper.clipAlleles(cfg.position, cfg.ref, cfg.inputs, cfg.stop);
|
||||
Assert.assertNull(clipped.getError(), "Unexpected error occurred");
|
||||
Assert.assertEquals(clipped.getStop(), cfg.stop, "Clipped alleles stop");
|
||||
Assert.assertEquals(clipped.getClippedAlleles(), cfg.expected, "Clipped alleles");
|
||||
}
|
||||
|
||||
@Test(dataProvider = "ClipAllelesTest", dependsOnMethods = "testClipAllelesTest")
|
||||
public void testPaddingAllelesInVC(final ClipAllelesTest cfg) {
|
||||
final VCFAlleleClipper.ClippedAlleles clipped = VCFAlleleClipper.clipAlleles(cfg.position, cfg.ref, cfg.inputs, cfg.stop);
|
||||
final VariantContext vc = new VariantContextBuilder("x", "1", cfg.position, cfg.stop, clipped.getClippedAlleles())
|
||||
.referenceBaseForIndel(clipped.getRefBaseForIndel()).make();
|
||||
|
||||
if ( vc.isMixed() && vc.hasSymbolicAlleles() )
|
||||
throw new SkipException("GATK cannot handle mixed variant contexts with symbolic and concrete alleles. Remove this check when allele clipping and padding is generalized");
|
||||
|
||||
Assert.assertEquals(VCFAlleleClipper.needsPadding(vc), cfg.isClipped(), "needPadding method");
|
||||
|
||||
if ( cfg.isClipped() ) {
|
||||
// TODO
|
||||
// TODO note that the GATK currently uses a broken approach to the clipped alleles, so the expected stop is
|
||||
// TODO actually the original stop, as the original stop is +1 its true size.
|
||||
// TODO
|
||||
final int expectedStop = vc.getEnd(); // + (vc.hasSymbolicAlleles() ? 0 : 1);
|
||||
|
||||
final VariantContext padded = VCFAlleleClipper.createVariantContextWithPaddedAlleles(vc);
|
||||
Assert.assertEquals(padded.getStart(), vc.getStart(), "padded VC start");
|
||||
Assert.assertEquals(padded.getAlleles(), cfg.inputs, "padded VC alleles == original unclipped alleles");
|
||||
Assert.assertEquals(padded.getEnd(), expectedStop, "padded VC end should be clipped VC + 1 (added a base to ref allele)");
|
||||
Assert.assertFalse(VCFAlleleClipper.needsPadding(padded), "padded VC shouldn't need padding again");
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// basic allele clipping test
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
private class ReverseClippingPositionTestProvider extends TestDataProvider {
|
||||
final String ref;
|
||||
final List<Allele> alleles = new ArrayList<Allele>();
|
||||
final int expectedClip;
|
||||
|
||||
private ReverseClippingPositionTestProvider(final int expectedClip, final String ref, final String... alleles) {
|
||||
super(ReverseClippingPositionTestProvider.class);
|
||||
this.ref = ref;
|
||||
for ( final String allele : alleles )
|
||||
this.alleles.add(Allele.create(allele));
|
||||
this.expectedClip = expectedClip;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("ref=%s allele=%s reverse clip %d", ref, alleles, expectedClip);
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "ReverseClippingPositionTestProvider")
|
||||
public Object[][] makeReverseClippingPositionTestProvider() {
|
||||
// pair clipping
|
||||
new ReverseClippingPositionTestProvider(0, "ATT", "CCG");
|
||||
new ReverseClippingPositionTestProvider(1, "ATT", "CCT");
|
||||
new ReverseClippingPositionTestProvider(2, "ATT", "CTT");
|
||||
new ReverseClippingPositionTestProvider(2, "ATT", "ATT"); // cannot completely clip allele
|
||||
|
||||
// triplets
|
||||
new ReverseClippingPositionTestProvider(0, "ATT", "CTT", "CGG");
|
||||
new ReverseClippingPositionTestProvider(1, "ATT", "CTT", "CGT"); // the T can go
|
||||
new ReverseClippingPositionTestProvider(2, "ATT", "CTT", "CTT"); // both Ts can go
|
||||
|
||||
return ReverseClippingPositionTestProvider.getTests(ReverseClippingPositionTestProvider.class);
|
||||
}
|
||||
|
||||
|
||||
@Test(dataProvider = "ReverseClippingPositionTestProvider")
|
||||
public void testReverseClippingPositionTestProvider(ReverseClippingPositionTestProvider cfg) {
|
||||
int result = VCFAlleleClipper.computeReverseClipping(cfg.alleles, cfg.ref.getBytes(), 0, false);
|
||||
Assert.assertEquals(result, cfg.expectedClip);
|
||||
}
|
||||
}
|
||||
|
|
@ -225,10 +225,10 @@ public class VariantContextTestProvider {
|
|||
add(builder());
|
||||
add(builder().alleles("A"));
|
||||
add(builder().alleles("A", "C", "T"));
|
||||
add(builder().alleles("-", "C").referenceBaseForIndel("A"));
|
||||
add(builder().alleles("-", "CAGT").referenceBaseForIndel("A"));
|
||||
add(builder().loc("1", 10, 11).alleles("C", "-").referenceBaseForIndel("A"));
|
||||
add(builder().loc("1", 10, 13).alleles("CGT", "-").referenceBaseForIndel("A"));
|
||||
add(builder().alleles("A", "AC"));
|
||||
add(builder().alleles("A", "ACAGT"));
|
||||
add(builder().loc("1", 10, 11).alleles("AC", "A"));
|
||||
add(builder().loc("1", 10, 13).alleles("ACGT", "A"));
|
||||
|
||||
// make sure filters work
|
||||
add(builder().unfiltered());
|
||||
|
|
@ -302,8 +302,8 @@ public class VariantContextTestProvider {
|
|||
|
||||
sites.add(builder().alleles("A").make());
|
||||
sites.add(builder().alleles("A", "C", "T").make());
|
||||
sites.add(builder().alleles("-", "C").referenceBaseForIndel("A").make());
|
||||
sites.add(builder().alleles("-", "CAGT").referenceBaseForIndel("A").make());
|
||||
sites.add(builder().alleles("A", "AC").make());
|
||||
sites.add(builder().alleles("A", "ACAGT").make());
|
||||
|
||||
for ( VariantContext site : sites ) {
|
||||
addGenotypes(site);
|
||||
|
|
|
|||
|
|
@ -28,27 +28,22 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
int snpLocStart = 10;
|
||||
int snpLocStop = 10;
|
||||
|
||||
// - / ATC [ref] from 20-23
|
||||
// - / ATC [ref] from 20-22
|
||||
String delLoc = "chr1";
|
||||
int delLocStart = 20;
|
||||
int delLocStop = 23;
|
||||
int delLocStop = 22;
|
||||
|
||||
// - [ref] / ATC from 20-20
|
||||
String insLoc = "chr1";
|
||||
int insLocStart = 20;
|
||||
int insLocStop = 20;
|
||||
|
||||
// - / A / T / ATC [ref] from 20-23
|
||||
String mixedLoc = "chr1";
|
||||
int mixedLocStart = 20;
|
||||
int mixedLocStop = 23;
|
||||
|
||||
VariantContextBuilder basicBuilder, snpBuilder, insBuilder;
|
||||
|
||||
@BeforeSuite
|
||||
public void before() {
|
||||
del = Allele.create("-");
|
||||
delRef = Allele.create("-", true);
|
||||
del = Allele.create("A");
|
||||
delRef = Allele.create("A", true);
|
||||
|
||||
A = Allele.create("A");
|
||||
C = Allele.create("C");
|
||||
|
|
@ -62,9 +57,9 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
|
||||
@BeforeMethod
|
||||
public void beforeTest() {
|
||||
basicBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, Arrays.asList(Aref, T)).referenceBaseForIndel((byte)'A');
|
||||
snpBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, Arrays.asList(Aref, T)).referenceBaseForIndel((byte)'A');
|
||||
insBuilder = new VariantContextBuilder("test", insLoc, insLocStart, insLocStop, Arrays.asList(delRef, ATC)).referenceBaseForIndel((byte)'A');
|
||||
basicBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, Arrays.asList(Aref, T));
|
||||
snpBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, Arrays.asList(Aref, T));
|
||||
insBuilder = new VariantContextBuilder("test", insLoc, insLocStart, insLocStop, Arrays.asList(delRef, ATC));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -213,7 +208,7 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
@Test
|
||||
public void testCreatingDeletionVariantContext() {
|
||||
List<Allele> alleles = Arrays.asList(ATCref, del);
|
||||
VariantContext vc = new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).referenceBaseForIndel((byte)'A').make();
|
||||
VariantContext vc = new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).make();
|
||||
|
||||
Assert.assertEquals(vc.getChr(), delLoc);
|
||||
Assert.assertEquals(vc.getStart(), delLocStart);
|
||||
|
|
@ -240,8 +235,8 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
@Test
|
||||
public void testMatchingAlleles() {
|
||||
List<Allele> alleles = Arrays.asList(ATCref, del);
|
||||
VariantContext vc = new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).referenceBaseForIndel((byte)'A').make();
|
||||
VariantContext vc2 = new VariantContextBuilder("test2", delLoc, delLocStart+12, delLocStop+12, alleles).referenceBaseForIndel((byte)'A').make();
|
||||
VariantContext vc = new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).make();
|
||||
VariantContext vc2 = new VariantContextBuilder("test2", delLoc, delLocStart+12, delLocStop+12, alleles).make();
|
||||
|
||||
Assert.assertTrue(vc.hasSameAllelesAs(vc2));
|
||||
Assert.assertTrue(vc.hasSameAlternateAllelesAs(vc2));
|
||||
|
|
@ -470,15 +465,15 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testRepeatAllele() {
|
||||
Allele nullR = Allele.create(Allele.NULL_ALLELE_STRING, true);
|
||||
Allele nullA = Allele.create(Allele.NULL_ALLELE_STRING, false);
|
||||
Allele atc = Allele.create("ATC", false);
|
||||
Allele atcatc = Allele.create("ATCATC", false);
|
||||
Allele ccccR = Allele.create("CCCC", true);
|
||||
Allele cc = Allele.create("CC", false);
|
||||
Allele cccccc = Allele.create("CCCCCC", false);
|
||||
Allele gagaR = Allele.create("GAGA", true);
|
||||
Allele gagagaga = Allele.create("GAGAGAGA", false);
|
||||
Allele nullR = Allele.create("A", true);
|
||||
Allele nullA = Allele.create("A", false);
|
||||
Allele atc = Allele.create("AATC", false);
|
||||
Allele atcatc = Allele.create("AATCATC", false);
|
||||
Allele ccccR = Allele.create("ACCCC", true);
|
||||
Allele cc = Allele.create("ACC", false);
|
||||
Allele cccccc = Allele.create("ACCCCCC", false);
|
||||
Allele gagaR = Allele.create("AGAGA", true);
|
||||
Allele gagagaga = Allele.create("AGAGAGAGA", false);
|
||||
|
||||
Pair<List<Integer>,byte[]> result;
|
||||
byte[] refBytes = "TATCATCATCGGA".getBytes();
|
||||
|
|
@ -678,7 +673,7 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
@Test(dataProvider = "getAlleles")
|
||||
public void testMergeAlleles(GetAllelesTest cfg) {
|
||||
final List<Allele> altAlleles = cfg.alleles.subList(1, cfg.alleles.size());
|
||||
final VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, cfg.alleles).referenceBaseForIndel((byte)'A').make();
|
||||
final VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, cfg.alleles).make();
|
||||
|
||||
Assert.assertEquals(vc.getAlleles(), cfg.alleles, "VC alleles not the same as input alleles");
|
||||
Assert.assertEquals(vc.getNAlleles(), cfg.alleles.size(), "VC getNAlleles not the same as input alleles size");
|
||||
|
|
|
|||
|
|
@ -99,7 +99,7 @@ public class VariantContextUtilsUnitTest extends BaseTest {
|
|||
private VariantContext makeVC(String source, List<Allele> alleles, Collection<Genotype> genotypes, Set<String> filters) {
|
||||
int start = 10;
|
||||
int stop = start; // alleles.contains(ATC) ? start + 3 : start;
|
||||
return new VariantContextBuilder(source, "1", start, stop, alleles).genotypes(genotypes).filters(filters).referenceBaseForIndel(Cref.getBases()[0]).make();
|
||||
return new VariantContextBuilder(source, "1", start, stop, alleles).genotypes(genotypes).filters(filters).make();
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -139,8 +139,8 @@ public class VCFWriterUnitTest extends BaseTest {
|
|||
Map<String, Object> attributes = new HashMap<String,Object>();
|
||||
GenotypesContext genotypes = GenotypesContext.create(header.getGenotypeSamples().size());
|
||||
|
||||
alleles.add(Allele.create("-",true));
|
||||
alleles.add(Allele.create("CC",false));
|
||||
alleles.add(Allele.create("A",true));
|
||||
alleles.add(Allele.create("ACC",false));
|
||||
|
||||
attributes.put("DP","50");
|
||||
for (String name : header.getGenotypeSamples()) {
|
||||
|
|
@ -148,7 +148,7 @@ public class VCFWriterUnitTest extends BaseTest {
|
|||
genotypes.add(gt);
|
||||
}
|
||||
return new VariantContextBuilder("RANDOM", loc.getContig(), loc.getStart(), loc.getStop(), alleles)
|
||||
.genotypes(genotypes).attributes(attributes).referenceBaseForIndel((byte)'A').make();
|
||||
.genotypes(genotypes).attributes(attributes).make();
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue