diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsUnitTest.java index 35a1bb043..a7dd65bb2 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsUnitTest.java @@ -290,16 +290,16 @@ public class PoolGenotypeLikelihoodsUnitTest { final byte minQ = 5; final byte maxQ = 40; final byte refByte = refPileupTestProvider.getRefByte(); - final String altBases = "TCA"; + final String altBases = refByte + "TCA"; final String refSampleName = refPileupTestProvider.getSampleNames().get(0); final List trueAlleles = new ArrayList(); - trueAlleles.add(Allele.create(Allele.NULL_ALLELE_STRING, true)); - trueAlleles.add(Allele.create("TC", false)); + trueAlleles.add(Allele.create(refByte, true)); + trueAlleles.add(Allele.create(refByte + "TC", false)); final String fw = new String(refPileupTestProvider.getReferenceContext().getForwardBases()); final VariantContext refInsertionVC = new VariantContextBuilder("test","chr1",refPileupTestProvider.getReferenceContext().getLocus().getStart(), refPileupTestProvider.getReferenceContext().getLocus().getStart(), trueAlleles). - genotypes(GenotypeBuilder.create(refSampleName, trueAlleles)).referenceBaseForIndel(refByte).make(); + genotypes(GenotypeBuilder.create(refSampleName, trueAlleles)).make(); final int[] matchArray = {95, 995, 9995, 10000}; @@ -329,12 +329,12 @@ public class PoolGenotypeLikelihoodsUnitTest { // create deletion VC final int delLength = 4; final List delAlleles = new ArrayList(); - delAlleles.add(Allele.create(fw.substring(1,delLength+1), true)); - delAlleles.add(Allele.create(Allele.NULL_ALLELE_STRING, false)); + delAlleles.add(Allele.create(fw.substring(0,delLength+1), true)); + delAlleles.add(Allele.create(refByte, false)); final VariantContext refDeletionVC = new VariantContextBuilder("test","chr1",refPileupTestProvider.getReferenceContext().getLocus().getStart(), refPileupTestProvider.getReferenceContext().getLocus().getStart()+delLength, delAlleles). - genotypes(GenotypeBuilder.create(refSampleName, delAlleles)).referenceBaseForIndel(refByte).make(); + genotypes(GenotypeBuilder.create(refSampleName, delAlleles)).make(); for (int matches: matchArray) { for (int mismatches: mismatchArray) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java index 261f6433b..a9edab752 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java @@ -88,13 +88,13 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa for ( PileupElement p : pileup ) { if ( p.isBeforeInsertion() ) { - final Allele insertion = Allele.create(refBase + p.getEventBases(), false); + final Allele insertion = Allele.create((char)refBase + p.getEventBases(), false); if ( alleleCounts.containsKey(insertion) ) { alleleCounts.put(insertion, alleleCounts.get(insertion)+1); } } else if ( p.isBeforeDeletionStart() ) { - if ( p.getEventLength() == refAllele.length() + 1 ) { + if ( p.getEventLength() == refAllele.length() - 1 ) { // this is indeed the deletion allele recorded in VC final Allele deletion = Allele.create(refBase); if ( alleleCounts.containsKey(deletion) ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ConsensusAlleleCounter.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ConsensusAlleleCounter.java index d2071a9fb..869e52216 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ConsensusAlleleCounter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ConsensusAlleleCounter.java @@ -255,7 +255,7 @@ public class ConsensusAlleleCounter { else continue; // don't go on with this allele if refBases are non-standard } else { // insertion case - final String insertionBases = ref.getBase() + s; // add reference padding + final String insertionBases = (char)ref.getBase() + s; // add reference padding if (Allele.acceptableAlleleBases(insertionBases, false)) { // don't allow N's in insertions refAllele = Allele.create(ref.getBase(), true); altAllele = Allele.create(insertionBases, false); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java index 7eabe7a18..bedffa690 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java @@ -35,7 +35,6 @@ import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.Haplotype; -import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.variantcontext.*; @@ -48,8 +47,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood private boolean DEBUG = false; private boolean ignoreSNPAllelesWhenGenotypingIndels = false; private PairHMMIndelErrorModel pairModel; - private boolean allelesArePadded; - + private static ThreadLocal>> indelLikelihoodMap = new ThreadLocal>>() { protected synchronized HashMap> initialValue() { @@ -105,22 +103,18 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood indelLikelihoodMap.set(new HashMap>()); haplotypeMap.clear(); - Pair,Boolean> pair = getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC, ignoreSNPAllelesWhenGenotypingIndels); - alleleList = pair.first; - allelesArePadded = pair.second; + alleleList = getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC, ignoreSNPAllelesWhenGenotypingIndels); if (alleleList.isEmpty()) return null; } - getHaplotypeMapFromAlleles(alleleList, ref, loc, haplotypeMap); // will update haplotypeMap adding elements if (haplotypeMap == null || haplotypeMap.isEmpty()) return null; // start making the VariantContext // For all non-snp VC types, VC end location is just startLocation + length of ref allele including padding base. - - final int endLoc = computeEndLocation(alleleList, loc,allelesArePadded); + final int endLoc = loc.getStart() + alleleList.get(0).length() - 1; final int eventLength = getEventLength(alleleList); final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleleList); @@ -160,15 +154,6 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood return indelLikelihoodMap.get(); } - public static int computeEndLocation(final List alleles, final GenomeLoc loc, final boolean allelesArePadded) { - Allele refAllele = alleles.get(0); - int endLoc = loc.getStart() + refAllele.length()-1; - if (allelesArePadded) - endLoc++; - - return endLoc; - } - public static void getHaplotypeMapFromAlleles(final List alleleList, final ReferenceContext ref, final GenomeLoc loc, @@ -213,16 +198,15 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood } - public static Pair,Boolean> getInitialAlleleList(final RefMetaDataTracker tracker, + public static List getInitialAlleleList(final RefMetaDataTracker tracker, final ReferenceContext ref, final Map contexts, final AlignmentContextUtils.ReadOrientation contextType, final GenomeLocParser locParser, final UnifiedArgumentCollection UAC, final boolean ignoreSNPAllelesWhenGenotypingIndels) { - + List alleles = new ArrayList(); - boolean allelesArePadded = true; if (UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) { VariantContext vc = null; for (final VariantContext vc_input : tracker.getValues(UAC.alleles, ref.getLocus())) { @@ -235,7 +219,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood } // ignore places where we don't have a variant if (vc == null) - return new Pair,Boolean>(alleles,false); + return alleles; if (ignoreSNPAllelesWhenGenotypingIndels) { // if there's an allele that has same length as the reference (i.e. a SNP or MNP), ignore it and don't genotype it @@ -248,15 +232,11 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood } else { alleles.addAll(vc.getAlleles()); } - if ( vc.getReference().getBases().length == vc.getEnd()-vc.getStart()+1) - allelesArePadded = false; - - } else { - alleles = IndelGenotypeLikelihoodsCalculationModel.computeConsensusAlleles(ref, contexts, contextType, locParser, UAC); + alleles = computeConsensusAlleles(ref, contexts, contextType, locParser, UAC); } - return new Pair,Boolean> (alleles,allelesArePadded); + return alleles; } // Overload function in GenotypeLikelihoodsCalculationModel so that, for an indel case, we consider a deletion as part of the pileup, diff --git a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java index 829e75682..143a053c9 100755 --- a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java @@ -204,8 +204,11 @@ public class Haplotype { return new Haplotype(newHaplotype); } - public static LinkedHashMap makeHaplotypeListFromAlleles(List alleleList, int startPos, ReferenceContext ref, - final int haplotypeSize, final int numPrefBases) { + public static LinkedHashMap makeHaplotypeListFromAlleles(final List alleleList, + final int startPos, + final ReferenceContext ref, + final int haplotypeSize, + final int numPrefBases) { LinkedHashMap haplotypeMap = new LinkedHashMap(); @@ -216,7 +219,6 @@ public class Haplotype { refAllele = a; break; } - } if (refAllele == null) @@ -224,19 +226,12 @@ public class Haplotype { byte[] refBases = ref.getBases(); + final int startIdxInReference = 1 + startPos - numPrefBases - ref.getWindow().getStart(); + final String basesBeforeVariant = new String(Arrays.copyOfRange(refBases, startIdxInReference, startIdxInReference + numPrefBases)); - int startIdxInReference = (int)(1+startPos-numPrefBases-ref.getWindow().getStart()); - //int numPrefBases = (int)(vc.getStart()-ref.getWindow().getStart()+1); // indel vc starts one before event - - - byte[] basesBeforeVariant = Arrays.copyOfRange(refBases,startIdxInReference,startIdxInReference+numPrefBases); - int startAfter = startIdxInReference+numPrefBases+ refAllele.getBases().length; // protect against long events that overrun available reference context - if (startAfter > refBases.length) - startAfter = refBases.length; - byte[] basesAfterVariant = Arrays.copyOfRange(refBases, - startAfter, refBases.length); - + final int startAfter = Math.min(startIdxInReference + numPrefBases + refAllele.getBases().length - 1, refBases.length); + final String basesAfterVariant = new String(Arrays.copyOfRange(refBases, startAfter, refBases.length)); // Create location for all haplotypes final int startLoc = ref.getWindow().getStart() + startIdxInReference; @@ -244,16 +239,14 @@ public class Haplotype { final GenomeLoc locus = ref.getGenomeLocParser().createGenomeLoc(ref.getLocus().getContig(),startLoc,stopLoc); - for (final Allele a : alleleList) { - byte[] alleleBases = a.getBases(); + final byte[] alleleBases = a.getBases(); // use string concatenation - String haplotypeString = new String(basesBeforeVariant) + new String(alleleBases) + new String(basesAfterVariant); + String haplotypeString = basesBeforeVariant + new String(Arrays.copyOfRange(alleleBases, 1, alleleBases.length)) + basesAfterVariant; haplotypeString = haplotypeString.substring(0,haplotypeSize); - haplotypeMap.put(a,new Haplotype(haplotypeString.getBytes(), locus)); - + haplotypeMap.put(a,new Haplotype(haplotypeString.getBytes(), locus)); } return haplotypeMap; diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java index 1947ef01e..aa63a9dac 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java @@ -90,16 +90,23 @@ public class Allele implements Comparable { // null alleles are no longer allowed if ( wouldBeNullAllele(bases) ) { throw new IllegalArgumentException("Null alleles are not supported"); - } else if ( wouldBeNoCallAllele(bases) ) { - bases = EMPTY_ALLELE_BASES; + } + + // no-calls are represented as no bases + if ( wouldBeNoCallAllele(bases) ) { + this.bases = EMPTY_ALLELE_BASES; isNoCall = true; if ( isRef ) throw new IllegalArgumentException("Cannot tag a NoCall allele as the reference allele"); - } else if ( wouldBeSymbolicAllele(bases) ) { + return; + } + + if ( wouldBeSymbolicAllele(bases) ) { isSymbolic = true; if ( isRef ) throw new IllegalArgumentException("Cannot tag a symbolic allele as the reference allele"); } - else + else { bases = BaseUtils.convertToUpperCase(bases); + } this.isRef = isRef; this.bases = bases; diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index e1a043e94..e9388205f 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -1163,13 +1163,14 @@ public class VariantContextUtils { if ( ! vc.isIndel() ) // only indels are tandem repeats return null; - final Allele ref = vc.getReference(); + final Allele refAllele = vc.getReference(); + final byte[] refAlleleBases = Arrays.copyOfRange(refAllele.getBases(), 1, refAllele.length()); byte[] repeatUnit = null; final ArrayList lengths = new ArrayList(); for ( final Allele allele : vc.getAlternateAlleles() ) { - Pair result = getNumTandemRepeatUnits(ref.getBases(), allele.getBases(), refBasesStartingAtVCWithoutPad.getBytes()); + Pair result = getNumTandemRepeatUnits(refAlleleBases, Arrays.copyOfRange(allele.getBases(), 1, allele.length()), refBasesStartingAtVCWithoutPad.getBytes()); final int[] repetitionCount = result.first; // repetition count = 0 means allele is not a tandem expansion of context @@ -1184,7 +1185,7 @@ public class VariantContextUtils { repeatUnit = result.second; if (VERBOSE) { System.out.println("RefContext:"+refBasesStartingAtVCWithoutPad); - System.out.println("Ref:"+ref.toString()+" Count:" + String.valueOf(repetitionCount[0])); + System.out.println("Ref:"+refAllele.toString()+" Count:" + String.valueOf(repetitionCount[0])); System.out.println("Allele:"+allele.toString()+" Count:" + String.valueOf(repetitionCount[1])); System.out.println("RU:"+new String(repeatUnit)); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java index 256f93473..aa4885406 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java @@ -103,22 +103,23 @@ public class ArtificialReadPileupTestProvider { boolean addBaseErrors, int phredScaledBaseErrorRate) { // RefMetaDataTracker tracker = new RefMetaDataTracker(null,referenceContext); - ArrayList vcAlleles = new ArrayList(); + String refBase = refBases.substring(offset,offset+1); // referenceContext.getBase()? Allele refAllele, altAllele; - if (eventLength == 0) {// SNP case - refAllele =Allele.create(refBases.substring(offset,offset+1),true); + if (eventLength == 0) { + // SNP case + refAllele = Allele.create(refBase,true); altAllele = Allele.create(altBases.substring(0,1), false); } else if (eventLength>0){ // insertion - refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true); - altAllele = Allele.create(altBases.substring(0,eventLength), false); + refAllele = Allele.create(refBase,true); + altAllele = Allele.create(refBase + altBases.substring(0,eventLength), false); } else { // deletion - refAllele =Allele.create(refBases.substring(offset,offset+Math.abs(eventLength)),true); - altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false); + refAllele = Allele.create(refBases.substring(offset,offset+Math.abs(eventLength)),true); + altAllele = Allele.create(refBase, false); } int stop = loc.getStart(); vcAlleles.add(refAllele); @@ -127,7 +128,6 @@ public class ArtificialReadPileupTestProvider { final VariantContextBuilder builder = new VariantContextBuilder().source(""); builder.loc(loc.getContig(), loc.getStart(), stop); builder.alleles(vcAlleles); - builder.referenceBaseForIndel(referenceContext.getBase()); builder.noGenotypes(); final VariantContext vc = builder.make(); diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFAlleleClipperUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFAlleleClipperUnitTest.java deleted file mode 100644 index 8cd051e01..000000000 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFAlleleClipperUnitTest.java +++ /dev/null @@ -1,226 +0,0 @@ -/* - * Copyright (c) 2012, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ -package org.broadinstitute.sting.utils.codecs.vcf; - -import com.google.java.contract.Requires; -import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.utils.variantcontext.*; -import org.testng.Assert; -import org.testng.SkipException; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -import java.util.*; - -public class VCFAlleleClipperUnitTest extends BaseTest { - // -------------------------------------------------------------------------------- - // - // Test allele clipping - // - // -------------------------------------------------------------------------------- - - private class ClipAllelesTest extends TestDataProvider { - final int position; - final int stop; - final String ref; - List inputs; - List expected; - - @Requires("arg.length % 2 == 0") - private ClipAllelesTest(final int position, final int stop, final String ... arg) { - super(ClipAllelesTest.class); - this.position = position; - this.stop = stop; - this.ref = arg[0]; - - int n = arg.length / 2; - inputs = new ArrayList(n); - expected = new ArrayList(n); - - for ( int i = 0; i < n; i++ ) { - final boolean ref = i % n == 0; - inputs.add(Allele.create(arg[i], ref)); - } - for ( int i = n; i < arg.length; i++ ) { - final boolean ref = i % n == 0; - expected.add(Allele.create(arg[i], ref)); - } - } - - public boolean isClipped() { - for ( int i = 0; i < inputs.size(); i++ ) { - if ( inputs.get(i).length() != expected.get(i).length() ) - return true; - } - - return false; - } - - public String toString() { - return String.format("ClipAllelesTest input=%s expected=%s", inputs, expected); - } - } - @DataProvider(name = "ClipAllelesTest") - public Object[][] makeClipAllelesTest() { - // do no harm - new ClipAllelesTest(10, 10, "A", "A"); - new ClipAllelesTest(10, 10, "A", "C", "A", "C"); - new ClipAllelesTest(10, 10, "A", "C", "G", "A", "C", "G"); - - // insertions - new ClipAllelesTest(10, 10, "A", "AA", "-", "A"); - new ClipAllelesTest(10, 10, "A", "AAA", "-", "AA"); - new ClipAllelesTest(10, 10, "A", "AG", "-", "G"); - - // deletions - new ClipAllelesTest(10, 11, "AA", "A", "A", "-"); - new ClipAllelesTest(10, 12, "AAA", "A", "AA", "-"); - new ClipAllelesTest(10, 11, "AG", "A", "G", "-"); - new ClipAllelesTest(10, 12, "AGG", "A", "GG", "-"); - - // multi-allelic insertion and deletions - new ClipAllelesTest(10, 11, "AA", "A", "AAA", "A", "-", "AA"); - new ClipAllelesTest(10, 11, "AA", "A", "AAG", "A", "-", "AG"); - new ClipAllelesTest(10, 10, "A", "AA", "AAA", "-", "A", "AA"); - new ClipAllelesTest(10, 10, "A", "AA", "ACA", "-", "A", "CA"); - new ClipAllelesTest(10, 12, "ACG", "ATC", "AGG", "CG", "TC", "GG"); - new ClipAllelesTest(10, 11, "AC", "AT", "AG", "C", "T", "G"); - - // cannot be clipped - new ClipAllelesTest(10, 11, "AC", "CT", "AG", "AC", "CT", "AG"); - new ClipAllelesTest(10, 11, "AC", "CT", "GG", "AC", "CT", "GG"); - - // symbolic - new ClipAllelesTest(10, 100, "A", "", "A", ""); - new ClipAllelesTest(50, 50, "G", "G]22:60]", "G", "G]22:60]"); - new ClipAllelesTest(51, 51, "T", "]22:55]T", "T", "]22:55]T"); - new ClipAllelesTest(52, 52, "C", "C[22:51[", "C", "C[22:51["); - new ClipAllelesTest(60, 60, "A", "A]22:50]", "A", "A]22:50]"); - - // symbolic with alleles that should be clipped - new ClipAllelesTest(10, 100, "A", "", "AA", "-", "", "A"); - new ClipAllelesTest(10, 100, "AA", "", "A", "A", "", "-"); - new ClipAllelesTest(10, 100, "AA", "", "A", "AAA", "A", "", "-", "AA"); - new ClipAllelesTest(10, 100, "AG", "", "A", "AGA", "G", "", "-", "GA"); - new ClipAllelesTest(10, 100, "G", "", "A", "G", "", "A"); - - // clipping from both ends - // - // TODO -- THIS CODE IS BROKEN BECAUSE CLIPPING DOES WORK WITH ALLELES CLIPPED FROM THE END - // -// new ClipAllelesTest(10, 10, "ATA", "ATTA", "-", "T"); -// new ClipAllelesTest(10, 10, "ATAA", "ATTAA", "-", "T"); -// new ClipAllelesTest(10, 10, "ATAAG", "ATTAAG", "-", "T"); -// new ClipAllelesTest(10, 11, "GTA", "ATTA", "G", "AT"); -// new ClipAllelesTest(10, 11, "GTAA", "ATTAA", "G", "AT"); -// new ClipAllelesTest(10, 11, "GTAAG", "ATTAAG", "G", "AT"); - - // complex substitutions - new ClipAllelesTest(10, 10, "A", "GA", "A", "GA"); - - return ClipAllelesTest.getTests(ClipAllelesTest.class); - } - - @Test(dataProvider = "ClipAllelesTest") - public void testClipAllelesTest(ClipAllelesTest cfg) { - final VCFAlleleClipper.ClippedAlleles clipped = VCFAlleleClipper.clipAlleles(cfg.position, cfg.ref, cfg.inputs, cfg.stop); - Assert.assertNull(clipped.getError(), "Unexpected error occurred"); - Assert.assertEquals(clipped.getStop(), cfg.stop, "Clipped alleles stop"); - Assert.assertEquals(clipped.getClippedAlleles(), cfg.expected, "Clipped alleles"); - } - - @Test(dataProvider = "ClipAllelesTest", dependsOnMethods = "testClipAllelesTest") - public void testPaddingAllelesInVC(final ClipAllelesTest cfg) { - final VCFAlleleClipper.ClippedAlleles clipped = VCFAlleleClipper.clipAlleles(cfg.position, cfg.ref, cfg.inputs, cfg.stop); - final VariantContext vc = new VariantContextBuilder("x", "1", cfg.position, cfg.stop, clipped.getClippedAlleles()) - .referenceBaseForIndel(clipped.getRefBaseForIndel()).make(); - - if ( vc.isMixed() && vc.hasSymbolicAlleles() ) - throw new SkipException("GATK cannot handle mixed variant contexts with symbolic and concrete alleles. Remove this check when allele clipping and padding is generalized"); - - Assert.assertEquals(VCFAlleleClipper.needsPadding(vc), cfg.isClipped(), "needPadding method"); - - if ( cfg.isClipped() ) { - // TODO - // TODO note that the GATK currently uses a broken approach to the clipped alleles, so the expected stop is - // TODO actually the original stop, as the original stop is +1 its true size. - // TODO - final int expectedStop = vc.getEnd(); // + (vc.hasSymbolicAlleles() ? 0 : 1); - - final VariantContext padded = VCFAlleleClipper.createVariantContextWithPaddedAlleles(vc); - Assert.assertEquals(padded.getStart(), vc.getStart(), "padded VC start"); - Assert.assertEquals(padded.getAlleles(), cfg.inputs, "padded VC alleles == original unclipped alleles"); - Assert.assertEquals(padded.getEnd(), expectedStop, "padded VC end should be clipped VC + 1 (added a base to ref allele)"); - Assert.assertFalse(VCFAlleleClipper.needsPadding(padded), "padded VC shouldn't need padding again"); - } - } - - // -------------------------------------------------------------------------------- - // - // basic allele clipping test - // - // -------------------------------------------------------------------------------- - - private class ReverseClippingPositionTestProvider extends TestDataProvider { - final String ref; - final List alleles = new ArrayList(); - final int expectedClip; - - private ReverseClippingPositionTestProvider(final int expectedClip, final String ref, final String... alleles) { - super(ReverseClippingPositionTestProvider.class); - this.ref = ref; - for ( final String allele : alleles ) - this.alleles.add(Allele.create(allele)); - this.expectedClip = expectedClip; - } - - @Override - public String toString() { - return String.format("ref=%s allele=%s reverse clip %d", ref, alleles, expectedClip); - } - } - - @DataProvider(name = "ReverseClippingPositionTestProvider") - public Object[][] makeReverseClippingPositionTestProvider() { - // pair clipping - new ReverseClippingPositionTestProvider(0, "ATT", "CCG"); - new ReverseClippingPositionTestProvider(1, "ATT", "CCT"); - new ReverseClippingPositionTestProvider(2, "ATT", "CTT"); - new ReverseClippingPositionTestProvider(2, "ATT", "ATT"); // cannot completely clip allele - - // triplets - new ReverseClippingPositionTestProvider(0, "ATT", "CTT", "CGG"); - new ReverseClippingPositionTestProvider(1, "ATT", "CTT", "CGT"); // the T can go - new ReverseClippingPositionTestProvider(2, "ATT", "CTT", "CTT"); // both Ts can go - - return ReverseClippingPositionTestProvider.getTests(ReverseClippingPositionTestProvider.class); - } - - - @Test(dataProvider = "ReverseClippingPositionTestProvider") - public void testReverseClippingPositionTestProvider(ReverseClippingPositionTestProvider cfg) { - int result = VCFAlleleClipper.computeReverseClipping(cfg.alleles, cfg.ref.getBytes(), 0, false); - Assert.assertEquals(result, cfg.expectedClip); - } -} diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java index 1a0e8e39d..b95e589b7 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java @@ -225,10 +225,10 @@ public class VariantContextTestProvider { add(builder()); add(builder().alleles("A")); add(builder().alleles("A", "C", "T")); - add(builder().alleles("-", "C").referenceBaseForIndel("A")); - add(builder().alleles("-", "CAGT").referenceBaseForIndel("A")); - add(builder().loc("1", 10, 11).alleles("C", "-").referenceBaseForIndel("A")); - add(builder().loc("1", 10, 13).alleles("CGT", "-").referenceBaseForIndel("A")); + add(builder().alleles("A", "AC")); + add(builder().alleles("A", "ACAGT")); + add(builder().loc("1", 10, 11).alleles("AC", "A")); + add(builder().loc("1", 10, 13).alleles("ACGT", "A")); // make sure filters work add(builder().unfiltered()); @@ -302,8 +302,8 @@ public class VariantContextTestProvider { sites.add(builder().alleles("A").make()); sites.add(builder().alleles("A", "C", "T").make()); - sites.add(builder().alleles("-", "C").referenceBaseForIndel("A").make()); - sites.add(builder().alleles("-", "CAGT").referenceBaseForIndel("A").make()); + sites.add(builder().alleles("A", "AC").make()); + sites.add(builder().alleles("A", "ACAGT").make()); for ( VariantContext site : sites ) { addGenotypes(site); diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java index 11c75ed9a..46153221d 100755 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java @@ -28,27 +28,22 @@ public class VariantContextUnitTest extends BaseTest { int snpLocStart = 10; int snpLocStop = 10; - // - / ATC [ref] from 20-23 + // - / ATC [ref] from 20-22 String delLoc = "chr1"; int delLocStart = 20; - int delLocStop = 23; + int delLocStop = 22; // - [ref] / ATC from 20-20 String insLoc = "chr1"; int insLocStart = 20; int insLocStop = 20; - // - / A / T / ATC [ref] from 20-23 - String mixedLoc = "chr1"; - int mixedLocStart = 20; - int mixedLocStop = 23; - VariantContextBuilder basicBuilder, snpBuilder, insBuilder; @BeforeSuite public void before() { - del = Allele.create("-"); - delRef = Allele.create("-", true); + del = Allele.create("A"); + delRef = Allele.create("A", true); A = Allele.create("A"); C = Allele.create("C"); @@ -62,9 +57,9 @@ public class VariantContextUnitTest extends BaseTest { @BeforeMethod public void beforeTest() { - basicBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, Arrays.asList(Aref, T)).referenceBaseForIndel((byte)'A'); - snpBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, Arrays.asList(Aref, T)).referenceBaseForIndel((byte)'A'); - insBuilder = new VariantContextBuilder("test", insLoc, insLocStart, insLocStop, Arrays.asList(delRef, ATC)).referenceBaseForIndel((byte)'A'); + basicBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, Arrays.asList(Aref, T)); + snpBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, Arrays.asList(Aref, T)); + insBuilder = new VariantContextBuilder("test", insLoc, insLocStart, insLocStop, Arrays.asList(delRef, ATC)); } @Test @@ -213,7 +208,7 @@ public class VariantContextUnitTest extends BaseTest { @Test public void testCreatingDeletionVariantContext() { List alleles = Arrays.asList(ATCref, del); - VariantContext vc = new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).referenceBaseForIndel((byte)'A').make(); + VariantContext vc = new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).make(); Assert.assertEquals(vc.getChr(), delLoc); Assert.assertEquals(vc.getStart(), delLocStart); @@ -240,8 +235,8 @@ public class VariantContextUnitTest extends BaseTest { @Test public void testMatchingAlleles() { List alleles = Arrays.asList(ATCref, del); - VariantContext vc = new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).referenceBaseForIndel((byte)'A').make(); - VariantContext vc2 = new VariantContextBuilder("test2", delLoc, delLocStart+12, delLocStop+12, alleles).referenceBaseForIndel((byte)'A').make(); + VariantContext vc = new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).make(); + VariantContext vc2 = new VariantContextBuilder("test2", delLoc, delLocStart+12, delLocStop+12, alleles).make(); Assert.assertTrue(vc.hasSameAllelesAs(vc2)); Assert.assertTrue(vc.hasSameAlternateAllelesAs(vc2)); @@ -470,15 +465,15 @@ public class VariantContextUnitTest extends BaseTest { @Test public void testRepeatAllele() { - Allele nullR = Allele.create(Allele.NULL_ALLELE_STRING, true); - Allele nullA = Allele.create(Allele.NULL_ALLELE_STRING, false); - Allele atc = Allele.create("ATC", false); - Allele atcatc = Allele.create("ATCATC", false); - Allele ccccR = Allele.create("CCCC", true); - Allele cc = Allele.create("CC", false); - Allele cccccc = Allele.create("CCCCCC", false); - Allele gagaR = Allele.create("GAGA", true); - Allele gagagaga = Allele.create("GAGAGAGA", false); + Allele nullR = Allele.create("A", true); + Allele nullA = Allele.create("A", false); + Allele atc = Allele.create("AATC", false); + Allele atcatc = Allele.create("AATCATC", false); + Allele ccccR = Allele.create("ACCCC", true); + Allele cc = Allele.create("ACC", false); + Allele cccccc = Allele.create("ACCCCCC", false); + Allele gagaR = Allele.create("AGAGA", true); + Allele gagagaga = Allele.create("AGAGAGAGA", false); Pair,byte[]> result; byte[] refBytes = "TATCATCATCGGA".getBytes(); @@ -678,7 +673,7 @@ public class VariantContextUnitTest extends BaseTest { @Test(dataProvider = "getAlleles") public void testMergeAlleles(GetAllelesTest cfg) { final List altAlleles = cfg.alleles.subList(1, cfg.alleles.size()); - final VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, cfg.alleles).referenceBaseForIndel((byte)'A').make(); + final VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, cfg.alleles).make(); Assert.assertEquals(vc.getAlleles(), cfg.alleles, "VC alleles not the same as input alleles"); Assert.assertEquals(vc.getNAlleles(), cfg.alleles.size(), "VC getNAlleles not the same as input alleles size"); diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java index b09a10d07..8c86a54de 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java @@ -99,7 +99,7 @@ public class VariantContextUtilsUnitTest extends BaseTest { private VariantContext makeVC(String source, List alleles, Collection genotypes, Set filters) { int start = 10; int stop = start; // alleles.contains(ATC) ? start + 3 : start; - return new VariantContextBuilder(source, "1", start, stop, alleles).genotypes(genotypes).filters(filters).referenceBaseForIndel(Cref.getBases()[0]).make(); + return new VariantContextBuilder(source, "1", start, stop, alleles).genotypes(genotypes).filters(filters).make(); } // -------------------------------------------------------------------------------- diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriterUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriterUnitTest.java index a7fff4559..5876efa12 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriterUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriterUnitTest.java @@ -139,8 +139,8 @@ public class VCFWriterUnitTest extends BaseTest { Map attributes = new HashMap(); GenotypesContext genotypes = GenotypesContext.create(header.getGenotypeSamples().size()); - alleles.add(Allele.create("-",true)); - alleles.add(Allele.create("CC",false)); + alleles.add(Allele.create("A",true)); + alleles.add(Allele.create("ACC",false)); attributes.put("DP","50"); for (String name : header.getGenotypeSamples()) { @@ -148,7 +148,7 @@ public class VCFWriterUnitTest extends BaseTest { genotypes.add(gt); } return new VariantContextBuilder("RANDOM", loc.getContig(), loc.getStart(), loc.getStop(), alleles) - .genotypes(genotypes).attributes(attributes).referenceBaseForIndel((byte)'A').make(); + .genotypes(genotypes).attributes(attributes).make(); }