diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java index 75c4037d5..820fd248a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java @@ -47,7 +47,7 @@ public class AlleleBalanceBySample extends GenotypeAnnotation implements Experim if (!g.isHet()) return null; - Set altAlleles = vc.getAlternateAlleles(); + Collection altAlleles = vc.getAlternateAlleles(); if ( altAlleles.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java index 70f3c6a1a..1bb697056 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java @@ -33,6 +33,7 @@ import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.PrintStream; +import java.util.List; import java.util.Map; import java.util.Set; @@ -76,7 +77,7 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable { */ protected abstract void getLog10PNonRef(RefMetaDataTracker tracker, ReferenceContext ref, - Map GLs, Set Alleles, + Map GLs, List Alleles, double[] log10AlleleFrequencyPriors, double[] log10AlleleFrequencyPosteriors); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java index 8a3a97823..f87eae781 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -29,19 +29,14 @@ import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.SimpleTimer; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import sun.reflect.generics.reflectiveObjects.NotImplementedException; import java.io.PrintStream; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Map; -import java.util.Set; +import java.util.*; public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // @@ -58,7 +53,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { public void getLog10PNonRef(RefMetaDataTracker tracker, ReferenceContext ref, - Map GLs, Setalleles, + Map GLs, List alleles, double[] log10AlleleFrequencyPriors, double[] log10AlleleFrequencyPosteriors) { final int numAlleles = alleles.size(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GridSearchAFEstimation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GridSearchAFEstimation.java index 10b646d63..f4195e5f0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GridSearchAFEstimation.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GridSearchAFEstimation.java @@ -54,7 +54,7 @@ public class GridSearchAFEstimation extends AlleleFrequencyCalculationModel { protected void getLog10PNonRef(RefMetaDataTracker tracker, ReferenceContext ref, - Map GLs, Setalleles, + Map GLs, List alleles, double[] log10AlleleFrequencyPriors, double[] log10AlleleFrequencyPosteriors) { initializeAFMatrix(GLs); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 9e8cc07a6..b72b68f9f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -423,7 +423,7 @@ public class UnifiedGenotyperEngine { int endLoc = calculateEndPos(vc.getAlleles(), vc.getReference(), loc); - Set myAlleles = vc.getAlleles(); + Set myAlleles = new HashSet(vc.getAlleles()); // strip out the alternate allele if it's a ref call if ( bestAFguess == 0 && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY ) { myAlleles = new HashSet(1); @@ -447,7 +447,7 @@ public class UnifiedGenotyperEngine { return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF)); } - private int calculateEndPos(Set alleles, Allele refAllele, GenomeLoc loc) { + private int calculateEndPos(Collection alleles, Allele refAllele, GenomeLoc loc) { // TODO - temp fix until we can deal with extended events properly // for indels, stop location is one more than ref allele length boolean isSNP = true, hasNullAltAllele = false; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ValidationReport.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ValidationReport.java index c60586017..3b4967cad 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ValidationReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ValidationReport.java @@ -10,6 +10,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import java.util.Collection; import java.util.Set; /** @@ -142,8 +143,8 @@ public class ValidationReport extends VariantEvaluator implements StandardEval { public boolean haveDifferentAltAlleles(VariantContext eval, VariantContext comp) { - Set evalAlts = eval.getAlternateAlleles(); - Set compAlts = comp.getAlternateAlleles(); + Collection evalAlts = eval.getAlternateAlleles(); + Collection compAlts = comp.getAlternateAlleles(); if ( evalAlts.size() != compAlts.size() ) { return true; } else { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java index 8eaf976d0..4e6cc722d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java @@ -237,7 +237,7 @@ public class VariantValidationAssessor extends RodWalker infoMap.put("HomVarPct", String.format("%.1f", 100.0*homVarProp)); infoMap.put("HetPct", String.format("%.1f", 100.0*hetProp)); infoMap.put("HW", String.format("%.2f", hwScore)); - Set altAlleles = vContext.getAlternateAlleles(); + Collection altAlleles = vContext.getAlternateAlleles(); int altAlleleCount = altAlleles.size() == 0 ? 0 : vContext.getChromosomeCount(altAlleles.iterator().next()); if ( !isViolation && altAlleleCount > 0 ) numTrueVariants++; diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index 05e21c8b8..eac6d70b5 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -181,7 +181,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati protected Type type = null; /** A set of the alleles segregating in this context */ - protected LinkedHashSet alleles = null; + final protected List alleles; /** A mapping from sampleName -> genotype objects for all genotypes associated with this context */ protected Map genotypes = null; @@ -355,7 +355,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati if ( alleles == null ) { throw new IllegalArgumentException("Alleles cannot be null"); } // we need to make this a LinkedHashSet in case the user prefers a given ordering of alleles - this.alleles = alleleCollectionToSet(new LinkedHashSet(), alleles); + this.alleles = makeAlleles(alleles); if ( genotypes == null ) { genotypes = NO_GENOTYPES; } @@ -445,7 +445,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati * @param alleles the set of allele segregating alleles at this site. Must include those in genotypes, but may be more * @return vc subcontext */ - public VariantContext subContextFromGenotypes(Collection genotypes, Set alleles) { + public VariantContext subContextFromGenotypes(Collection genotypes, Collection alleles) { return new VariantContext(getSource(), contig, start, stop, alleles, genotypes != null ? genotypeCollectionToMap(new TreeMap(), genotypes) : null, getNegLog10PError(), filtersWereApplied() ? getFilters() : null, getAttributes(), getReferenceBaseForIndel()); } @@ -687,17 +687,6 @@ public class VariantContext implements Feature { // to enable tribble intergrati return ref; } - /** Private helper routine that grabs the reference allele but doesn't throw an error if there's no such allele */ - -// private Allele getReferenceWithoutError() { -// for ( Allele allele : getAlleles() ) { -// if ( allele.isReference() ) { -// return allele; -// } -// } -// -// return null; -// } /** * @return true if the context is strictly bi-allelic @@ -754,7 +743,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati * * @return the set of alleles */ - public Set getAlleles() { return alleles; } + public List getAlleles() { return alleles; } /** * Gets the alternate alleles. This method should return all the alleles present at the location, @@ -763,14 +752,8 @@ public class VariantContext implements Feature { // to enable tribble intergrati * * @return the set of alternate alleles */ - public Set getAlternateAlleles() { - LinkedHashSet altAlleles = new LinkedHashSet(); - for ( Allele allele : alleles ) { - if ( allele.isNonReference() ) - altAlleles.add(allele); - } - - return Collections.unmodifiableSet(altAlleles); + public List getAlternateAlleles() { + return alleles.subList(1, alleles.size()); } /** @@ -797,14 +780,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati * @throws IllegalArgumentException if i is invalid */ public Allele getAlternateAllele(int i) { - int n = 0; - - for ( Allele allele : alleles ) { - if ( allele.isNonReference() && n++ == i ) - return allele; - } - - throw new IllegalArgumentException("Requested " + i + " alternative allele but there are only " + n + " alternative alleles " + this); + return alleles.get(i+1); } /** @@ -813,8 +789,8 @@ public class VariantContext implements Feature { // to enable tribble intergrati * regardless of ordering. Otherwise returns false. */ public boolean hasSameAlternateAllelesAs ( VariantContext other ) { - Set thisAlternateAlleles = getAlternateAlleles(); - Set otherAlternateAlleles = other.getAlternateAlleles(); + List thisAlternateAlleles = getAlternateAlleles(); + List otherAlternateAlleles = other.getAlternateAlleles(); if ( thisAlternateAlleles.size() != otherAlternateAlleles.size() ) { return false; @@ -1121,7 +1097,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati if ( !hasGenotypes() ) return; - Set reportedAlleles = getAlleles(); + List reportedAlleles = getAlleles(); Set observedAlleles = new HashSet(); observedAlleles.add(getReference()); for ( Genotype g : getGenotypes().values() ) { @@ -1371,17 +1347,34 @@ public class VariantContext implements Feature { // to enable tribble intergrati } // protected basic manipulation routines - private static LinkedHashSet alleleCollectionToSet(LinkedHashSet dest, Collection alleles) { - for ( Allele a : alleles ) { - for ( Allele b : dest ) { + private static List makeAlleles(Collection alleles) { + final List alleleList = new ArrayList(alleles.size()); + + boolean sawRef = false; + for ( final Allele a : alleles ) { + for ( final Allele b : alleleList ) { if ( a.equals(b, true) ) throw new IllegalArgumentException("Duplicate allele added to VariantContext: " + a); } - dest.add(a); + // deal with the case where the first allele isn't the reference + if ( a.isReference() ) { + if ( sawRef ) + throw new IllegalArgumentException("Alleles for a VariantContext must contain a single reference allele: " + alleles); + alleleList.add(0, a); + sawRef = true; + } + else + alleleList.add(a); } - return dest; + if ( alleleList.isEmpty() ) + throw new IllegalArgumentException("Cannot create a VariantContext with an empty allele list"); + + if ( alleleList.get(0).isNonReference() ) + throw new IllegalArgumentException("Alleles for a VariantContext must contain a single reference allele: " + alleles); + + return alleleList; } public static Map genotypeCollectionToMap(Map dest, Collection genotypes) { diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index ae648d547..74ab7074a 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -29,6 +29,7 @@ import net.sf.picard.reference.ReferenceSequenceFile; import net.sf.samtools.util.StringUtil; import org.apache.commons.jexl2.Expression; import org.apache.commons.jexl2.JexlEngine; +import org.apache.log4j.Logger; import org.broad.tribble.util.popgen.HardyWeinbergCalculation; import org.broadinstitute.sting.gatk.walkers.phasing.ReadBackedPhasingWalker; import org.broadinstitute.sting.utils.BaseUtils; @@ -44,6 +45,7 @@ import java.io.Serializable; import java.util.*; public class VariantContextUtils { + private static Logger logger = Logger.getLogger(VariantContextUtils.class); public final static String MERGE_INTERSECTION = "Intersection"; public final static String MERGE_FILTER_IN_ALL = "FilteredInAll"; public final static String MERGE_REF_IN_ALL = "ReferenceInAll"; @@ -511,7 +513,7 @@ public class VariantContextUtils { final String name = first.getSource(); final Allele refAllele = determineReferenceAllele(VCs); - final Set alleles = new TreeSet(); + final Set alleles = new LinkedHashSet(); final Set filters = new TreeSet(); final Map attributes = new TreeMap(); final Set inconsistentAttributes = new HashSet(); @@ -604,11 +606,14 @@ public class VariantContextUtils { } } - // if we have more alternate alleles in the merged VC than in one or more of the original VCs, we need to strip out the GL/PLs (because they are no longer accurate) + // if we have more alternate alleles in the merged VC than in one or more of the + // original VCs, we need to strip out the GL/PLs (because they are no longer accurate) for ( VariantContext vc : VCs ) { if (vc.alleles.size() == 1) continue; - if ( vc.alleles.size() != alleles.size()) { + if ( hasPLIncompatibleAlleles(alleles, vc.alleles)) { + logger.warn(String.format("Stripping PLs at %s due incompatible alleles merged=%s vs. single=%s", + genomeLocParser.createGenomeLoc(vc), alleles, vc.alleles)); genotypes = stripPLs(genotypes); break; } @@ -661,6 +666,24 @@ public class VariantContextUtils { return merged; } + private static final boolean hasPLIncompatibleAlleles(final Collection alleleSet1, final Collection alleleSet2) { + final Iterator it1 = alleleSet1.iterator(); + final Iterator it2 = alleleSet2.iterator(); + + while ( it1.hasNext() && it2.hasNext() ) { + final Allele a1 = it1.next(); + final Allele a2 = it2.next(); + if ( ! a1.equals(a2) ) + return true; + } + + // by this point, at least one of the iterators is empty. All of the elements + // we've compared are equal up until this point. But it's possible that the + // sets aren't the same size, which is indicated by the test below. If they + // are of the same size, though, the sets are compatible + return it1.hasNext() || it2.hasNext(); + } + public static boolean allelesAreSubset(VariantContext vc1, VariantContext vc2) { // if all alleles of vc1 are a contained in alleles of vc2, return true if (!vc1.getReference().equals(vc2.getReference())) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java index 1f11b5886..77a76d388 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjectsIntegrationTest.java @@ -50,8 +50,8 @@ public class DiffObjectsIntegrationTest extends WalkerTest { @DataProvider(name = "data") public Object[][] createData() { - new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", "dc1ca75c6ecf32641967d61e167acfff"); - new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", "df0fcb568a3a49fc74830103b2e26f6c"); + new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", "996dd8f24f2db98305d0fd8f155fc7f9"); + new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", "bf4e25cbc748e2441afd891e5c2722d6"); return TestParams.getTests(TestParams.class); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReaderUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReaderUnitTest.java index dee7bbd88..46b0df5b4 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReaderUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReaderUnitTest.java @@ -70,7 +70,7 @@ public class DiffableReaderUnitTest extends BaseTest { private static void testLeaf(DiffNode rec, String field, Object expected) { DiffElement value = rec.getElement(field); Assert.assertNotNull(value, "Expected to see leaf named " + field + " in rec " + rec); - Assert.assertEquals(value.getValue().getValue(), expected, "Expected to leaf named " + field + " to have value " + expected + " in rec " + rec); + Assert.assertEquals(value.getValue().getValue(), expected, "Expected to see leaf named " + field + " to have value " + expected + " in rec " + rec + " but got instead " + value.getValue().getValue()); } @Test(enabled = true, dependsOnMethods = "testPluggableDiffableReaders") @@ -95,7 +95,7 @@ public class DiffableReaderUnitTest extends BaseTest { testLeaf(rec1, "POS", 2646); testLeaf(rec1, "ID", "rs62635284"); testLeaf(rec1, "REF", Allele.create("G", true)); - testLeaf(rec1, "ALT", new HashSet(Arrays.asList(Allele.create("A")))); + testLeaf(rec1, "ALT", Arrays.asList(Allele.create("A"))); testLeaf(rec1, "QUAL", 0.15); testLeaf(rec1, "FILTER", Collections.emptySet()); testLeaf(rec1, "AC", "2"); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 07b2f0566..ec62c4bfd 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -257,34 +257,40 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { } @Test - public void testWithIndelAllelesPassedIn() { + public void testWithIndelAllelesPassedIn1() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, Arrays.asList("408d3aba4d094c067fc00a43992c2292")); executeTest("test MultiSample Pilot2 indels with alleles passed in", spec1); + } + @Test + public void testWithIndelAllelesPassedIn2() { WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("94977d6e42e764280e9deaf4e3ac8c80")); + Arrays.asList("5e4e09354410b76fc0d822050d84132a")); executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2); + } + + @Test + public void testWithIndelAllelesPassedIn3() { WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1, - Arrays.asList("e66b7321e2ac91742ad3ef91040daafd")); + Arrays.asList("c599eedbeb422713b8a28529e805e4ae")); executeTest("test MultiSample Pilot2 indels with complicated records", spec3); + } + @Test + public void testWithIndelAllelesPassedIn4() { WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec( baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation + "phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1, - Arrays.asList("37e891bf1ac40caec9ea228f39c27e44")); + Arrays.asList("37d908a682ac269f8f19dec939ff5b01")); executeTest("test MultiSample 1000G Phase1 indels with complicated records emitting all sites", spec4); - } - - - } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java index 3b1a97369..e30187a7c 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java @@ -78,26 +78,26 @@ public class CombineVariantsIntegrationTest extends WalkerTest { executeTest("combine PLs 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec); } - @Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "c608b9fc1e36dba6cebb4f259883f9f0"); } - @Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "20caad94411d6ab48153b214de916df8", " -setKey foo"); } - @Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "004f3065cb1bc2ce2f9afd695caf0b48", " -setKey null"); } + @Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "ea0a660cd04101ce7b534aba0310721d"); } + @Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "cb0350e7a9d2483993482b69f5432b64", " -setKey foo"); } + @Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "0571c48cc59cf244779caae52d562e79", " -setKey null"); } @Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "c9c901ff9ef2a982624b203a8086dff0"); } // official project VCF files in tabix format - @Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "7593be578d4274d672fc22fced38012b"); } + @Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "75901304abc1daa41b1906f881aa7bbc"); } @Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "1cd467863c4e948fadd970681552d57e"); } - @Test public void combineWithPLs() { combinePLs("combine.3.vcf", "combine.4.vcf", "0f873fed02aa99db5b140bcd6282c10a"); } + @Test public void combineWithPLs() { combinePLs("combine.3.vcf", "combine.4.vcf", "d08e933b6c81246e998d3ece50ddfdcc"); } - @Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "1d5a021387a8a86554db45a29f66140f"); } // official project VCF files in tabix format - @Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "96941ee177b0614a9879af0ac3218963"); } // official project VCF files in tabix format - @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "f1c8720fde62687c2e861217670d8b3c"); } + @Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "01967686e0e02dbccd2590b70f2d049b"); } // official project VCF files in tabix format + @Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "8c113199c4a93a4a408104b735d59044"); } // official project VCF files in tabix format + @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "30e96a0cb614cd5bc056e1f7ec6d10bd"); } @Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "e144b6283765494bfe8189ac59965083"); } - @Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "89f55abea8f59e39d1effb908440548c"); } + @Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "78a49597f1abf1c738e67d50c8fbed2b"); } - @Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "c6adeda751cb2a08690dd9202356629f"); } - @Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType KEEP_IF_ALL_UNFILTERED", "3a08fd5ee18993dfc8882156ccf5d2e9"); } + @Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "9253d61ddb52c429adf0e153cef494ca"); } + @Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType KEEP_IF_ALL_UNFILTERED", "5012dfe65cf7e7d8f014e97e4a996aea"); } @Test public void threeWayWithRefs() { WalkerTestSpec spec = new WalkerTestSpec( @@ -127,10 +127,10 @@ public class CombineVariantsIntegrationTest extends WalkerTest { executeTest("combineComplexSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec); } - @Test public void complexTestFull() { combineComplexSites("", "b5a53ee92bdaacd2bb3327e9004ae058"); } - @Test public void complexTestMinimal() { combineComplexSites(" -minimalVCF", "df96cb3beb2dbb5e02f80abec7d3571e"); } - @Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "f704caeaaaed6711943014b847fe381a"); } - @Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "f704caeaaaed6711943014b847fe381a"); } + @Test public void complexTestFull() { combineComplexSites("", "2842337e9943366f7a4d5f148f701b8c"); } + @Test public void complexTestMinimal() { combineComplexSites(" -minimalVCF", "39724318e6265d0318a3fe4609612785"); } + @Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "fe9bb02ab8b3d0dd2ad6373ebdb6d915"); } + @Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "fe9bb02ab8b3d0dd2ad6373ebdb6d915"); } @Test public void combineDBSNPDuplicateSites() { diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java index 663eb9ef6..9026f09d6 100755 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java @@ -6,17 +6,19 @@ package org.broadinstitute.sting.utils.variantcontext; import org.broadinstitute.sting.BaseTest; -import org.testng.Assert; import org.testng.annotations.BeforeSuite; -import org.testng.annotations.BeforeTest; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import org.testng.Assert; +import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.List; public class VariantContextUnitTest extends BaseTest { - Allele A, Aref, T, Tref; + Allele A, Aref, C, T, Tref; Allele del, delRef, ATC, ATCref; // A [ref] / T at 10 @@ -45,6 +47,7 @@ public class VariantContextUnitTest extends BaseTest { delRef = Allele.create("-", true); A = Allele.create("A"); + C = Allele.create("C"); Aref = Allele.create("A", true); T = Allele.create("T"); Tref = Allele.create("T", true); @@ -132,6 +135,16 @@ public class VariantContextUnitTest extends BaseTest { Assert.assertEquals(vc.getType(), VariantContext.Type.SYMBOLIC); } + @Test + public void testMultipleSNPAlleleOrdering() { + final List allelesNaturalOrder = Arrays.asList(Aref, C, T); + final List allelesUnnaturalOrder = Arrays.asList(Aref, T, C); + VariantContext naturalVC = new VariantContext("natural", snpLoc, snpLocStart, snpLocStop, allelesNaturalOrder); + VariantContext unnaturalVC = new VariantContext("unnatural", snpLoc, snpLocStart, snpLocStop, allelesUnnaturalOrder); + Assert.assertEquals(new ArrayList(naturalVC.getAlleles()), allelesNaturalOrder); + Assert.assertEquals(new ArrayList(unnaturalVC.getAlleles()), allelesUnnaturalOrder); + } + @Test public void testCreatingSNPVariantContext() { @@ -249,11 +262,16 @@ public class VariantContextUnitTest extends BaseTest { new VariantContext("test", insLoc, insLocStart, insLocStop, Arrays.asList(delRef, del)); } - @Test (expectedExceptions = IllegalStateException.class) + @Test (expectedExceptions = IllegalArgumentException.class) public void testBadConstructorArgs3() { new VariantContext("test", insLoc, insLocStart, insLocStop, Arrays.asList(del)); } + @Test (expectedExceptions = IllegalArgumentException.class) + public void testBadConstructorArgs4() { + new VariantContext("test", insLoc, insLocStart, insLocStop, Collections.emptyList()); + } + @Test (expectedExceptions = IllegalArgumentException.class) public void testBadConstructorArgsDuplicateAlleles1() { new VariantContext("test", insLoc, insLocStart, insLocStop, Arrays.asList(Aref, T, T)); @@ -443,14 +461,70 @@ public class VariantContextUnitTest extends BaseTest { Assert.assertEquals(0, vc5.getChromosomeCount(Aref)); } + // -------------------------------------------------------------------------------- + // + // Test allele merging + // + // -------------------------------------------------------------------------------- - @Test - public void testManipulatingAlleles() { - // todo -- add tests that call add/set/remove + private class GetAllelesTest extends TestDataProvider { + List alleles; + + private GetAllelesTest(String name, Allele... arg) { + super(GetAllelesTest.class, name); + this.alleles = Arrays.asList(arg); + } + + public String toString() { + return String.format("%s input=%s", super.toString(), alleles); + } } - @Test - public void testManipulatingGenotypes() { - // todo -- add tests that call add/set/remove + @DataProvider(name = "getAlleles") + public Object[][] mergeAllelesData() { + new GetAllelesTest("A*", Aref); + new GetAllelesTest("-*", delRef); + new GetAllelesTest("A*/C", Aref, C); + new GetAllelesTest("A*/C/T", Aref, C, T); + new GetAllelesTest("A*/T/C", Aref, T, C); + new GetAllelesTest("A*/C/T/-", Aref, C, T, del); + new GetAllelesTest("A*/T/C/-", Aref, T, C, del); + new GetAllelesTest("A*/-/T/C", Aref, del, T, C); + + return GetAllelesTest.getTests(GetAllelesTest.class); + } + + @Test(dataProvider = "getAlleles") + public void testMergeAlleles(GetAllelesTest cfg) { + final List altAlleles = cfg.alleles.subList(1, cfg.alleles.size()); + final VariantContext vc = new VariantContext("test", snpLoc, snpLocStart, snpLocStop, cfg.alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A'); + + Assert.assertEquals(vc.getAlleles(), cfg.alleles, "VC alleles not the same as input alleles"); + Assert.assertEquals(vc.getNAlleles(), cfg.alleles.size(), "VC getNAlleles not the same as input alleles size"); + Assert.assertEquals(vc.getAlternateAlleles(), altAlleles, "VC alt alleles not the same as input alt alleles"); + + + for ( int i = 0; i < cfg.alleles.size(); i++ ) { + final Allele inputAllele = cfg.alleles.get(i); + + Assert.assertTrue(vc.hasAllele(inputAllele)); + if ( inputAllele.isReference() ) { + final Allele nonRefVersion = Allele.create(inputAllele.getBases(), false); + Assert.assertTrue(vc.hasAllele(nonRefVersion, true)); + Assert.assertFalse(vc.hasAllele(nonRefVersion, false)); + } + + Assert.assertEquals(inputAllele, vc.getAllele(inputAllele.getBaseString())); + Assert.assertEquals(inputAllele, vc.getAllele(inputAllele.getBases())); + + if ( i > 0 ) { // it's an alt allele + Assert.assertEquals(inputAllele, vc.getAlternateAllele(i-1)); + } + } + + final Allele missingAllele = Allele.create("AACCGGTT"); // does not exist + Assert.assertNull(vc.getAllele(missingAllele.getBases())); + Assert.assertFalse(vc.hasAllele(missingAllele)); + Assert.assertFalse(vc.hasAllele(missingAllele, true)); } } diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java index 08db7bcde..845d9c216 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java @@ -66,8 +66,8 @@ public class VariantContextUtilsUnitTest extends BaseTest { return new Genotype(sample, Arrays.asList(a1, a2)); } - private Genotype makeG(String sample, Allele a1, Allele a2, double log10pError, double l1, double l2, double l3) { - return new Genotype(sample, Arrays.asList(a1, a2), log10pError, new double[]{l1,l2,l3}); + private Genotype makeG(String sample, Allele a1, Allele a2, double log10pError, double... pls) { + return new Genotype(sample, Arrays.asList(a1, a2), log10pError, pls); } @@ -144,15 +144,18 @@ public class VariantContextUtilsUnitTest extends BaseTest { new MergeAllelesTest(Arrays.asList(Aref, T), Arrays.asList(Aref, C), - Arrays.asList(Aref, C, T)); // sorted by allele + Arrays.asList(Aref, T, C)); // in order of appearence new MergeAllelesTest(Arrays.asList(Aref, C, T), Arrays.asList(Aref, C), Arrays.asList(Aref, C, T)); + new MergeAllelesTest(Arrays.asList(Aref, C, T), Arrays.asList(Aref, C, T)); + new MergeAllelesTest(Arrays.asList(Aref, T, C), Arrays.asList(Aref, T, C)); + new MergeAllelesTest(Arrays.asList(Aref, T, C), Arrays.asList(Aref, C), - Arrays.asList(Aref, C, T)); // sorted by allele + Arrays.asList(Aref, T, C)); // in order of appearence // The following is actually a pathological case - there's no way on a vcf to represent a null allele that's non-variant. // The code converts this (correctly) to a single-base non-variant vc with whatever base was there as a reference. @@ -167,10 +170,12 @@ public class VariantContextUtilsUnitTest extends BaseTest { Arrays.asList(delRef, ATC, ATCATC), Arrays.asList(delRef, ATC, ATCATC)); + // alleles in the order we see them new MergeAllelesTest(Arrays.asList(delRef, ATCATC), Arrays.asList(delRef, ATC, ATCATC), - Arrays.asList(delRef, ATC, ATCATC)); + Arrays.asList(delRef, ATCATC, ATC)); + // same new MergeAllelesTest(Arrays.asList(delRef, ATC), Arrays.asList(delRef, ATCATC), Arrays.asList(delRef, ATC, ATCATC)); @@ -434,7 +439,7 @@ public class VariantContextUtilsUnitTest extends BaseTest { new MergeGenotypesTest("PerserveAlleles", "1,2", makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)), makeVC("2", Arrays.asList(Aref, C), makeG("s2", Aref, C, 2)), - makeVC("3", Arrays.asList(Aref, C, T), makeG("s1", Aref, T, 1), makeG("s2", Aref, C, 2))); + makeVC("3", Arrays.asList(Aref, T, C), makeG("s1", Aref, T, 1), makeG("s2", Aref, C, 2))); new MergeGenotypesTest("TakeGenotypePartialOverlap-1,2", "1,2", makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)), @@ -446,7 +451,31 @@ public class VariantContextUtilsUnitTest extends BaseTest { makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2), makeG("s3", Aref, T, 3)), makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2), makeG("s3", Aref, T, 3))); + // // merging genothpes with PLs + // + + // first, do no harm + new MergeGenotypesTest("OrderedPLs", "1", + makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1, 1, 2, 3)), + makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1, 1, 2, 3))); + + // first, do no harm + new MergeGenotypesTest("OrderedPLs-3Alleles", "1", + makeVC("1", Arrays.asList(Aref, C, T), makeG("s1", Aref, T, 1, 1, 2, 3, 4, 5, 6)), + makeVC("1", Arrays.asList(Aref, C, T), makeG("s1", Aref, T, 1, 1, 2, 3, 4, 5, 6))); + + // first, do no harm + new MergeGenotypesTest("OrderedPLs-3Alleles-2", "1", + makeVC("1", Arrays.asList(Aref, T, C), makeG("s1", Aref, T, 1, 1, 2, 3, 4, 5, 6)), + makeVC("1", Arrays.asList(Aref, T, C), makeG("s1", Aref, T, 1, 1, 2, 3, 4, 5, 6))); + + // first, do no harm + new MergeGenotypesTest("OrderedPLs-3Alleles-2", "1", + makeVC("1", Arrays.asList(Aref, T, C), makeG("s1", Aref, T, 1, 1, 2, 3, 4, 5, 6)), + makeVC("1", Arrays.asList(Aref, T, C), makeG("s2", Aref, C, 1, 1, 2, 3, 4, 5, 6)), + makeVC("1", Arrays.asList(Aref, T, C), makeG("s1", Aref, T, 1, 1, 2, 3, 4, 5, 6), makeG("s2", Aref, C, 1, 1, 2, 3, 4, 5, 6))); + new MergeGenotypesTest("TakeGenotypePartialOverlapWithPLs-2,1", "2,1", makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1,5,0,3)), makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2,4,0,2), makeG("s3", Aref, T, 3,3,0,2)), @@ -458,6 +487,12 @@ public class VariantContextUtilsUnitTest extends BaseTest { // no likelihoods on result since type changes to mixed multiallelic makeVC("3", Arrays.asList(Aref, ATC, T), makeG("s1", Aref, ATC, 1), makeG("s3", Aref, T, 3))); + new MergeGenotypesTest("MultipleSamplePLsDifferentOrder", "1,2", + makeVC("1", Arrays.asList(Aref, C, T), makeG("s1", Aref, C, 1, 1, 2, 3, 4, 5, 6)), + makeVC("2", Arrays.asList(Aref, T, C), makeG("s2", Aref, T, 2, 6, 5, 4, 3, 2, 1)), + // no likelihoods on result since type changes to mixed multiallelic + makeVC("3", Arrays.asList(Aref, C, T), makeG("s1", Aref, C, 1), makeG("s2", Aref, T, 2))); + return MergeGenotypesTest.getTests(MergeGenotypesTest.class); } @@ -493,11 +528,11 @@ public class VariantContextUtilsUnitTest extends BaseTest { Genotype value = entry.getValue(); Genotype expectedValue = expected.get(key); - Assert.assertEquals(value.alleles, expectedValue.alleles); - Assert.assertEquals(value.getNegLog10PError(), expectedValue.getNegLog10PError()); - Assert.assertEquals(value.hasLikelihoods(), expectedValue.hasLikelihoods()); + Assert.assertEquals(value.alleles, expectedValue.alleles, "Alleles in Genotype aren't equal"); + Assert.assertEquals(value.getNegLog10PError(), expectedValue.getNegLog10PError(), "GQ values aren't equal"); + Assert.assertEquals(value.hasLikelihoods(), expectedValue.hasLikelihoods(), "Either both have likelihoods or both not"); if ( value.hasLikelihoods() ) - Assert.assertEquals(value.getLikelihoods().getAsVector(), expectedValue.getLikelihoods().getAsVector()); + Assert.assertEquals(value.getLikelihoods().getAsVector(), expectedValue.getLikelihoods().getAsVector(), "Genotype likelihoods aren't equal"); } }