Merge branch 'vcfAlleles'

This commit is contained in:
Mark DePristo 2011-10-10 13:23:52 -04:00
commit ac41b303fd
15 changed files with 232 additions and 104 deletions

View File

@ -47,7 +47,7 @@ public class AlleleBalanceBySample extends GenotypeAnnotation implements Experim
if (!g.isHet())
return null;
Set<Allele> altAlleles = vc.getAlternateAlleles();
Collection<Allele> altAlleles = vc.getAlternateAlleles();
if ( altAlleles.size() == 0 )
return null;

View File

@ -33,6 +33,7 @@ import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.PrintStream;
import java.util.List;
import java.util.Map;
import java.util.Set;
@ -76,7 +77,7 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
*/
protected abstract void getLog10PNonRef(RefMetaDataTracker tracker,
ReferenceContext ref,
Map<String, Genotype> GLs, Set<Allele> Alleles,
Map<String, Genotype> GLs, List<Allele> Alleles,
double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors);

View File

@ -29,19 +29,14 @@ import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.SimpleTimer;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import sun.reflect.generics.reflectiveObjects.NotImplementedException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.*;
public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
//
@ -58,7 +53,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
public void getLog10PNonRef(RefMetaDataTracker tracker,
ReferenceContext ref,
Map<String, Genotype> GLs, Set<Allele>alleles,
Map<String, Genotype> GLs, List<Allele> alleles,
double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors) {
final int numAlleles = alleles.size();

View File

@ -54,7 +54,7 @@ public class GridSearchAFEstimation extends AlleleFrequencyCalculationModel {
protected void getLog10PNonRef(RefMetaDataTracker tracker,
ReferenceContext ref,
Map<String, Genotype> GLs, Set<Allele>alleles,
Map<String, Genotype> GLs, List<Allele> alleles,
double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors) {
initializeAFMatrix(GLs);

View File

@ -423,7 +423,7 @@ public class UnifiedGenotyperEngine {
int endLoc = calculateEndPos(vc.getAlleles(), vc.getReference(), loc);
Set<Allele> myAlleles = vc.getAlleles();
Set<Allele> myAlleles = new HashSet<Allele>(vc.getAlleles());
// strip out the alternate allele if it's a ref call
if ( bestAFguess == 0 && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY ) {
myAlleles = new HashSet<Allele>(1);
@ -447,7 +447,7 @@ public class UnifiedGenotyperEngine {
return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF));
}
private int calculateEndPos(Set<Allele> alleles, Allele refAllele, GenomeLoc loc) {
private int calculateEndPos(Collection<Allele> alleles, Allele refAllele, GenomeLoc loc) {
// TODO - temp fix until we can deal with extended events properly
// for indels, stop location is one more than ref allele length
boolean isSNP = true, hasNullAltAllele = false;

View File

@ -10,6 +10,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Collection;
import java.util.Set;
/**
@ -142,8 +143,8 @@ public class ValidationReport extends VariantEvaluator implements StandardEval {
public boolean haveDifferentAltAlleles(VariantContext eval, VariantContext comp) {
Set<Allele> evalAlts = eval.getAlternateAlleles();
Set<Allele> compAlts = comp.getAlternateAlleles();
Collection<Allele> evalAlts = eval.getAlternateAlleles();
Collection<Allele> compAlts = comp.getAlternateAlleles();
if ( evalAlts.size() != compAlts.size() ) {
return true;
} else {

View File

@ -237,7 +237,7 @@ public class VariantValidationAssessor extends RodWalker<VariantContext,Integer>
infoMap.put("HomVarPct", String.format("%.1f", 100.0*homVarProp));
infoMap.put("HetPct", String.format("%.1f", 100.0*hetProp));
infoMap.put("HW", String.format("%.2f", hwScore));
Set<Allele> altAlleles = vContext.getAlternateAlleles();
Collection<Allele> altAlleles = vContext.getAlternateAlleles();
int altAlleleCount = altAlleles.size() == 0 ? 0 : vContext.getChromosomeCount(altAlleles.iterator().next());
if ( !isViolation && altAlleleCount > 0 )
numTrueVariants++;

View File

@ -181,7 +181,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
protected Type type = null;
/** A set of the alleles segregating in this context */
protected LinkedHashSet<Allele> alleles = null;
final protected List<Allele> alleles;
/** A mapping from sampleName -> genotype objects for all genotypes associated with this context */
protected Map<String, Genotype> genotypes = null;
@ -355,7 +355,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
if ( alleles == null ) { throw new IllegalArgumentException("Alleles cannot be null"); }
// we need to make this a LinkedHashSet in case the user prefers a given ordering of alleles
this.alleles = alleleCollectionToSet(new LinkedHashSet<Allele>(), alleles);
this.alleles = makeAlleles(alleles);
if ( genotypes == null ) { genotypes = NO_GENOTYPES; }
@ -445,7 +445,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @param alleles the set of allele segregating alleles at this site. Must include those in genotypes, but may be more
* @return vc subcontext
*/
public VariantContext subContextFromGenotypes(Collection<Genotype> genotypes, Set<Allele> alleles) {
public VariantContext subContextFromGenotypes(Collection<Genotype> genotypes, Collection<Allele> alleles) {
return new VariantContext(getSource(), contig, start, stop, alleles, genotypes != null ? genotypeCollectionToMap(new TreeMap<String, Genotype>(), genotypes) : null, getNegLog10PError(), filtersWereApplied() ? getFilters() : null, getAttributes(), getReferenceBaseForIndel());
}
@ -687,17 +687,6 @@ public class VariantContext implements Feature { // to enable tribble intergrati
return ref;
}
/** Private helper routine that grabs the reference allele but doesn't throw an error if there's no such allele */
// private Allele getReferenceWithoutError() {
// for ( Allele allele : getAlleles() ) {
// if ( allele.isReference() ) {
// return allele;
// }
// }
//
// return null;
// }
/**
* @return true if the context is strictly bi-allelic
@ -754,7 +743,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
*
* @return the set of alleles
*/
public Set<Allele> getAlleles() { return alleles; }
public List<Allele> getAlleles() { return alleles; }
/**
* Gets the alternate alleles. This method should return all the alleles present at the location,
@ -763,14 +752,8 @@ public class VariantContext implements Feature { // to enable tribble intergrati
*
* @return the set of alternate alleles
*/
public Set<Allele> getAlternateAlleles() {
LinkedHashSet<Allele> altAlleles = new LinkedHashSet<Allele>();
for ( Allele allele : alleles ) {
if ( allele.isNonReference() )
altAlleles.add(allele);
}
return Collections.unmodifiableSet(altAlleles);
public List<Allele> getAlternateAlleles() {
return alleles.subList(1, alleles.size());
}
/**
@ -797,14 +780,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @throws IllegalArgumentException if i is invalid
*/
public Allele getAlternateAllele(int i) {
int n = 0;
for ( Allele allele : alleles ) {
if ( allele.isNonReference() && n++ == i )
return allele;
}
throw new IllegalArgumentException("Requested " + i + " alternative allele but there are only " + n + " alternative alleles " + this);
return alleles.get(i+1);
}
/**
@ -813,8 +789,8 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* regardless of ordering. Otherwise returns false.
*/
public boolean hasSameAlternateAllelesAs ( VariantContext other ) {
Set<Allele> thisAlternateAlleles = getAlternateAlleles();
Set<Allele> otherAlternateAlleles = other.getAlternateAlleles();
List<Allele> thisAlternateAlleles = getAlternateAlleles();
List<Allele> otherAlternateAlleles = other.getAlternateAlleles();
if ( thisAlternateAlleles.size() != otherAlternateAlleles.size() ) {
return false;
@ -1121,7 +1097,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
if ( !hasGenotypes() )
return;
Set<Allele> reportedAlleles = getAlleles();
List<Allele> reportedAlleles = getAlleles();
Set<Allele> observedAlleles = new HashSet<Allele>();
observedAlleles.add(getReference());
for ( Genotype g : getGenotypes().values() ) {
@ -1371,17 +1347,34 @@ public class VariantContext implements Feature { // to enable tribble intergrati
}
// protected basic manipulation routines
private static LinkedHashSet<Allele> alleleCollectionToSet(LinkedHashSet<Allele> dest, Collection<Allele> alleles) {
for ( Allele a : alleles ) {
for ( Allele b : dest ) {
private static List<Allele> makeAlleles(Collection<Allele> alleles) {
final List<Allele> alleleList = new ArrayList<Allele>(alleles.size());
boolean sawRef = false;
for ( final Allele a : alleles ) {
for ( final Allele b : alleleList ) {
if ( a.equals(b, true) )
throw new IllegalArgumentException("Duplicate allele added to VariantContext: " + a);
}
dest.add(a);
// deal with the case where the first allele isn't the reference
if ( a.isReference() ) {
if ( sawRef )
throw new IllegalArgumentException("Alleles for a VariantContext must contain a single reference allele: " + alleles);
alleleList.add(0, a);
sawRef = true;
}
else
alleleList.add(a);
}
return dest;
if ( alleleList.isEmpty() )
throw new IllegalArgumentException("Cannot create a VariantContext with an empty allele list");
if ( alleleList.get(0).isNonReference() )
throw new IllegalArgumentException("Alleles for a VariantContext must contain a single reference allele: " + alleles);
return alleleList;
}
public static Map<String, Genotype> genotypeCollectionToMap(Map<String, Genotype> dest, Collection<Genotype> genotypes) {

View File

@ -29,6 +29,7 @@ import net.sf.picard.reference.ReferenceSequenceFile;
import net.sf.samtools.util.StringUtil;
import org.apache.commons.jexl2.Expression;
import org.apache.commons.jexl2.JexlEngine;
import org.apache.log4j.Logger;
import org.broad.tribble.util.popgen.HardyWeinbergCalculation;
import org.broadinstitute.sting.gatk.walkers.phasing.ReadBackedPhasingWalker;
import org.broadinstitute.sting.utils.BaseUtils;
@ -44,6 +45,7 @@ import java.io.Serializable;
import java.util.*;
public class VariantContextUtils {
private static Logger logger = Logger.getLogger(VariantContextUtils.class);
public final static String MERGE_INTERSECTION = "Intersection";
public final static String MERGE_FILTER_IN_ALL = "FilteredInAll";
public final static String MERGE_REF_IN_ALL = "ReferenceInAll";
@ -511,7 +513,7 @@ public class VariantContextUtils {
final String name = first.getSource();
final Allele refAllele = determineReferenceAllele(VCs);
final Set<Allele> alleles = new TreeSet<Allele>();
final Set<Allele> alleles = new LinkedHashSet<Allele>();
final Set<String> filters = new TreeSet<String>();
final Map<String, Object> attributes = new TreeMap<String, Object>();
final Set<String> inconsistentAttributes = new HashSet<String>();
@ -604,11 +606,14 @@ public class VariantContextUtils {
}
}
// if we have more alternate alleles in the merged VC than in one or more of the original VCs, we need to strip out the GL/PLs (because they are no longer accurate)
// if we have more alternate alleles in the merged VC than in one or more of the
// original VCs, we need to strip out the GL/PLs (because they are no longer accurate)
for ( VariantContext vc : VCs ) {
if (vc.alleles.size() == 1)
continue;
if ( vc.alleles.size() != alleles.size()) {
if ( hasPLIncompatibleAlleles(alleles, vc.alleles)) {
logger.warn(String.format("Stripping PLs at %s due incompatible alleles merged=%s vs. single=%s",
genomeLocParser.createGenomeLoc(vc), alleles, vc.alleles));
genotypes = stripPLs(genotypes);
break;
}
@ -661,6 +666,24 @@ public class VariantContextUtils {
return merged;
}
private static final boolean hasPLIncompatibleAlleles(final Collection<Allele> alleleSet1, final Collection<Allele> alleleSet2) {
final Iterator<Allele> it1 = alleleSet1.iterator();
final Iterator<Allele> it2 = alleleSet2.iterator();
while ( it1.hasNext() && it2.hasNext() ) {
final Allele a1 = it1.next();
final Allele a2 = it2.next();
if ( ! a1.equals(a2) )
return true;
}
// by this point, at least one of the iterators is empty. All of the elements
// we've compared are equal up until this point. But it's possible that the
// sets aren't the same size, which is indicated by the test below. If they
// are of the same size, though, the sets are compatible
return it1.hasNext() || it2.hasNext();
}
public static boolean allelesAreSubset(VariantContext vc1, VariantContext vc2) {
// if all alleles of vc1 are a contained in alleles of vc2, return true
if (!vc1.getReference().equals(vc2.getReference()))

View File

@ -50,8 +50,8 @@ public class DiffObjectsIntegrationTest extends WalkerTest {
@DataProvider(name = "data")
public Object[][] createData() {
new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", "dc1ca75c6ecf32641967d61e167acfff");
new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", "df0fcb568a3a49fc74830103b2e26f6c");
new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", "996dd8f24f2db98305d0fd8f155fc7f9");
new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", "bf4e25cbc748e2441afd891e5c2722d6");
return TestParams.getTests(TestParams.class);
}

View File

@ -70,7 +70,7 @@ public class DiffableReaderUnitTest extends BaseTest {
private static void testLeaf(DiffNode rec, String field, Object expected) {
DiffElement value = rec.getElement(field);
Assert.assertNotNull(value, "Expected to see leaf named " + field + " in rec " + rec);
Assert.assertEquals(value.getValue().getValue(), expected, "Expected to leaf named " + field + " to have value " + expected + " in rec " + rec);
Assert.assertEquals(value.getValue().getValue(), expected, "Expected to see leaf named " + field + " to have value " + expected + " in rec " + rec + " but got instead " + value.getValue().getValue());
}
@Test(enabled = true, dependsOnMethods = "testPluggableDiffableReaders")
@ -95,7 +95,7 @@ public class DiffableReaderUnitTest extends BaseTest {
testLeaf(rec1, "POS", 2646);
testLeaf(rec1, "ID", "rs62635284");
testLeaf(rec1, "REF", Allele.create("G", true));
testLeaf(rec1, "ALT", new HashSet<Allele>(Arrays.asList(Allele.create("A"))));
testLeaf(rec1, "ALT", Arrays.asList(Allele.create("A")));
testLeaf(rec1, "QUAL", 0.15);
testLeaf(rec1, "FILTER", Collections.<Object>emptySet());
testLeaf(rec1, "AC", "2");

View File

@ -257,34 +257,40 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
}
@Test
public void testWithIndelAllelesPassedIn() {
public void testWithIndelAllelesPassedIn1() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("408d3aba4d094c067fc00a43992c2292"));
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec1);
}
@Test
public void testWithIndelAllelesPassedIn2() {
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
+ validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("94977d6e42e764280e9deaf4e3ac8c80"));
Arrays.asList("5e4e09354410b76fc0d822050d84132a"));
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2);
}
@Test
public void testWithIndelAllelesPassedIn3() {
WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1,
Arrays.asList("e66b7321e2ac91742ad3ef91040daafd"));
Arrays.asList("c599eedbeb422713b8a28529e805e4ae"));
executeTest("test MultiSample Pilot2 indels with complicated records", spec3);
}
@Test
public void testWithIndelAllelesPassedIn4() {
WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec(
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation +
"phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1,
Arrays.asList("37e891bf1ac40caec9ea228f39c27e44"));
Arrays.asList("37d908a682ac269f8f19dec939ff5b01"));
executeTest("test MultiSample 1000G Phase1 indels with complicated records emitting all sites", spec4);
}
}

View File

@ -78,26 +78,26 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
executeTest("combine PLs 1:" + new File(file1).getName() + " 2:" + new File(file2).getName(), spec);
}
@Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "c608b9fc1e36dba6cebb4f259883f9f0"); }
@Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "20caad94411d6ab48153b214de916df8", " -setKey foo"); }
@Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "004f3065cb1bc2ce2f9afd695caf0b48", " -setKey null"); }
@Test public void test1SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "ea0a660cd04101ce7b534aba0310721d"); }
@Test public void test2SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "cb0350e7a9d2483993482b69f5432b64", " -setKey foo"); }
@Test public void test3SNP() { test1InOut("pilot2.snps.vcf4.genotypes.vcf", "0571c48cc59cf244779caae52d562e79", " -setKey null"); }
@Test public void testOfficialCEUPilotCalls() { test1InOut("CEU.trio.2010_03.genotypes.vcf.gz", "c9c901ff9ef2a982624b203a8086dff0"); } // official project VCF files in tabix format
@Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "7593be578d4274d672fc22fced38012b"); }
@Test public void test1Indel1() { test1InOut("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "75901304abc1daa41b1906f881aa7bbc"); }
@Test public void test1Indel2() { test1InOut("CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "1cd467863c4e948fadd970681552d57e"); }
@Test public void combineWithPLs() { combinePLs("combine.3.vcf", "combine.4.vcf", "0f873fed02aa99db5b140bcd6282c10a"); }
@Test public void combineWithPLs() { combinePLs("combine.3.vcf", "combine.4.vcf", "d08e933b6c81246e998d3ece50ddfdcc"); }
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "1d5a021387a8a86554db45a29f66140f"); } // official project VCF files in tabix format
@Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "96941ee177b0614a9879af0ac3218963"); } // official project VCF files in tabix format
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "f1c8720fde62687c2e861217670d8b3c"); }
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "01967686e0e02dbccd2590b70f2d049b"); } // official project VCF files in tabix format
@Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "8c113199c4a93a4a408104b735d59044"); } // official project VCF files in tabix format
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "30e96a0cb614cd5bc056e1f7ec6d10bd"); }
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "e144b6283765494bfe8189ac59965083"); }
@Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "89f55abea8f59e39d1effb908440548c"); }
@Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "78a49597f1abf1c738e67d50c8fbed2b"); }
@Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "c6adeda751cb2a08690dd9202356629f"); }
@Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType KEEP_IF_ALL_UNFILTERED", "3a08fd5ee18993dfc8882156ccf5d2e9"); }
@Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "9253d61ddb52c429adf0e153cef494ca"); }
@Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType KEEP_IF_ALL_UNFILTERED", "5012dfe65cf7e7d8f014e97e4a996aea"); }
@Test public void threeWayWithRefs() {
WalkerTestSpec spec = new WalkerTestSpec(
@ -127,10 +127,10 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
executeTest("combineComplexSites 1:" + new File(file1).getName() + " 2:" + new File(file2).getName() + " args = " + args, spec);
}
@Test public void complexTestFull() { combineComplexSites("", "b5a53ee92bdaacd2bb3327e9004ae058"); }
@Test public void complexTestMinimal() { combineComplexSites(" -minimalVCF", "df96cb3beb2dbb5e02f80abec7d3571e"); }
@Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "f704caeaaaed6711943014b847fe381a"); }
@Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "f704caeaaaed6711943014b847fe381a"); }
@Test public void complexTestFull() { combineComplexSites("", "2842337e9943366f7a4d5f148f701b8c"); }
@Test public void complexTestMinimal() { combineComplexSites(" -minimalVCF", "39724318e6265d0318a3fe4609612785"); }
@Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "fe9bb02ab8b3d0dd2ad6373ebdb6d915"); }
@Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "fe9bb02ab8b3d0dd2ad6373ebdb6d915"); }
@Test
public void combineDBSNPDuplicateSites() {

View File

@ -6,17 +6,19 @@ package org.broadinstitute.sting.utils.variantcontext;
import org.broadinstitute.sting.BaseTest;
import org.testng.Assert;
import org.testng.annotations.BeforeSuite;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import org.testng.Assert;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
public class VariantContextUnitTest extends BaseTest {
Allele A, Aref, T, Tref;
Allele A, Aref, C, T, Tref;
Allele del, delRef, ATC, ATCref;
// A [ref] / T at 10
@ -45,6 +47,7 @@ public class VariantContextUnitTest extends BaseTest {
delRef = Allele.create("-", true);
A = Allele.create("A");
C = Allele.create("C");
Aref = Allele.create("A", true);
T = Allele.create("T");
Tref = Allele.create("T", true);
@ -132,6 +135,16 @@ public class VariantContextUnitTest extends BaseTest {
Assert.assertEquals(vc.getType(), VariantContext.Type.SYMBOLIC);
}
@Test
public void testMultipleSNPAlleleOrdering() {
final List<Allele> allelesNaturalOrder = Arrays.asList(Aref, C, T);
final List<Allele> allelesUnnaturalOrder = Arrays.asList(Aref, T, C);
VariantContext naturalVC = new VariantContext("natural", snpLoc, snpLocStart, snpLocStop, allelesNaturalOrder);
VariantContext unnaturalVC = new VariantContext("unnatural", snpLoc, snpLocStart, snpLocStop, allelesUnnaturalOrder);
Assert.assertEquals(new ArrayList<Allele>(naturalVC.getAlleles()), allelesNaturalOrder);
Assert.assertEquals(new ArrayList<Allele>(unnaturalVC.getAlleles()), allelesUnnaturalOrder);
}
@Test
public void testCreatingSNPVariantContext() {
@ -249,11 +262,16 @@ public class VariantContextUnitTest extends BaseTest {
new VariantContext("test", insLoc, insLocStart, insLocStop, Arrays.asList(delRef, del));
}
@Test (expectedExceptions = IllegalStateException.class)
@Test (expectedExceptions = IllegalArgumentException.class)
public void testBadConstructorArgs3() {
new VariantContext("test", insLoc, insLocStart, insLocStop, Arrays.asList(del));
}
@Test (expectedExceptions = IllegalArgumentException.class)
public void testBadConstructorArgs4() {
new VariantContext("test", insLoc, insLocStart, insLocStop, Collections.<Allele>emptyList());
}
@Test (expectedExceptions = IllegalArgumentException.class)
public void testBadConstructorArgsDuplicateAlleles1() {
new VariantContext("test", insLoc, insLocStart, insLocStop, Arrays.asList(Aref, T, T));
@ -443,14 +461,70 @@ public class VariantContextUnitTest extends BaseTest {
Assert.assertEquals(0, vc5.getChromosomeCount(Aref));
}
// --------------------------------------------------------------------------------
//
// Test allele merging
//
// --------------------------------------------------------------------------------
@Test
public void testManipulatingAlleles() {
// todo -- add tests that call add/set/remove
private class GetAllelesTest extends TestDataProvider {
List<Allele> alleles;
private GetAllelesTest(String name, Allele... arg) {
super(GetAllelesTest.class, name);
this.alleles = Arrays.asList(arg);
}
public String toString() {
return String.format("%s input=%s", super.toString(), alleles);
}
}
@Test
public void testManipulatingGenotypes() {
// todo -- add tests that call add/set/remove
@DataProvider(name = "getAlleles")
public Object[][] mergeAllelesData() {
new GetAllelesTest("A*", Aref);
new GetAllelesTest("-*", delRef);
new GetAllelesTest("A*/C", Aref, C);
new GetAllelesTest("A*/C/T", Aref, C, T);
new GetAllelesTest("A*/T/C", Aref, T, C);
new GetAllelesTest("A*/C/T/-", Aref, C, T, del);
new GetAllelesTest("A*/T/C/-", Aref, T, C, del);
new GetAllelesTest("A*/-/T/C", Aref, del, T, C);
return GetAllelesTest.getTests(GetAllelesTest.class);
}
@Test(dataProvider = "getAlleles")
public void testMergeAlleles(GetAllelesTest cfg) {
final List<Allele> altAlleles = cfg.alleles.subList(1, cfg.alleles.size());
final VariantContext vc = new VariantContext("test", snpLoc, snpLocStart, snpLocStop, cfg.alleles, null, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, (byte)'A');
Assert.assertEquals(vc.getAlleles(), cfg.alleles, "VC alleles not the same as input alleles");
Assert.assertEquals(vc.getNAlleles(), cfg.alleles.size(), "VC getNAlleles not the same as input alleles size");
Assert.assertEquals(vc.getAlternateAlleles(), altAlleles, "VC alt alleles not the same as input alt alleles");
for ( int i = 0; i < cfg.alleles.size(); i++ ) {
final Allele inputAllele = cfg.alleles.get(i);
Assert.assertTrue(vc.hasAllele(inputAllele));
if ( inputAllele.isReference() ) {
final Allele nonRefVersion = Allele.create(inputAllele.getBases(), false);
Assert.assertTrue(vc.hasAllele(nonRefVersion, true));
Assert.assertFalse(vc.hasAllele(nonRefVersion, false));
}
Assert.assertEquals(inputAllele, vc.getAllele(inputAllele.getBaseString()));
Assert.assertEquals(inputAllele, vc.getAllele(inputAllele.getBases()));
if ( i > 0 ) { // it's an alt allele
Assert.assertEquals(inputAllele, vc.getAlternateAllele(i-1));
}
}
final Allele missingAllele = Allele.create("AACCGGTT"); // does not exist
Assert.assertNull(vc.getAllele(missingAllele.getBases()));
Assert.assertFalse(vc.hasAllele(missingAllele));
Assert.assertFalse(vc.hasAllele(missingAllele, true));
}
}

View File

@ -66,8 +66,8 @@ public class VariantContextUtilsUnitTest extends BaseTest {
return new Genotype(sample, Arrays.asList(a1, a2));
}
private Genotype makeG(String sample, Allele a1, Allele a2, double log10pError, double l1, double l2, double l3) {
return new Genotype(sample, Arrays.asList(a1, a2), log10pError, new double[]{l1,l2,l3});
private Genotype makeG(String sample, Allele a1, Allele a2, double log10pError, double... pls) {
return new Genotype(sample, Arrays.asList(a1, a2), log10pError, pls);
}
@ -144,15 +144,18 @@ public class VariantContextUtilsUnitTest extends BaseTest {
new MergeAllelesTest(Arrays.asList(Aref, T),
Arrays.asList(Aref, C),
Arrays.asList(Aref, C, T)); // sorted by allele
Arrays.asList(Aref, T, C)); // in order of appearance
new MergeAllelesTest(Arrays.asList(Aref, C, T),
Arrays.asList(Aref, C),
Arrays.asList(Aref, C, T));
new MergeAllelesTest(Arrays.asList(Aref, C, T), Arrays.asList(Aref, C, T));
new MergeAllelesTest(Arrays.asList(Aref, T, C), Arrays.asList(Aref, T, C));
new MergeAllelesTest(Arrays.asList(Aref, T, C),
Arrays.asList(Aref, C),
Arrays.asList(Aref, C, T)); // sorted by allele
Arrays.asList(Aref, T, C)); // in order of appearance
// The following is actually a pathological case - there's no way on a vcf to represent a null allele that's non-variant.
// The code converts this (correctly) to a single-base non-variant vc with whatever base was there as a reference.
@ -167,10 +170,12 @@ public class VariantContextUtilsUnitTest extends BaseTest {
Arrays.asList(delRef, ATC, ATCATC),
Arrays.asList(delRef, ATC, ATCATC));
// alleles in the order we see them
new MergeAllelesTest(Arrays.asList(delRef, ATCATC),
Arrays.asList(delRef, ATC, ATCATC),
Arrays.asList(delRef, ATC, ATCATC));
Arrays.asList(delRef, ATCATC, ATC));
// same
new MergeAllelesTest(Arrays.asList(delRef, ATC),
Arrays.asList(delRef, ATCATC),
Arrays.asList(delRef, ATC, ATCATC));
@ -434,7 +439,7 @@ public class VariantContextUtilsUnitTest extends BaseTest {
new MergeGenotypesTest("PerserveAlleles", "1,2",
makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)),
makeVC("2", Arrays.asList(Aref, C), makeG("s2", Aref, C, 2)),
makeVC("3", Arrays.asList(Aref, C, T), makeG("s1", Aref, T, 1), makeG("s2", Aref, C, 2)));
makeVC("3", Arrays.asList(Aref, T, C), makeG("s1", Aref, T, 1), makeG("s2", Aref, C, 2)));
new MergeGenotypesTest("TakeGenotypePartialOverlap-1,2", "1,2",
makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1)),
@ -446,7 +451,31 @@ public class VariantContextUtilsUnitTest extends BaseTest {
makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2), makeG("s3", Aref, T, 3)),
makeVC("3", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2), makeG("s3", Aref, T, 3)));
//
// merging genotypes with PLs
//
// first, do no harm
new MergeGenotypesTest("OrderedPLs", "1",
makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1, 1, 2, 3)),
makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1, 1, 2, 3)));
// first, do no harm
new MergeGenotypesTest("OrderedPLs-3Alleles", "1",
makeVC("1", Arrays.asList(Aref, C, T), makeG("s1", Aref, T, 1, 1, 2, 3, 4, 5, 6)),
makeVC("1", Arrays.asList(Aref, C, T), makeG("s1", Aref, T, 1, 1, 2, 3, 4, 5, 6)));
// first, do no harm
new MergeGenotypesTest("OrderedPLs-3Alleles-2", "1",
makeVC("1", Arrays.asList(Aref, T, C), makeG("s1", Aref, T, 1, 1, 2, 3, 4, 5, 6)),
makeVC("1", Arrays.asList(Aref, T, C), makeG("s1", Aref, T, 1, 1, 2, 3, 4, 5, 6)));
// first, do no harm
new MergeGenotypesTest("OrderedPLs-3Alleles-2", "1",
makeVC("1", Arrays.asList(Aref, T, C), makeG("s1", Aref, T, 1, 1, 2, 3, 4, 5, 6)),
makeVC("1", Arrays.asList(Aref, T, C), makeG("s2", Aref, C, 1, 1, 2, 3, 4, 5, 6)),
makeVC("1", Arrays.asList(Aref, T, C), makeG("s1", Aref, T, 1, 1, 2, 3, 4, 5, 6), makeG("s2", Aref, C, 1, 1, 2, 3, 4, 5, 6)));
new MergeGenotypesTest("TakeGenotypePartialOverlapWithPLs-2,1", "2,1",
makeVC("1", Arrays.asList(Aref, T), makeG("s1", Aref, T, 1,5,0,3)),
makeVC("2", Arrays.asList(Aref, T), makeG("s1", Aref, T, 2,4,0,2), makeG("s3", Aref, T, 3,3,0,2)),
@ -458,6 +487,12 @@ public class VariantContextUtilsUnitTest extends BaseTest {
// no likelihoods on result since type changes to mixed multiallelic
makeVC("3", Arrays.asList(Aref, ATC, T), makeG("s1", Aref, ATC, 1), makeG("s3", Aref, T, 3)));
new MergeGenotypesTest("MultipleSamplePLsDifferentOrder", "1,2",
makeVC("1", Arrays.asList(Aref, C, T), makeG("s1", Aref, C, 1, 1, 2, 3, 4, 5, 6)),
makeVC("2", Arrays.asList(Aref, T, C), makeG("s2", Aref, T, 2, 6, 5, 4, 3, 2, 1)),
// no likelihoods on result since type changes to mixed multiallelic
makeVC("3", Arrays.asList(Aref, C, T), makeG("s1", Aref, C, 1), makeG("s2", Aref, T, 2)));
return MergeGenotypesTest.getTests(MergeGenotypesTest.class);
}
@ -493,11 +528,11 @@ public class VariantContextUtilsUnitTest extends BaseTest {
Genotype value = entry.getValue();
Genotype expectedValue = expected.get(key);
Assert.assertEquals(value.alleles, expectedValue.alleles);
Assert.assertEquals(value.getNegLog10PError(), expectedValue.getNegLog10PError());
Assert.assertEquals(value.hasLikelihoods(), expectedValue.hasLikelihoods());
Assert.assertEquals(value.alleles, expectedValue.alleles, "Alleles in Genotype aren't equal");
Assert.assertEquals(value.getNegLog10PError(), expectedValue.getNegLog10PError(), "GQ values aren't equal");
Assert.assertEquals(value.hasLikelihoods(), expectedValue.hasLikelihoods(), "Either both have likelihoods or both not");
if ( value.hasLikelihoods() )
Assert.assertEquals(value.getLikelihoods().getAsVector(), expectedValue.getLikelihoods().getAsVector());
Assert.assertEquals(value.getLikelihoods().getAsVector(), expectedValue.getLikelihoods().getAsVector(), "Genotype likelihoods aren't equal");
}
}