Merging many bug fixes to reduce reads
This commit is contained in:
commit
96c875399c
|
|
@ -26,14 +26,31 @@ public class Genotype {
|
|||
protected boolean filtersWereAppliedToContext;
|
||||
|
||||
public Genotype(String sampleName, List<Allele> alleles, double negLog10PError, Set<String> filters, Map<String, ?> attributes, boolean isPhased) {
|
||||
this(sampleName, alleles, negLog10PError, filters, attributes, isPhased, null);
|
||||
}
|
||||
|
||||
public Genotype(String sampleName, List<Allele> alleles, double negLog10PError, Set<String> filters, Map<String, ?> attributes, boolean isPhased, double[] log10Likelihoods) {
|
||||
if ( alleles != null )
|
||||
this.alleles = Collections.unmodifiableList(alleles);
|
||||
commonInfo = new InferredGeneticContext(sampleName, negLog10PError, filters, attributes);
|
||||
if ( log10Likelihoods != null )
|
||||
commonInfo.putAttribute(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.fromLog10Likelihoods(log10Likelihoods));
|
||||
filtersWereAppliedToContext = filters != null;
|
||||
this.isPhased = isPhased;
|
||||
validate();
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new Genotype for sampleName with genotype according to alleles.
|
||||
* @param sampleName
|
||||
* @param alleles
|
||||
* @param negLog10PError the confidence in these alleles
|
||||
* @param log10Likelihoods a log10 likelihoods for each of the genotype combinations possible for alleles, in the standard VCF ordering, or null if not known
|
||||
*/
|
||||
public Genotype(String sampleName, List<Allele> alleles, double negLog10PError, double[] log10Likelihoods) {
|
||||
this(sampleName, alleles, negLog10PError, null, null, false, log10Likelihoods);
|
||||
}
|
||||
|
||||
public Genotype(String sampleName, List<Allele> alleles, double negLog10PError) {
|
||||
this(sampleName, alleles, negLog10PError, null, null, false);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -89,8 +89,8 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
|||
@Test public void combineWithPLs() { combinePLs("combine.3.vcf", "combine.4.vcf", "0f873fed02aa99db5b140bcd6282c10a"); }
|
||||
|
||||
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "1d5a021387a8a86554db45a29f66140f"); } // official project VCF files in tabix format
|
||||
@Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "20163d60f18a46496f6da744ab5cc0f9"); } // official project VCF files in tabix format
|
||||
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "312a22aedb088b678bc891f1a1b03c91"); }
|
||||
@Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "96941ee177b0614a9879af0ac3218963"); } // official project VCF files in tabix format
|
||||
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "f1c8720fde62687c2e861217670d8b3c"); }
|
||||
|
||||
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "e144b6283765494bfe8189ac59965083"); }
|
||||
|
||||
|
|
@ -110,7 +110,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
|||
" -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" +
|
||||
" -genotypeMergeOptions UNIQUIFY -L 1"),
|
||||
1,
|
||||
Arrays.asList("35acb0f15f9cd18c653ede4e15e365c9"));
|
||||
Arrays.asList("212d9d3df10bb29e2c7fb226da422dc0"));
|
||||
executeTest("threeWayWithRefs", spec);
|
||||
}
|
||||
|
||||
|
|
@ -137,7 +137,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T CombineVariants -NO_HEADER -L 1:902000-903000 -o %s -R " + b37KGReference + " -V:v1 " + b37dbSNP132,
|
||||
1,
|
||||
Arrays.asList(""));
|
||||
Arrays.asList("5969446769cb8377daa2db29304ae6b5"));
|
||||
executeTest("combineDBSNPDuplicateSites:", spec);
|
||||
}
|
||||
}
|
||||
|
|
@ -105,7 +105,6 @@ public class VCFWriterUnitTest extends BaseTest {
|
|||
public static VCFHeader createFakeHeader(Set<VCFHeaderLine> metaData, Set<String> additionalColumns) {
|
||||
metaData.add(new VCFHeaderLine(VCFHeaderVersion.VCF4_0.getFormatString(), VCFHeaderVersion.VCF4_0.getVersionString()));
|
||||
metaData.add(new VCFHeaderLine("two", "2"));
|
||||
additionalColumns.add("FORMAT");
|
||||
additionalColumns.add("extra1");
|
||||
additionalColumns.add("extra2");
|
||||
return new VCFHeader(metaData, additionalColumns);
|
||||
|
|
@ -159,6 +158,6 @@ public class VCFWriterUnitTest extends BaseTest {
|
|||
Assert.assertTrue(additionalColumns.contains(key));
|
||||
index++;
|
||||
}
|
||||
Assert.assertEquals(index+1, additionalColumns.size() /* for the header field we don't see */);
|
||||
Assert.assertEquals(index, additionalColumns.size());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ package org.broadinstitute.sting.utils.variantcontext;
|
|||
// the imports for unit testing.
|
||||
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeSuite;
|
||||
import org.testng.annotations.BeforeTest;
|
||||
|
|
@ -14,10 +15,7 @@ import java.util.Arrays;
|
|||
import java.util.List;
|
||||
|
||||
|
||||
/**
|
||||
* Basic unit test for RecalData
|
||||
*/
|
||||
public class VariantContextUnitTest {
|
||||
public class VariantContextUnitTest extends BaseTest {
|
||||
Allele A, Aref, T, Tref;
|
||||
Allele del, delRef, ATC, ATCref;
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,202 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
// our package
|
||||
package org.broadinstitute.sting.utils.variantcontext;
|
||||
|
||||
|
||||
// the imports for unit testing.
|
||||
|
||||
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import org.apache.log4j.Priority;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeSuite;
|
||||
import org.testng.annotations.Test;
|
||||
import org.testng.annotations.DataProvider;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.*;
|
||||
|
||||
|
||||
public class VariantContextUtilsUnitTest extends BaseTest {
|
||||
Allele Aref, T, delRef, ATC;
|
||||
Genotype ref1, snp1, snp2, indel1, indelref;
|
||||
private GenomeLocParser genomeLocParser;
|
||||
VariantContext refVC, snpVC1, snpVC2, snpVC3, snpVC4, indelVC1, indelVC2, indelVC3;
|
||||
|
||||
@BeforeSuite
|
||||
public void setup() {
|
||||
final File referenceFile = new File(b37KGReference);
|
||||
try {
|
||||
IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(referenceFile);
|
||||
genomeLocParser = new GenomeLocParser(seq);
|
||||
}
|
||||
catch(FileNotFoundException ex) {
|
||||
throw new UserException.CouldNotReadInputFile(referenceFile,ex);
|
||||
}
|
||||
|
||||
// alleles
|
||||
Aref = Allele.create("A", true);
|
||||
delRef = Allele.create("-", true);
|
||||
T = Allele.create("T");
|
||||
ATC = Allele.create("ATC");
|
||||
|
||||
ref1 = new Genotype("ref1", Arrays.asList(Aref, Aref), 5, new double[]{0, 5, 10});
|
||||
snp1 = new Genotype("snp1", Arrays.asList(Aref,T), 10, new double[]{10, 0, 20});
|
||||
snp2 = new Genotype("snp2", Arrays.asList(T,T), 15, new double[]{25, 15, 0});
|
||||
indelref = new Genotype("indelref", Arrays.asList(delRef,delRef), 25, new double[]{0, 25, 30});
|
||||
indel1 = new Genotype("indel1", Arrays.asList(delRef,ATC), 20, new double[]{20, 0, 30});
|
||||
|
||||
refVC = makeVC("refvc", Arrays.asList(Aref), Arrays.asList(ref1));
|
||||
snpVC1 = makeVC("snpvc1", Arrays.asList(Aref, T), Arrays.asList(snp1));
|
||||
snpVC2 = makeVC("snpvc2", Arrays.asList(Aref, T), Arrays.asList(snp1, snp2));
|
||||
snpVC3 = makeVC("snpvc3", Arrays.asList(Aref, T), Arrays.asList(ref1, snp1));
|
||||
snpVC4 = makeVC("snpvc4", Arrays.asList(Aref, T), Arrays.asList(ref1, snp1, snp2));
|
||||
indelVC1 = makeVC("indelvc1", Arrays.asList(delRef), Arrays.asList(indelref));
|
||||
indelVC2 = makeVC("indelvc2", Arrays.asList(delRef, ATC), Arrays.asList(indel1));
|
||||
indelVC3 = makeVC("indelvc3", Arrays.asList(delRef, ATC), Arrays.asList(indelref, indel1));
|
||||
}
|
||||
|
||||
private VariantContext makeVC(String source, List<Allele> alleles) {
|
||||
return makeVC(source, alleles, null, null);
|
||||
}
|
||||
|
||||
private VariantContext makeVC(String source, List<Allele> alleles, Collection<Genotype> genotypes) {
|
||||
return makeVC(source, alleles, genotypes, null);
|
||||
}
|
||||
|
||||
private VariantContext makeVC(String source, List<Allele> alleles, Collection<Genotype> genotypes, Set<String> filters) {
|
||||
int start = 10;
|
||||
int stop = start; // alleles.contains(ATC) ? start + 3 : start;
|
||||
return new VariantContext(source, "1", start, stop, alleles,
|
||||
VariantContext.genotypeCollectionToMap(new TreeMap<String, Genotype>(), genotypes),
|
||||
1.0, filters, null, (byte)'C');
|
||||
}
|
||||
|
||||
private class SimpleMergeTest extends TestDataProvider {
|
||||
List<VariantContext> inputVCs;
|
||||
VariantContext expectedVC;
|
||||
|
||||
private SimpleMergeTest(VariantContext... vcsArg) {
|
||||
super(SimpleMergeTest.class);
|
||||
LinkedList<VariantContext> allVCs = new LinkedList<VariantContext>(Arrays.asList(vcsArg));
|
||||
expectedVC = allVCs.pollLast();
|
||||
inputVCs = allVCs;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("SimpleMergeTest vc=%s expected=%s", inputVCs, expectedVC);
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "simplemergedata")
|
||||
public Object[][] createSimpleMergeData() {
|
||||
// first, do no harm
|
||||
new SimpleMergeTest(refVC, refVC);
|
||||
new SimpleMergeTest(snpVC1, snpVC1);
|
||||
new SimpleMergeTest(indelVC1, indelVC1);
|
||||
new SimpleMergeTest(indelVC3, indelVC3);
|
||||
|
||||
new SimpleMergeTest(refVC, snpVC1, snpVC3);
|
||||
new SimpleMergeTest(snpVC1, snpVC2, snpVC2);
|
||||
new SimpleMergeTest(refVC, snpVC2, snpVC4);
|
||||
|
||||
new SimpleMergeTest(indelVC1, indelVC2, indelVC3);
|
||||
new SimpleMergeTest(indelVC1, indelVC3, indelVC3);
|
||||
new SimpleMergeTest(indelVC2, indelVC3, indelVC3);
|
||||
|
||||
return SimpleMergeTest.getTests(SimpleMergeTest.class);
|
||||
}
|
||||
|
||||
private class SimpleMergeRSIDTest extends TestDataProvider {
|
||||
List<String> inputs;
|
||||
String expected;
|
||||
|
||||
private SimpleMergeRSIDTest(String... arg) {
|
||||
super(SimpleMergeRSIDTest.class);
|
||||
LinkedList<String> allStrings = new LinkedList<String>(Arrays.asList(arg));
|
||||
expected = allStrings.pollLast();
|
||||
inputs = allStrings;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("SimpleMergeRSIDTest vc=%s expected=%s", inputs, expected);
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "simplemergersiddata")
|
||||
public Object[][] createSimpleMergeRSIDData() {
|
||||
new SimpleMergeRSIDTest(".", ".");
|
||||
new SimpleMergeRSIDTest("rs1", "rs1");
|
||||
new SimpleMergeRSIDTest(".", "rs1", "rs1");
|
||||
new SimpleMergeRSIDTest("rs1", ".", "rs1");
|
||||
new SimpleMergeRSIDTest("rs1", "rs2", "rs1,rs2");
|
||||
new SimpleMergeRSIDTest("rs2", "rs1", "rs2,rs1");
|
||||
new SimpleMergeRSIDTest("rs2", "rs1", ".", "rs2,rs1");
|
||||
new SimpleMergeRSIDTest("rs2", ".", "rs1", "rs2,rs1");
|
||||
new SimpleMergeRSIDTest("rs1", ".", ".", "rs1");
|
||||
new SimpleMergeRSIDTest("rs1", "rs2", "rs3", "rs1,rs2,rs3");
|
||||
|
||||
return SimpleMergeRSIDTest.getTests(SimpleMergeRSIDTest.class);
|
||||
}
|
||||
|
||||
@Test(dataProvider = "simplemergersiddata")
|
||||
public void testRSIDMerge(SimpleMergeRSIDTest cfg) {
|
||||
List<VariantContext> inputs = new ArrayList<VariantContext>();
|
||||
for ( String id : cfg.inputs ) {
|
||||
MutableVariantContext vc = new MutableVariantContext(snpVC1);
|
||||
if ( ! id.equals(".") ) vc.setID(id);
|
||||
inputs.add(vc);
|
||||
|
||||
}
|
||||
|
||||
VariantContext merged = myMerge(inputs);
|
||||
Assert.assertEquals(merged.getID(), cfg.expected.equals(".") ? null : cfg.expected);
|
||||
}
|
||||
|
||||
private VariantContext myMerge(List<VariantContext> inputs) {
|
||||
List<String> priority = new ArrayList<String>();
|
||||
for ( VariantContext vc : inputs ) priority.add(vc.getSource());
|
||||
|
||||
return VariantContextUtils.simpleMerge(genomeLocParser,
|
||||
inputs, priority,
|
||||
VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
|
||||
VariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false);
|
||||
}
|
||||
|
||||
// todo -- add tests for subset merging, especially with correct PLs
|
||||
// todo -- test priority list
|
||||
// todo -- test FilteredRecordMergeType
|
||||
// todo -- no annotate origin
|
||||
// todo -- test set key
|
||||
// todo -- test filtered are uncalled
|
||||
}
|
||||
|
|
@ -6,7 +6,7 @@
|
|||
<dependencies>
|
||||
<!-- Recalibration analysis script -->
|
||||
<class name="org.broadinstitute.sting.analyzecovariates.AnalyzeCovariates" />
|
||||
<class name="org.broadinstitute.sting.gatk.walkers.recalibration.*" />
|
||||
<package name="org.broadinstitute.sting.gatk.walkers.recalibration" />
|
||||
</dependencies>
|
||||
</executable>
|
||||
<resources>
|
||||
|
|
|
|||
Loading…
Reference in New Issue