Merging many bug fixes to reduce reads
This commit is contained in:
commit
96c875399c
|
|
@ -26,14 +26,31 @@ public class Genotype {
|
||||||
protected boolean filtersWereAppliedToContext;
|
protected boolean filtersWereAppliedToContext;
|
||||||
|
|
||||||
public Genotype(String sampleName, List<Allele> alleles, double negLog10PError, Set<String> filters, Map<String, ?> attributes, boolean isPhased) {
|
public Genotype(String sampleName, List<Allele> alleles, double negLog10PError, Set<String> filters, Map<String, ?> attributes, boolean isPhased) {
|
||||||
|
this(sampleName, alleles, negLog10PError, filters, attributes, isPhased, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Genotype(String sampleName, List<Allele> alleles, double negLog10PError, Set<String> filters, Map<String, ?> attributes, boolean isPhased, double[] log10Likelihoods) {
|
||||||
if ( alleles != null )
|
if ( alleles != null )
|
||||||
this.alleles = Collections.unmodifiableList(alleles);
|
this.alleles = Collections.unmodifiableList(alleles);
|
||||||
commonInfo = new InferredGeneticContext(sampleName, negLog10PError, filters, attributes);
|
commonInfo = new InferredGeneticContext(sampleName, negLog10PError, filters, attributes);
|
||||||
|
if ( log10Likelihoods != null )
|
||||||
|
commonInfo.putAttribute(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.fromLog10Likelihoods(log10Likelihoods));
|
||||||
filtersWereAppliedToContext = filters != null;
|
filtersWereAppliedToContext = filters != null;
|
||||||
this.isPhased = isPhased;
|
this.isPhased = isPhased;
|
||||||
validate();
|
validate();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new Genotype for sampleName with genotype according to alleles.
|
||||||
|
* @param sampleName the name of the sample this genotype is created for
|
||||||
|
* @param alleles the alleles that define this genotype
|
||||||
|
* @param negLog10PError the confidence in these alleles
|
||||||
|
* @param log10Likelihoods the log10 likelihoods for each of the genotype combinations possible for alleles, in the standard VCF ordering, or null if not known
|
||||||
|
*/
|
||||||
|
public Genotype(String sampleName, List<Allele> alleles, double negLog10PError, double[] log10Likelihoods) {
|
||||||
|
this(sampleName, alleles, negLog10PError, null, null, false, log10Likelihoods);
|
||||||
|
}
|
||||||
|
|
||||||
public Genotype(String sampleName, List<Allele> alleles, double negLog10PError) {
|
public Genotype(String sampleName, List<Allele> alleles, double negLog10PError) {
|
||||||
this(sampleName, alleles, negLog10PError, null, null, false);
|
this(sampleName, alleles, negLog10PError, null, null, false);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -89,8 +89,8 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
||||||
@Test public void combineWithPLs() { combinePLs("combine.3.vcf", "combine.4.vcf", "0f873fed02aa99db5b140bcd6282c10a"); }
|
@Test public void combineWithPLs() { combinePLs("combine.3.vcf", "combine.4.vcf", "0f873fed02aa99db5b140bcd6282c10a"); }
|
||||||
|
|
||||||
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "1d5a021387a8a86554db45a29f66140f"); } // official project VCF files in tabix format
|
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "1d5a021387a8a86554db45a29f66140f"); } // official project VCF files in tabix format
|
||||||
@Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "20163d60f18a46496f6da744ab5cc0f9"); } // official project VCF files in tabix format
|
@Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "96941ee177b0614a9879af0ac3218963"); } // official project VCF files in tabix format
|
||||||
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "312a22aedb088b678bc891f1a1b03c91"); }
|
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "f1c8720fde62687c2e861217670d8b3c"); }
|
||||||
|
|
||||||
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "e144b6283765494bfe8189ac59965083"); }
|
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "e144b6283765494bfe8189ac59965083"); }
|
||||||
|
|
||||||
|
|
@ -110,7 +110,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
||||||
" -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" +
|
" -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" +
|
||||||
" -genotypeMergeOptions UNIQUIFY -L 1"),
|
" -genotypeMergeOptions UNIQUIFY -L 1"),
|
||||||
1,
|
1,
|
||||||
Arrays.asList("35acb0f15f9cd18c653ede4e15e365c9"));
|
Arrays.asList("212d9d3df10bb29e2c7fb226da422dc0"));
|
||||||
executeTest("threeWayWithRefs", spec);
|
executeTest("threeWayWithRefs", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -137,7 +137,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
"-T CombineVariants -NO_HEADER -L 1:902000-903000 -o %s -R " + b37KGReference + " -V:v1 " + b37dbSNP132,
|
"-T CombineVariants -NO_HEADER -L 1:902000-903000 -o %s -R " + b37KGReference + " -V:v1 " + b37dbSNP132,
|
||||||
1,
|
1,
|
||||||
Arrays.asList(""));
|
Arrays.asList("5969446769cb8377daa2db29304ae6b5"));
|
||||||
executeTest("combineDBSNPDuplicateSites:", spec);
|
executeTest("combineDBSNPDuplicateSites:", spec);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -105,7 +105,6 @@ public class VCFWriterUnitTest extends BaseTest {
|
||||||
public static VCFHeader createFakeHeader(Set<VCFHeaderLine> metaData, Set<String> additionalColumns) {
|
public static VCFHeader createFakeHeader(Set<VCFHeaderLine> metaData, Set<String> additionalColumns) {
|
||||||
metaData.add(new VCFHeaderLine(VCFHeaderVersion.VCF4_0.getFormatString(), VCFHeaderVersion.VCF4_0.getVersionString()));
|
metaData.add(new VCFHeaderLine(VCFHeaderVersion.VCF4_0.getFormatString(), VCFHeaderVersion.VCF4_0.getVersionString()));
|
||||||
metaData.add(new VCFHeaderLine("two", "2"));
|
metaData.add(new VCFHeaderLine("two", "2"));
|
||||||
additionalColumns.add("FORMAT");
|
|
||||||
additionalColumns.add("extra1");
|
additionalColumns.add("extra1");
|
||||||
additionalColumns.add("extra2");
|
additionalColumns.add("extra2");
|
||||||
return new VCFHeader(metaData, additionalColumns);
|
return new VCFHeader(metaData, additionalColumns);
|
||||||
|
|
@ -159,6 +158,6 @@ public class VCFWriterUnitTest extends BaseTest {
|
||||||
Assert.assertTrue(additionalColumns.contains(key));
|
Assert.assertTrue(additionalColumns.contains(key));
|
||||||
index++;
|
index++;
|
||||||
}
|
}
|
||||||
Assert.assertEquals(index+1, additionalColumns.size() /* for the header field we don't see */);
|
Assert.assertEquals(index, additionalColumns.size());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@ package org.broadinstitute.sting.utils.variantcontext;
|
||||||
// the imports for unit testing.
|
// the imports for unit testing.
|
||||||
|
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.BaseTest;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
import org.testng.annotations.BeforeSuite;
|
import org.testng.annotations.BeforeSuite;
|
||||||
import org.testng.annotations.BeforeTest;
|
import org.testng.annotations.BeforeTest;
|
||||||
|
|
@ -14,10 +15,7 @@ import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
|
||||||
/**
|
public class VariantContextUnitTest extends BaseTest {
|
||||||
* Basic unit test for RecalData
|
|
||||||
*/
|
|
||||||
public class VariantContextUnitTest {
|
|
||||||
Allele A, Aref, T, Tref;
|
Allele A, Aref, T, Tref;
|
||||||
Allele del, delRef, ATC, ATCref;
|
Allele del, delRef, ATC, ATCref;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,202 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// our package
|
||||||
|
package org.broadinstitute.sting.utils.variantcontext;
|
||||||
|
|
||||||
|
|
||||||
|
// the imports for unit testing.
|
||||||
|
|
||||||
|
|
||||||
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
|
import org.apache.log4j.Priority;
|
||||||
|
import org.broadinstitute.sting.BaseTest;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||||
|
import org.testng.Assert;
|
||||||
|
import org.testng.annotations.BeforeSuite;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
import org.testng.annotations.DataProvider;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
|
||||||
|
public class VariantContextUtilsUnitTest extends BaseTest {
|
||||||
|
Allele Aref, T, delRef, ATC;
|
||||||
|
Genotype ref1, snp1, snp2, indel1, indelref;
|
||||||
|
private GenomeLocParser genomeLocParser;
|
||||||
|
VariantContext refVC, snpVC1, snpVC2, snpVC3, snpVC4, indelVC1, indelVC2, indelVC3;
|
||||||
|
|
||||||
|
@BeforeSuite
|
||||||
|
public void setup() {
|
||||||
|
final File referenceFile = new File(b37KGReference);
|
||||||
|
try {
|
||||||
|
IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(referenceFile);
|
||||||
|
genomeLocParser = new GenomeLocParser(seq);
|
||||||
|
}
|
||||||
|
catch(FileNotFoundException ex) {
|
||||||
|
throw new UserException.CouldNotReadInputFile(referenceFile,ex);
|
||||||
|
}
|
||||||
|
|
||||||
|
// alleles
|
||||||
|
Aref = Allele.create("A", true);
|
||||||
|
delRef = Allele.create("-", true);
|
||||||
|
T = Allele.create("T");
|
||||||
|
ATC = Allele.create("ATC");
|
||||||
|
|
||||||
|
ref1 = new Genotype("ref1", Arrays.asList(Aref, Aref), 5, new double[]{0, 5, 10});
|
||||||
|
snp1 = new Genotype("snp1", Arrays.asList(Aref,T), 10, new double[]{10, 0, 20});
|
||||||
|
snp2 = new Genotype("snp2", Arrays.asList(T,T), 15, new double[]{25, 15, 0});
|
||||||
|
indelref = new Genotype("indelref", Arrays.asList(delRef,delRef), 25, new double[]{0, 25, 30});
|
||||||
|
indel1 = new Genotype("indel1", Arrays.asList(delRef,ATC), 20, new double[]{20, 0, 30});
|
||||||
|
|
||||||
|
refVC = makeVC("refvc", Arrays.asList(Aref), Arrays.asList(ref1));
|
||||||
|
snpVC1 = makeVC("snpvc1", Arrays.asList(Aref, T), Arrays.asList(snp1));
|
||||||
|
snpVC2 = makeVC("snpvc2", Arrays.asList(Aref, T), Arrays.asList(snp1, snp2));
|
||||||
|
snpVC3 = makeVC("snpvc3", Arrays.asList(Aref, T), Arrays.asList(ref1, snp1));
|
||||||
|
snpVC4 = makeVC("snpvc4", Arrays.asList(Aref, T), Arrays.asList(ref1, snp1, snp2));
|
||||||
|
indelVC1 = makeVC("indelvc1", Arrays.asList(delRef), Arrays.asList(indelref));
|
||||||
|
indelVC2 = makeVC("indelvc2", Arrays.asList(delRef, ATC), Arrays.asList(indel1));
|
||||||
|
indelVC3 = makeVC("indelvc3", Arrays.asList(delRef, ATC), Arrays.asList(indelref, indel1));
|
||||||
|
}
|
||||||
|
|
||||||
|
private VariantContext makeVC(String source, List<Allele> alleles) {
|
||||||
|
return makeVC(source, alleles, null, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
private VariantContext makeVC(String source, List<Allele> alleles, Collection<Genotype> genotypes) {
|
||||||
|
return makeVC(source, alleles, genotypes, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
private VariantContext makeVC(String source, List<Allele> alleles, Collection<Genotype> genotypes, Set<String> filters) {
|
||||||
|
int start = 10;
|
||||||
|
int stop = start; // alleles.contains(ATC) ? start + 3 : start;
|
||||||
|
return new VariantContext(source, "1", start, stop, alleles,
|
||||||
|
VariantContext.genotypeCollectionToMap(new TreeMap<String, Genotype>(), genotypes),
|
||||||
|
1.0, filters, null, (byte)'C');
|
||||||
|
}
|
||||||
|
|
||||||
|
private class SimpleMergeTest extends TestDataProvider {
|
||||||
|
List<VariantContext> inputVCs;
|
||||||
|
VariantContext expectedVC;
|
||||||
|
|
||||||
|
private SimpleMergeTest(VariantContext... vcsArg) {
|
||||||
|
super(SimpleMergeTest.class);
|
||||||
|
LinkedList<VariantContext> allVCs = new LinkedList<VariantContext>(Arrays.asList(vcsArg));
|
||||||
|
expectedVC = allVCs.pollLast();
|
||||||
|
inputVCs = allVCs;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return String.format("SimpleMergeTest vc=%s expected=%s", inputVCs, expectedVC);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@DataProvider(name = "simplemergedata")
|
||||||
|
public Object[][] createSimpleMergeData() {
|
||||||
|
// first, do no harm
|
||||||
|
new SimpleMergeTest(refVC, refVC);
|
||||||
|
new SimpleMergeTest(snpVC1, snpVC1);
|
||||||
|
new SimpleMergeTest(indelVC1, indelVC1);
|
||||||
|
new SimpleMergeTest(indelVC3, indelVC3);
|
||||||
|
|
||||||
|
new SimpleMergeTest(refVC, snpVC1, snpVC3);
|
||||||
|
new SimpleMergeTest(snpVC1, snpVC2, snpVC2);
|
||||||
|
new SimpleMergeTest(refVC, snpVC2, snpVC4);
|
||||||
|
|
||||||
|
new SimpleMergeTest(indelVC1, indelVC2, indelVC3);
|
||||||
|
new SimpleMergeTest(indelVC1, indelVC3, indelVC3);
|
||||||
|
new SimpleMergeTest(indelVC2, indelVC3, indelVC3);
|
||||||
|
|
||||||
|
return SimpleMergeTest.getTests(SimpleMergeTest.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
private class SimpleMergeRSIDTest extends TestDataProvider {
|
||||||
|
List<String> inputs;
|
||||||
|
String expected;
|
||||||
|
|
||||||
|
private SimpleMergeRSIDTest(String... arg) {
|
||||||
|
super(SimpleMergeRSIDTest.class);
|
||||||
|
LinkedList<String> allStrings = new LinkedList<String>(Arrays.asList(arg));
|
||||||
|
expected = allStrings.pollLast();
|
||||||
|
inputs = allStrings;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return String.format("SimpleMergeRSIDTest vc=%s expected=%s", inputs, expected);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@DataProvider(name = "simplemergersiddata")
|
||||||
|
public Object[][] createSimpleMergeRSIDData() {
|
||||||
|
new SimpleMergeRSIDTest(".", ".");
|
||||||
|
new SimpleMergeRSIDTest("rs1", "rs1");
|
||||||
|
new SimpleMergeRSIDTest(".", "rs1", "rs1");
|
||||||
|
new SimpleMergeRSIDTest("rs1", ".", "rs1");
|
||||||
|
new SimpleMergeRSIDTest("rs1", "rs2", "rs1,rs2");
|
||||||
|
new SimpleMergeRSIDTest("rs2", "rs1", "rs2,rs1");
|
||||||
|
new SimpleMergeRSIDTest("rs2", "rs1", ".", "rs2,rs1");
|
||||||
|
new SimpleMergeRSIDTest("rs2", ".", "rs1", "rs2,rs1");
|
||||||
|
new SimpleMergeRSIDTest("rs1", ".", ".", "rs1");
|
||||||
|
new SimpleMergeRSIDTest("rs1", "rs2", "rs3", "rs1,rs2,rs3");
|
||||||
|
|
||||||
|
return SimpleMergeRSIDTest.getTests(SimpleMergeRSIDTest.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(dataProvider = "simplemergersiddata")
|
||||||
|
public void testRSIDMerge(SimpleMergeRSIDTest cfg) {
|
||||||
|
List<VariantContext> inputs = new ArrayList<VariantContext>();
|
||||||
|
for ( String id : cfg.inputs ) {
|
||||||
|
MutableVariantContext vc = new MutableVariantContext(snpVC1);
|
||||||
|
if ( ! id.equals(".") ) vc.setID(id);
|
||||||
|
inputs.add(vc);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
VariantContext merged = myMerge(inputs);
|
||||||
|
Assert.assertEquals(merged.getID(), cfg.expected.equals(".") ? null : cfg.expected);
|
||||||
|
}
|
||||||
|
|
||||||
|
private VariantContext myMerge(List<VariantContext> inputs) {
|
||||||
|
List<String> priority = new ArrayList<String>();
|
||||||
|
for ( VariantContext vc : inputs ) priority.add(vc.getSource());
|
||||||
|
|
||||||
|
return VariantContextUtils.simpleMerge(genomeLocParser,
|
||||||
|
inputs, priority,
|
||||||
|
VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
|
||||||
|
VariantContextUtils.GenotypeMergeType.PRIORITIZE, true, false, "set", false, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
// todo -- add tests for subset merging, especially with correct PLs
|
||||||
|
// todo -- test priority list
|
||||||
|
// todo -- test FilteredRecordMergeType
|
||||||
|
// todo -- test without annotating origin
|
||||||
|
// todo -- test set key
|
||||||
|
// todo -- test filtered are uncalled
|
||||||
|
}
|
||||||
|
|
@ -6,7 +6,7 @@
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<!-- Recalibration analysis script -->
|
<!-- Recalibration analysis script -->
|
||||||
<class name="org.broadinstitute.sting.analyzecovariates.AnalyzeCovariates" />
|
<class name="org.broadinstitute.sting.analyzecovariates.AnalyzeCovariates" />
|
||||||
<class name="org.broadinstitute.sting.gatk.walkers.recalibration.*" />
|
<package name="org.broadinstitute.sting.gatk.walkers.recalibration" />
|
||||||
</dependencies>
|
</dependencies>
|
||||||
</executable>
|
</executable>
|
||||||
<resources>
|
<resources>
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue