Merge branch 'master' of ssh://gsa3.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Menachem Fromer 2012-10-26 03:55:02 -04:00
commit 28393e9b30
12 changed files with 107 additions and 82 deletions

View File

@ -60,6 +60,10 @@ public class AlleleBiasedDownsamplingUtils {
// start by stratifying the reads by the alleles they represent at this position
for( final PileupElement pe : pileup ) {
// abort if we have a reduced read - we do not want to remove it!
if ( pe.getRead().isReducedRead() )
return pileup;
final int baseIndex = BaseUtils.simpleBaseToBaseIndex(pe.getBase());
if ( baseIndex != -1 )
alleleStratifiedElements[baseIndex].add(pe);

View File

@ -55,9 +55,12 @@ public class AdvancedPerReadAlleleLikelihoodMap extends StandardPerReadAlleleLik
alleleReadMap.put(allele, new ArrayList<GATKSAMRecord>());
for ( Map.Entry<GATKSAMRecord, Map<Allele, Double>> entry : likelihoodReadMap.entrySet() ) {
final Allele bestAllele = getMostLikelyAllele(entry.getValue());
if ( bestAllele != Allele.NO_CALL )
alleleReadMap.get(bestAllele).add(entry.getKey());
// do not remove reduced reads!
if ( !entry.getKey().isReducedRead() ) {
final Allele bestAllele = getMostLikelyAllele(entry.getValue());
if ( bestAllele != Allele.NO_CALL )
alleleReadMap.get(bestAllele).add(entry.getKey());
}
}
// compute the reads to remove and actually remove them

View File

@ -55,36 +55,36 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest {
@Test(enabled = true)
public void testSNP_ACS_Pools() {
PC_LSV_Test_short(" -maxAltAlleles 1 -ploidy 6 -out_mode EMIT_ALL_CONFIDENT_SITES","LSV_SNP_ACS","SNP","ec19f0b7c7d57493cecfff988a4815c8");
PC_LSV_Test_short(" -maxAltAlleles 1 -ploidy 6 -out_mode EMIT_ALL_CONFIDENT_SITES","LSV_SNP_ACS","SNP","df0e67c975ef74d593f1c704daab1705");
}
@Test(enabled = true)
public void testBOTH_GGA_Pools() {
PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","9ce24f4ff787aed9d3754519a60ef49f");
PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","7e5b28c9e21cc7e45c58c41177d8a0fc");
}
@Test(enabled = true)
public void testINDEL_GGA_Pools() {
PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","492c8ba9a80a902097ff15bbeb031592");
PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","ae6c276cc46785a794acff6f7d10ecf7");
}
@Test(enabled = true)
public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() {
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","848e1092b5cd57b0da5f1187e67134e7");
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","481452ad7d6378cffb5cd834cc621d55");
}
@Test(enabled = true)
public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() {
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","51a7b51d82a341adec0e6510f5dfadd8");
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","812957e51277aca9925c1a7bb4d9a118");
}
@Test(enabled = true)
public void testMT_SNP_DISCOVERY_sp4() {
PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","0a8c3b06243040b743dd90d497bb3f83");
PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","dd568dc30be90135a3a8957a45a7321c");
}
@Test(enabled = true)
public void testMT_SNP_GGA_sp10() {
PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "f8ea18ec6a717a77fdf8c5f2482d8d8d");
PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "bf793c43b635a931207170be8035b288");
}
}

View File

@ -21,17 +21,17 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSample() {
HCTest(CEUTRIO_BAM, "", "1d826f852ec1457efbfa7ee828c5783a");
HCTest(CEUTRIO_BAM, "", "aa1df35d6e64d7ca93feb4d2dd15dd0e");
}
@Test
public void testHaplotypeCallerSingleSample() {
HCTest(NA12878_BAM, "", "01367428c26d3eaf9297c58bf8677dd3");
HCTest(NA12878_BAM, "", "186c7f322978283c01249c6de2829215");
}
@Test
public void testHaplotypeCallerMultiSampleGGA() {
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "53caa950535749f99d3c5b9bb61c7b60");
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "de9e78a52207fe62144dba5337965469");
}
private void HCTestComplexVariants(String bam, String args, String md5) {
@ -42,7 +42,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSampleComplex() {
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "966d338f423c86a390d685aa6336ec69");
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "000dbb1b48f94d017cfec127c6cabe8f");
}
private void HCTestSymbolicVariants(String bam, String args, String md5) {
@ -53,7 +53,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerSingleSampleSymbolic() {
HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "b4ea70a446e4782bd3700ca14dd726ff");
HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "16013a9203367c3d1c4ce1dcdc81ef4a");
}
private void HCTestIndelQualityScores(String bam, String args, String md5) {
@ -64,20 +64,20 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerSingleSampleIndelQualityScores() {
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "2581e760279291a3901a506d060bfac8");
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "b369c2a6cb5c99a424551b33bae16f3b");
}
@Test
public void HCTestProblematicReadsModifiedInActiveRegions() {
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3";
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("c54c0c9411054bf629bfd98b616e53fc"));
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("c306140ad28515ee06c603c225217939"));
executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec);
}
@Test
public void HCTestStructuralIndels() {
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("c642dcd93771f6f084d55de31f180d1b"));
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("b6c67ee8e99cc8f53a6587bb26028047"));
executeTest("HCTestStructuralIndels: ", spec);
}
@ -91,7 +91,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void HCTestReducedBam() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
Arrays.asList("79af83432dc4a1768b3ebffffc4d2b8f"));
Arrays.asList("4beb9f87ab3f316a9384c3d0dca6ebe9"));
executeTest("HC calling on a ReducedRead BAM", spec);
}
}

View File

@ -75,8 +75,9 @@ public class StandardCallerArgumentCollection {
* Basically, it will ignore the contamination fraction of reads for each alternate allele. So if the pileup contains N total bases, then we
* will try to remove (N * contamination fraction) bases for each alternate allele.
*/
@Argument(fullName = "contamination_percentage_to_filter", shortName = "contamination", doc = "Fraction of contamination in sequencing data (for all samples) to aggressively remove", required = false)
public double CONTAMINATION_FRACTION = 0.0;
@Argument(fullName = "contamination_fraction_to_filter", shortName = "contamination", doc = "Fraction of contamination in sequencing data (for all samples) to aggressively remove", required = false)
public double CONTAMINATION_FRACTION = DEFAULT_CONTAMINATION_FRACTION;
public static final double DEFAULT_CONTAMINATION_FRACTION = 0.05;
@Hidden
@Argument(fullName = "logRemovedReadsFromContaminationFiltering", shortName="contaminationLog", required=false)

View File

@ -47,8 +47,8 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection
/**
* Note that calculating the SLOD increases the runtime by an appreciable amount.
*/
@Argument(fullName = "noSLOD", shortName = "nosl", doc = "If provided, we will not calculate the SLOD", required = false)
public boolean NO_SLOD = false;
@Argument(fullName = "computeSLOD", shortName = "slod", doc = "If provided, we will calculate the SLOD (SB annotation)", required = false)
public boolean COMPUTE_SLOD = false;
/**
* Depending on the value of the --max_alternate_alleles argument, we may genotype only a fraction of the alleles being sent on for genotyping.
@ -204,7 +204,7 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection
this.GLmodel = uac.GLmodel;
this.AFmodel = uac.AFmodel;
this.PCR_error = uac.PCR_error;
this.NO_SLOD = uac.NO_SLOD;
this.COMPUTE_SLOD = uac.COMPUTE_SLOD;
this.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED = uac.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED;
this.MIN_BASE_QUALTY_SCORE = uac.MIN_BASE_QUALTY_SCORE;
this.MAX_DELETION_FRACTION = uac.MAX_DELETION_FRACTION;

View File

@ -28,6 +28,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection;
import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
@ -238,7 +239,12 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
}
if ( UAC.CONTAMINATION_FRACTION > 0.0 ) {
throw new UserException.NotSupportedInGATKLite("you cannot enable usage of contamination down-sampling");
if ( UAC.CONTAMINATION_FRACTION == StandardCallerArgumentCollection.DEFAULT_CONTAMINATION_FRACTION ) {
UAC.CONTAMINATION_FRACTION = 0.0;
logger.warn("setting contamination down-sampling fraction to 0.0 because it is not enabled in GATK-lite");
} else {
throw new UserException.NotSupportedInGATKLite("you cannot enable usage of contamination down-sampling");
}
}
}
@ -290,7 +296,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
headerInfo.addAll(annotationEngine.getVCFAnnotationDescriptions());
// annotation (INFO) fields from UnifiedGenotyper
if ( !UAC.NO_SLOD )
if ( UAC.COMPUTE_SLOD )
VCFStandardHeaderLines.addStandardInfoLines(headerInfo, true, VCFConstants.STRAND_BIAS_KEY);
if ( UAC.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED )

View File

@ -451,7 +451,7 @@ public class UnifiedGenotyperEngine {
attributes.put(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, MLEfrequencies);
}
if ( !UAC.NO_SLOD && !limitedContext && !bestGuessIsRef ) {
if ( UAC.COMPUTE_SLOD && !limitedContext && !bestGuessIsRef ) {
//final boolean DEBUG_SLOD = false;
// the overall lod

View File

@ -459,7 +459,6 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
UAC.GLmodel = GenotypeLikelihoodsCalculationModel.Model.BOTH;
UAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES;
UAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
UAC.NO_SLOD = true;
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
headerLines.addAll(UnifiedGenotyper.getHeaderInfo(UAC, null, null));
}

View File

@ -193,6 +193,8 @@ class JEXLMap implements Map<VariantContextUtils.JexlVCMatchExp, Boolean> {
infoMap.put("isHet", g.isHet() ? "1" : "0");
infoMap.put("isHomVar", g.isHomVar() ? "1" : "0");
infoMap.put(VCFConstants.GENOTYPE_QUALITY_KEY, g.getGQ());
if ( g.hasDP() )
infoMap.put(VCFConstants.DEPTH_KEY, g.getDP());
for ( Map.Entry<String, Object> e : g.getExtendedAttributes().entrySet() ) {
if ( e.getValue() != null && !e.getValue().equals(VCFConstants.MISSING_VALUE_v4) )
infoMap.put(e.getKey(), e.getValue());

View File

@ -108,4 +108,22 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
Arrays.asList("8ed32a2272bab8043a255362335395ef"));
executeTest("testUnfilteredBecomesFilteredAndPass", spec);
}
@Test
public void testFilteringDPfromINFO() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T VariantFiltration -o %s --no_cmdline_in_header -R " + b37KGReference
+ " --filterExpression 'DP < 8' --filterName lowDP -V " + privateTestDir + "filteringDepthInFormat.vcf", 1,
Arrays.asList("a01f7cce53ea556c9741aa60b6124c41"));
executeTest("testFilteringDPfromINFO", spec);
}
@Test
public void testFilteringDPfromFORMAT() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T VariantFiltration -o %s --no_cmdline_in_header -R " + b37KGReference
+ " --genotypeFilterExpression 'DP < 8' --genotypeFilterName lowDP -V " + privateTestDir + "filteringDepthInFormat.vcf", 1,
Arrays.asList("e10485c7c33d9211d0c1294fd7858476"));
executeTest("testFilteringDPfromFORMAT", spec);
}
}

View File

@ -16,9 +16,9 @@ import java.util.List;
public class UnifiedGenotyperIntegrationTest extends WalkerTest {
private final static String baseCommand = "-T UnifiedGenotyper -R " + b36KGReference + " -nosl --no_cmdline_in_header -glm BOTH -minIndelFrac 0.0 --dbsnp " + b36dbSNP129;
private final static String baseCommandIndels = "-T UnifiedGenotyper -R " + b36KGReference + " -nosl --no_cmdline_in_header -glm INDEL -mbq 20 -minIndelFrac 0.0 --dbsnp " + b36dbSNP129;
private final static String baseCommandIndelsb37 = "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -mbq 20 --dbsnp " + b37dbSNP132;
private final static String baseCommand = "-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -minIndelFrac 0.0 --dbsnp " + b36dbSNP129;
private final static String baseCommandIndels = "-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm INDEL -mbq 20 -minIndelFrac 0.0 --dbsnp " + b36dbSNP129;
private final static String baseCommandIndelsb37 = "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -mbq 20 --dbsnp " + b37dbSNP132;
private final static String baseCommandNoCmdLineHeaderStdout = "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam";
// --------------------------------------------------------------------------------------------------------------
@ -30,7 +30,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
Arrays.asList("847605f4efafef89529fe0e496315edd"));
Arrays.asList("cdec335abc9ad8e59335e39a73e0e95a"));
executeTest("test MultiSample Pilot1", spec);
}
@ -38,7 +38,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testWithAllelesPassedIn1() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
Arrays.asList("bc15123620e1134f799005d71d1180fe"));
Arrays.asList("efddb5e258f97fd4f6661cff9eaa57de"));
executeTest("test MultiSample Pilot2 with alleles passed in", spec1);
}
@ -46,7 +46,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testWithAllelesPassedIn2() {
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
Arrays.asList("1ba7afccc8552f20d72d0b62237558e3"));
Arrays.asList("24532eb381724cd74e99370da28d49ed"));
executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
}
@ -54,22 +54,22 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testSingleSamplePilot2() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
Arrays.asList("afb8768f31ab57eb43f75c1115eadc99"));
Arrays.asList("062a946160eec1d0fc135d58ca654ff4"));
executeTest("test SingleSample Pilot2", spec);
}
@Test
public void testMultipleSNPAlleles() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
Arrays.asList("543f68e42034bf44cfb24da8c9204320"));
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
Arrays.asList("94dc17d76d841f1d3a36160767ffa034"));
executeTest("test Multiple SNP alleles", spec);
}
@Test
public void testBadRead() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH -I " + privateTestDir + "badRead.test.bam -o %s -L 1:22753424-22753464", 1,
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH -I " + privateTestDir + "badRead.test.bam -o %s -L 1:22753424-22753464", 1,
Arrays.asList("d915535c1458733f09f82670092fcab6"));
executeTest("test bad read", spec);
}
@ -77,16 +77,16 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testReverseTrim() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1,
Arrays.asList("5ce03dd9ca2d9324c1d4a9d64389beb5"));
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1,
Arrays.asList("9106d01ca0d0a8fedd068e72d509f380"));
executeTest("test reverse trim", spec);
}
@Test
public void testMismatchedPLs() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1,
Arrays.asList("3c006b06b17bbe8e787d64eff6a63a19"));
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1,
Arrays.asList("d847acf841ba8ba653f996ce4869f439"));
executeTest("test mismatched PLs", spec);
}
@ -96,7 +96,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
//
// --------------------------------------------------------------------------------------------------------------
private final static String COMPRESSED_OUTPUT_MD5 = "fd236bd635d514e4214d364f45ec4d10";
private final static String COMPRESSED_OUTPUT_MD5 = "6792419c482e767a3deb28913ed2b1ad";
@Test
public void testCompressedOutput() {
@ -117,7 +117,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
// Note that we need to turn off any randomization for this to work, so no downsampling and no annotations
String md5 = "d408b4661b820ed86272415b8ea08780";
String md5 = "3d25ea660275a455ca443a786bff3d32";
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
@ -149,15 +149,15 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMinBaseQualityScore() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --min_base_quality_score 26", 1,
Arrays.asList("839ecd30d354a36b5dfa2b5e99859765"));
Arrays.asList("56157d930da6ccd224bce1ca93f11e41"));
executeTest("test min_base_quality_score 26", spec);
}
@Test
public void testSLOD() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
Arrays.asList("c7429e670ba477bf9a6bbee2fb41c5a9"));
"-T UnifiedGenotyper -R " + b36KGReference + " --computeSLOD --no_cmdline_in_header -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
Arrays.asList("9f95cfe14d53a697c58247833bfd72a6"));
executeTest("test SLOD", spec);
}
@ -165,7 +165,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testNDA() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " --annotateNDA -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
Arrays.asList("abd8e33e649cc11b55e200d3940cc7e2"));
Arrays.asList("480437dd6e2760f4ab3194431519f331"));
executeTest("test NDA", spec);
}
@ -173,7 +173,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testCompTrack() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -comp:FOO " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
Arrays.asList("8a9b424e00cdbe6b5e73d517335b2186"));
Arrays.asList("22c039412fd387dde6125b07c9a74a25"));
executeTest("test using comp track", spec);
}
@ -187,17 +187,17 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testOutputParameterSitesOnly() {
testOutputParameters("-sites_only", "97ba874eafc9884a4de027a84c036311");
testOutputParameters("-sites_only", "40aeb4c9e31fe7046b72afc58e7599cb");
}
@Test
public void testOutputParameterAllConfident() {
testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "f9ea04d96eeef29e71d37e60518c2579");
testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "c706ca93b25ff83613cb4e95dcac567c");
}
@Test
public void testOutputParameterAllSites() {
testOutputParameters("--output_mode EMIT_ALL_SITES", "41c046d38ea328421df924e37e017645");
testOutputParameters("--output_mode EMIT_ALL_SITES", "8a263fd0a94463ce1de9990f2b8ec841");
}
private void testOutputParameters(final String args, final String md5) {
@ -211,18 +211,10 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testConfidence() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
Arrays.asList("9addd225a985178339a0c49dc5fdc220"));
Arrays.asList("df524e98903d96ab9353bee7c16a69de"));
executeTest("test confidence 1", spec1);
}
@Test
public void testConfidence2() {
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1,
Arrays.asList("9addd225a985178339a0c49dc5fdc220"));
executeTest("test confidence 2", spec2);
}
// --------------------------------------------------------------------------------------------------------------
//
// testing heterozygosity
@ -230,12 +222,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
// --------------------------------------------------------------------------------------------------------------
@Test
public void testHeterozyosity1() {
testHeterozosity( 0.01, "986923de51c71635d47e3d06fe3794a1" );
testHeterozosity( 0.01, "8e61498ca03a8d805372a64c466b3b42" );
}
@Test
public void testHeterozyosity2() {
testHeterozosity( 1.0 / 1850, "fb12b1553f813004a394a391a8540873" );
testHeterozosity( 1.0 / 1850, "668d06b5173cf3b97d052726988e1d7b" );
}
private void testHeterozosity(final double arg, final String md5) {
@ -259,7 +251,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,100,000",
1,
Arrays.asList("04a87b87ee4323eba853c78f25551d1a"));
Arrays.asList("908eb5e21fa39e7fb377cf4a9c4c7835"));
executeTest(String.format("test multiple technologies"), spec);
}
@ -278,7 +270,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -L 1:10,000,000-10,100,000" +
" -baq CALCULATE_AS_NECESSARY",
1,
Arrays.asList("98058fc913b61c22d44875da1f5ea89c"));
Arrays.asList("c814558bb0ed2e19b12e1a2bf4465d52"));
executeTest(String.format("test calling with BAQ"), spec);
}
@ -297,7 +289,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("650c53774afacfc07a595675e8cdde17"));
Arrays.asList("3593495aab5f6204c65de0b073a6ff65"));
executeTest(String.format("test indel caller in SLX"), spec);
}
@ -312,7 +304,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -minIndelCnt 1" +
" -L 1:10,000,000-10,100,000",
1,
Arrays.asList("6a0c2a3a7bcc56ad01428c71408055aa"));
Arrays.asList("8b486a098029d5a106b0a37eff541c15"));
executeTest(String.format("test indel caller in SLX with low min allele count"), spec);
}
@ -325,7 +317,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("5f2721c3323de5390d2d47446139f32b"));
Arrays.asList("18efedc50cae2aacaba372265e38310b"));
executeTest(String.format("test indel calling, multiple technologies"), spec);
}
@ -335,7 +327,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("7e3f67bf371112be5dbadb4fe6faa52a"));
Arrays.asList("3ff8c7c80a518aa3eb8671a21479de5f"));
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec);
}
@ -345,7 +337,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
+ privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("bc31c4977cb7e563ddf9c8dea27f3f4f"));
Arrays.asList("578c0540f4f2052a634a829bcb9cc27d"));
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec);
}
@ -353,13 +345,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSampleIndels1() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
Arrays.asList("a4761d7f25e7a62f34494801c98a0da7"));
Arrays.asList("f7d0d0aee603df25c1f0525bb8df189e"));
List<File> result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst();
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
Arrays.asList("c526c234947482d1cd2ffc5102083a08"));
Arrays.asList("fc91d457a16b4ca994959c2b5f3f0352"));
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
}
@ -415,7 +407,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMinIndelFraction0() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
assessMinIndelFraction + " -minIndelFrac 0.0", 1,
Arrays.asList("ba4fafec383fb988f20c8cf53dd3e9a0"));
Arrays.asList("857b8e5df444463ac27f665c4f67fbe2"));
executeTest("test minIndelFraction 0.0", spec);
}
@ -423,7 +415,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMinIndelFraction25() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
assessMinIndelFraction + " -minIndelFrac 0.25", 1,
Arrays.asList("4c57a88de275105156aaafc6f9041365"));
Arrays.asList("81d4c7d9010fd6733b2997bc378e7471"));
executeTest("test minIndelFraction 0.25", spec);
}
@ -444,8 +436,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testNsInCigar() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -I " + validationDataLocation + "testWithNs.bam -o %s -L 8:141799600-141814700", 1,
Arrays.asList("e8ebfaac0804b782f22ab8ea35152735"));
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + validationDataLocation + "testWithNs.bam -o %s -L 8:141799600-141814700", 1,
Arrays.asList("bd7984a374f0ae5d277bd5fc5065f64f"));
executeTest("test calling on reads with Ns in CIGAR", spec);
}
@ -458,25 +450,25 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testReducedBam() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
Arrays.asList("da9c05f87bd6415e97f90c49cf68ed19"));
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
Arrays.asList("1b711c0966a2f76eb21801e73b76b758"));
executeTest("test calling on a ReducedRead BAM", spec);
}
@Test
public void testReducedBamSNPs() {
testReducedCalling("SNP", "1d4a826b144723ff0766c36aa0239287");
testReducedCalling("SNP", "9ba4867cadb366746ee63e7a4afdb95e");
}
@Test
public void testReducedBamINDELs() {
testReducedCalling("INDEL", "68ef51d5c98480e0c0192e0eecb95bca");
testReducedCalling("INDEL", "132a4e0ccf9230b5bb4b56c649e2bdd5");
}
private void testReducedCalling(final String model, final String md5) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -I " + privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.reduced.bam -o %s -L 20:10,000,000-11,000,000 -glm " + model, 1,
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.reduced.bam -o %s -L 20:10,000,000-11,000,000 -glm " + model, 1,
Arrays.asList(md5));
executeTest("test calling on a ReducedRead BAM with " + model, spec);
}
@ -490,7 +482,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testContaminationDownsampling() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --contamination_percentage_to_filter 0.20", 1,
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --contamination_fraction_to_filter 0.20", 1,
Arrays.asList("27dd04159e06d9524fb8a4eef41f96ae"));
executeTest("test contamination_percentage_to_filter 0.20", spec);
}