From afcf7b96dbf94991ad19a7b76f54b5c530e4ea2f Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Wed, 10 Jul 2013 08:06:54 -0400 Subject: [PATCH] - Added per-sample AlleleBiasedDownsampling capability to HaplotypeCaller - Added integration test to show that providing a contamination value and providing same value via a file results in the same VCF - overrode default contamination value in test --- .../haplotypecaller/GenotypingEngine.java | 11 +- .../haplotypecaller/HaplotypeCaller.java | 14 +- .../BiasedDownsamplingIntegrationTest.java | 267 +++++------------- 3 files changed, 79 insertions(+), 213 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java index 04173b64f..82029b872 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java @@ -56,6 +56,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.collections.DefaultHashMap; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.haplotype.EventMap; @@ -166,6 +167,8 @@ public class GenotypingEngine { // Walk along each position in the key set and create each event to be outputted final Set calledHaplotypes = new HashSet<>(); final List returnCalls = new ArrayList<>(); + final Map emptyDownSamplingMap = new DefaultHashMap<>(0.0); + for( final int loc : startPosKeySet ) { if( loc >= activeRegionWindow.getStart() && loc <= activeRegionWindow.getStop() ) { // genotyping an event inside this active region final List eventsAtThisLoc = getVCsAtThisLocation(haplotypes, loc, activeAllelesToGenotype); @@ -197,13 +200,13 @@ public class GenotypingEngine { logger.info("Genotyping event at " + loc + " with alleles = " + mergedVC.getAlleles()); } - final Map alleleReadMap = convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, UG_engine.getUAC().CONTAMINATION_FRACTION ); + final Map alleleReadMap = convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, UG_engine.getUAC().getSampleContamination() ); final GenotypesContext genotypes = calculateGLsForThisEvent( alleleReadMap, mergedVC ); final VariantContext call = UG_engine.calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), mergedVC.isSNP() ? GenotypeLikelihoodsCalculationModel.Model.SNP : GenotypeLikelihoodsCalculationModel.Model.INDEL); if( call != null ) { final Map alleleReadMap_annotations = ( USE_FILTERED_READ_MAP_FOR_ANNOTATIONS ? alleleReadMap : - convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, 0.0 ) ); + convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, emptyDownSamplingMap ) ); final Map stratifiedReadMap = filterToOnlyOverlappingReads( genomeLocParser, alleleReadMap_annotations, perSampleFilteredReadList, call ); VariantContext annotatedCall = annotationEngine.annotateContextForActiveRegion(tracker, stratifiedReadMap, call); @@ -406,7 +409,7 @@ public class GenotypingEngine { // BUGBUG: ugh, too complicated protected Map convertHaplotypeReadMapToAlleleReadMap( final Map haplotypeReadMap, final Map> alleleMapper, - final double downsamplingFraction ) { + final Map perSampleDownsamplingFraction ) { final Map alleleReadMap = new LinkedHashMap<>(); for( final Map.Entry haplotypeReadMapEntry : haplotypeReadMap.entrySet() ) { // for each sample @@ -423,7 +426,7 @@ public class GenotypingEngine { perReadAlleleLikelihoodMap.add(readEntry.getKey(), alleleMapperEntry.getKey(), maxLikelihood); } } - perReadAlleleLikelihoodMap.performPerAlleleDownsampling(downsamplingFraction); // perform contamination downsampling + perReadAlleleLikelihoodMap.performPerAlleleDownsampling(perSampleDownsamplingFraction.get(haplotypeReadMapEntry.getKey())); // perform contamination downsampling alleleReadMap.put(haplotypeReadMapEntry.getKey(), perReadAlleleLikelihoodMap); } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index f18e37480..d96450370 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -47,7 +47,7 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller; import com.google.java.contract.Ensures; -import net.sf.samtools.*; +import net.sf.samtools.SAMFileWriter; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection; @@ -55,6 +55,7 @@ import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.downsampling.AlleleBiasedDownsamplingUtils; import org.broadinstitute.sting.gatk.downsampling.DownsampleType; import org.broadinstitute.sting.gatk.downsampling.DownsamplingUtils; import org.broadinstitute.sting.gatk.filters.BadMateFilter; @@ -70,7 +71,10 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine; import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext; import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalcFactory; import org.broadinstitute.sting.gatk.walkers.haplotypecaller.readthreading.ReadThreadingAssembler; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.QualityUtils; +import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.activeregion.ActiveRegion; import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState; import org.broadinstitute.sting.utils.activeregion.ActivityProfileState; @@ -552,14 +556,10 @@ public class HaplotypeCaller extends ActiveRegionWalker, In simpleUAC.exactCallsLog = null; UG_engine_simple_genotyper = new UnifiedGenotyperEngine(getToolkit(), simpleUAC, logger, null, null, samples, GATKVariantContextUtils.DEFAULT_PLOIDY); - // Currently, per-sample contamination level is only implemented for UG if( UAC.CONTAMINATION_FRACTION_FILE !=null) { - throw new UserException("Per-Sample contamination level not supported in Haplotype Caller at this point"); + UAC.setSampleContamination(AlleleBiasedDownsamplingUtils.loadContaminationFile(UAC.CONTAMINATION_FRACTION_FILE, UAC.CONTAMINATION_FRACTION, samples, logger)); } - // when we do implement per-sample contamination for HC, this will probably be needed. - // UAC.setSampleContamination(AlleleBiasedDownsamplingUtils.loadContaminationFile(UAC.CONTAMINATION_FRACTION_FILE, samples, logger)); - // initialize the output VCF header annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit()); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/BiasedDownsamplingIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/BiasedDownsamplingIntegrationTest.java index 77c9f96c9..fd1f0de8a 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/BiasedDownsamplingIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/BiasedDownsamplingIntegrationTest.java @@ -48,49 +48,24 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.WalkerTest; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.collections.Pair; +import org.junit.Assert; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import java.io.File; import java.util.Arrays; +import java.util.LinkedList; +import java.util.List; import java.util.Random; public class BiasedDownsamplingIntegrationTest extends WalkerTest { - private final static String baseCommand1 = "-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -minIndelFrac 0.0 --dbsnp " + b36dbSNP129; - private final static String baseCommand2 = "-T UnifiedGenotyper -R " + hg19Reference + " --no_cmdline_in_header -glm BOTH -L 20:1,000,000-5,000,000"; - private final static String baseCommand3 = "-T UnifiedGenotyper -R " + hg19Reference + " --no_cmdline_in_header -glm BOTH -L 20:4,000,000-5,000,000"; + private final static String baseCommandUG = "-T UnifiedGenotyper -R " + hg19Reference + " --no_cmdline_in_header -glm BOTH -L 20:4,000,000-5,000,000"; + private final static String baseCommandHC = "-T HaplotypeCaller -R " + hg19Reference + " --no_cmdline_in_header -L 20:4,000,000-5,000,000" + " --useFilteredReadsForAnnotations"; + private final String ArtificalBAMLocation = privateTestDir + "ArtificallyContaminatedBams/"; - // -------------------------------------------------------------------------------------------------------------- - // - // testing UnifiedGenotyper contamination down-sampling - // - // -------------------------------------------------------------------------------------------------------------- - - @Test(enabled = false) - public void testContaminationDownsamplingFlat() { - WalkerTestSpec spec = new WalkerTestSpec( - baseCommand1 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -contamination 0.20", 1, - Arrays.asList("1f9071466fc40f4c6a0f58ac8e9135fb")); - executeTest("test contamination_percentage_to_filter 0.20", spec); - } - - @Test(enabled = false) - public void testContaminationDownsamplingFlatAndPerSample() { - WalkerTestSpec spec = new WalkerTestSpec( - baseCommand1 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --contamination_fraction_per_sample_file " + ArtificalBAMLocation + "NA12878.NA19240.contam.txt --contamination_fraction_to_filter 0.10", 1, - Arrays.asList("53395814dd6990448a01a294ccd69bd2")); - executeTest("test contamination_percentage_to_filter per-sample and .20 overall", spec); - } - - @Test(enabled = false) - public void testContaminationDownsamplingPerSampleOnly() { - WalkerTestSpec spec = new WalkerTestSpec( - baseCommand1 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -contaminationFile " + ArtificalBAMLocation + "NA19240.contam.txt", 1, - Arrays.asList("4af83a883ecc03a23b0aa6dd4b8f1ceb")); - executeTest("test contamination_percentage_to_filter per-sample", spec); - } - // -------------------------------------------------------------------------------------------------------------- // @@ -98,150 +73,49 @@ public class BiasedDownsamplingIntegrationTest extends WalkerTest { // // -------------------------------------------------------------------------------------------------------------- - @Test(enabled = false) + @Test private void testDefaultContamination() { final String bam1 = "NA11918.with.1.NA12842.reduced.bam"; final String bam2 = "NA12842.with.1.NA11918.reduced.bam"; WalkerTestSpec spec = new WalkerTestSpec( - baseCommand2 + " -I " + ArtificalBAMLocation + bam1 + " -I " + ArtificalBAMLocation + bam2 + " -o %s ", 1, - Arrays.asList("e2e5a8dd313f8d7e382e7d49dfac59a2")); - executeTest("test contamination on Artificial Contamination (flat) on " + bam1 + " and " + bam2 + " with default downsampling.", spec); + baseCommandUG + " -I " + ArtificalBAMLocation + bam1 + " -I " + ArtificalBAMLocation + bam2 + " -o %s -contamination .05 ", 1, + Arrays.asList("b13612312ff991cf40ddc44255e76ecd")); + executeTest("test contamination on Artificial Contamination (flat) on " + bam1 + " and " + bam2 + " with .05 downsampling.", spec); } - private void testFlatContamination(final String bam1, final String bam2, final Double downsampling, final String md5) { - WalkerTestSpec spec = new WalkerTestSpec( - baseCommand2 + " -I " + ArtificalBAMLocation + bam1 + " -I " + ArtificalBAMLocation + bam2 + " -o %s -contamination " + downsampling.toString(), 1, - Arrays.asList(md5)); - executeTest("test contamination on Artificial Contamination (flat) on " + bam1 + " and " + bam2 + " downsampling " + downsampling.toString(), spec); - } - - @Test(enabled = false) - public void testFlatContaminationCase1() { - testFlatContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", 0.05, "e2e5a8dd313f8d7e382e7d49dfac59a2"); - } - - @Test(enabled = false) - public void testFlatContaminationCase2() { - testFlatContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", 0.1, "549737002f98775fea8f46e7ea174dde"); - } - - @Test(enabled = false) - public void testFlatContaminationCase3() { - testFlatContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", 0.2, "529d82c2a33fcc303a5dc55de2d56979"); - } - - @Test(enabled = false) - public void testFlatContaminationCase4() { - testFlatContamination("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.2.NA11918.reduced.bam", 0.1, "b5689972fbb7d230a372ee5f0da1c6d7"); - } - - @Test(enabled = false) - public void testFlatContaminationCase5() { - testFlatContamination("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.2.NA11918.reduced.bam", 0.2, "9dceee2e921b53fbc1ce137a7e0b7b74"); - } - - @Test(enabled = false) - public void testFlatContaminationCase6() { - testFlatContamination("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.2.NA11918.reduced.bam", 0.3, "d6a74061033503af80dcaea065bfa075"); - } - - @Test(enabled = false) - public void testFlatContaminationCase7() { - testFlatContamination("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", 0.1, "7d1b5efab58a1b8f9d99fcf5af82f15a"); - } - - @Test(enabled = false) - public void testFlatContaminationCase8() { - testFlatContamination("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", 0.2, "a7f8d5c79626aff59d7f426f79d8816e"); - } - - @Test(enabled = false) - public void testFlatContaminationCase9() { - testFlatContamination("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", 0.3, "fcf482398b7c908e3e2d1e4d5da6377b"); - } - - private void testPerSampleContamination(String bam1, String bam2, String persampleFile, final String md5) { - WalkerTestSpec spec = new WalkerTestSpec( - baseCommand2 + " -I " + ArtificalBAMLocation + bam1 + " -I " + ArtificalBAMLocation + bam2 + " -o %s -contaminationFile " + persampleFile, 1, - Arrays.asList(md5)); - executeTest("test contamination on Artificial Contamination (per-sample) on " + bam1 + " and " + bam2 + " with " + persampleFile, spec); - } - - @Test(enabled = false) - public void testPerSampleContaminationCase1() { - testPerSampleContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.1.txt", "e00278527a294833259e9e411728e395"); - } - - @Test(enabled = false) - public void testPerSampleContaminationCase2() { - testPerSampleContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.2.txt", "a443e793f0b0e2ffce1b751634d706e2"); - } - - @Test(enabled = false) - public void testPerSampleContaminationCase3() { - testPerSampleContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.3.txt", "e11d83a7815ce757afbcf7689568cb25"); - } - - @Test(enabled = false) - public void testPerSampleContaminationCase4() { - testPerSampleContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.4.txt", "615042eeeffe042bd1c86279d34f80b6"); - } - - @Test(enabled = false) - public void testPerSampleContaminationCase5() { - testPerSampleContamination("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.1.txt", "9bc99fc79ca34744bf26cb19ee4ef44d"); - } - - @Test(enabled = false) - public void testPerSampleContaminationCase6() { - testPerSampleContamination("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.2.txt", "143626fe5fce765d6c997a64f058a813"); - } - - @Test(enabled = false) - public void testPerSampleContaminationCase7() { - testPerSampleContamination("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.3.txt", "f2593674cef894eda4e0be9cf3158f57"); - } - - @Test(enabled = false) - public void testPerSampleContaminationCase8() { - testPerSampleContamination("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.4.txt", "fb7ce0740767ae3896b3e552026da1e4"); - } - - - private void testPerSampleEqualsFlat(final String bam1, final String bam2, final String persampleFile, final Double downsampling, final String md5) { - final String command = baseCommand3 + " -I " + ArtificalBAMLocation + bam1 + " -I " + ArtificalBAMLocation + bam2 + " -o %s "; - - WalkerTestSpec spec = new WalkerTestSpec( command +" -contaminationFile " + persampleFile, 1, Arrays.asList(md5)); - final Random rnd = GenomeAnalysisEngine.getRandomGenerator(); - - rnd.setSeed(123451); // so that the two test cases have a hope of giving the same result - executeTest("test contamination on Artificial Contamination, with per-sample file on " + bam1 + " and " + bam2 + " with " + persampleFile, spec); - - spec = new WalkerTestSpec(command + "-contamination " + downsampling.toString(), 1, Arrays.asList(md5)); - - rnd.setSeed(123451); // so that the two test cases have a hope of giving the same result - executeTest("test contamination on Artificial Contamination, with flat contamination on " + bam1 + " and " + bam2 + " with " + downsampling.toString(), spec); - - } // verify that inputing a file with an effectively flat contamination level is equivalent to handing in a flat contamination level - @Test(enabled = false) - public void testPerSampleEqualsFlatContaminationCase1() { - testPerSampleEqualsFlat("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.6.txt", 0.0, ""); + + @DataProvider(name="PerSampleEqualFlatContamBams") + public Object[][] makePerSampleEqualFlatContamBams() { + final List tests = new LinkedList(); + tests.add(new Object[]{"NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.6.txt", 0.0}) ; + tests.add(new Object[]{"NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.7.txt", 0.15}) ; + tests.add(new Object[]{"NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.8.txt", 0.3}) ; + + return tests.toArray(new Object[][]{}); } - @Test(enabled = false) - public void testPerSampleEqualsFlatContaminationCase2() { - testPerSampleEqualsFlat("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.7.txt", 0.15, ""); - } + @Test(dataProvider = "PerSampleEqualFlatContamBams") + private void testPerSampleEqualsFlat(final String bam1, final String bam2, final String persampleFile, final Double downsampling) { + final String command = baseCommandUG + " -I " + ArtificalBAMLocation + bam1 + " -I " + ArtificalBAMLocation + bam2 + " -o %s "; - @Test(enabled = false) - public void testPerSampleEqualsFlatContaminationCase3() { - testPerSampleEqualsFlat("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.8.txt", 0.3, ""); - } + WalkerTestSpec spec = new WalkerTestSpec( command +" -contaminationFile " + persampleFile, 1, Arrays.asList("")); + final Random rnd = GenomeAnalysisEngine.getRandomGenerator(); + rnd.setSeed(123451); // so that the two test cases have a hope of giving the same result + Pair, List> test1 = executeTest("test contamination on Artificial Contamination, with per-sample file on " + bam1 + " and " + bam2 + " with " + persampleFile, spec); + + spec = new WalkerTestSpec(command + "-contamination " + downsampling.toString(), 1, Arrays.asList("")); + + rnd.setSeed(123451); // so that the two test cases have a hope of giving the same result + Pair, List> test2 = executeTest("test contamination on Artificial Contamination, with flat contamination on " + bam1 + " and " + bam2 + " with " + downsampling.toString(), spec); + + //verify that the md5s match up. + Assert.assertEquals(test1.getSecond().get(0),test2.getSecond().get(0)); + } // -------------------------------------------------------------------------------------------------------------- // @@ -250,50 +124,39 @@ public class BiasedDownsamplingIntegrationTest extends WalkerTest { // -------------------------------------------------------------------------------------------------------------- - @Test(enabled = false) - public void testHCContaminationDownsamplingFlat() { - final String baseCommand = "-T HaplotypeCaller -R " + b36KGReference + " --no_cmdline_in_header --dbsnp " + b36dbSNP129; - WalkerTestSpec spec = new WalkerTestSpec( - baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -contamination 0.20", 1, - Arrays.asList("c3a253467ead7b1cfe9fd9dd310828b1")); - executeTest("HC calling with contamination_percentage_to_filter 0.20", spec); - } - // HaplotypeCaller can only (currently) use flat contamination reduction, not per-sample. Until that is implemented, this test - @Test(enabled = false) - public void testHCCannotProcessPerSampleContamination() { - final String baseCommand = "-T HaplotypeCaller -R " + hg19Reference + " --no_cmdline_in_header -L 20:3,000,000-5,000,000"; - final String bam1 = "NA11918.with.1.NA12842.reduced.bam"; - final String perSampleFile = ArtificalBAMLocation + "contamination.case.1.txt"; - WalkerTestSpec spec = new WalkerTestSpec( - baseCommand + " -I " + ArtificalBAMLocation + bam1 + " -o %s -contaminationFile " + perSampleFile, 1, - UserException.class); - executeTest("HC should fail on per-Sample contamination removal.", spec); + @DataProvider(name="PerSampleEqualFlatContamBamsHC") + public Object[][] makePerSampleEqualFlatContamBamsHC() { + final List tests = new LinkedList(); + tests.add(new Object[]{"NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.6.txt", 0.0 }) ; + tests.add(new Object[]{"NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.7.txt", 0.15}) ; + tests.add(new Object[]{"NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.8.txt", 0.3}) ; + + return tests.toArray(new Object[][]{}); } - private void testHCFlatContamination(final String bam1, final String bam2, final Double downsampling, final String md5) { - final String baseCommand = "-T HaplotypeCaller -R " + hg19Reference + " --no_cmdline_in_header -L 20:3,000,000-5,000,000"; + @Test(dataProvider = "PerSampleEqualFlatContamBamsHC") + private void testPerSampleEqualsFlatHC(final String bam1, final String bam2, final String persampleFile, final Double downsampling) { + final String command = baseCommandHC + " -I " + ArtificalBAMLocation + bam1 + " -I " + ArtificalBAMLocation + bam2 + " -o %s "; + + WalkerTestSpec spec = new WalkerTestSpec( command +" -contaminationFile " + persampleFile, 1, Arrays.asList("")); + final Random rnd = GenomeAnalysisEngine.getRandomGenerator(); + + rnd.setSeed(123451); // so that the two test cases have a hope of giving the same result + + Pair, List> test1= executeTest("test contamination on Artificial Contamination, with per-sample file on " + bam1 + " and " + bam2 + " with " + persampleFile, spec); + + WalkerTestSpec spec2 = new WalkerTestSpec(command + "-contamination " + downsampling.toString(), 1, Arrays.asList("")); + + rnd.setSeed(123451); // so that the two test cases have a hope of giving the same result + Pair, List> test2=executeTest("test contamination on Artificial Contamination, with flat contamination on " + bam1 + " and " + bam2 + " with " + downsampling.toString(), spec); + + //verify that the md5s match up. + Assert.assertEquals(test1.getSecond().get(0),test2.getSecond().get(0)); - WalkerTestSpec spec = new WalkerTestSpec( - baseCommand + " -I " + ArtificalBAMLocation + bam1 + " -I " + ArtificalBAMLocation + bam2 + " -o %s -contamination " + downsampling.toString(), 1, - Arrays.asList(md5)); - executeTest("HC test contamination on Artificial Contamination (flat) on " + bam1 + " and " + bam2 + " downsampling " + downsampling.toString(), spec); } - @Test(enabled = false) - public void testHCFlatContaminationCase1() { - testHCFlatContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", 0.05, "c3e695381d8627e3922d8c642b66c3ce"); - } - @Test(enabled = false) - public void testHCFlatContaminationCase2() { - testHCFlatContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", 0.1, "002d2b45336d88d7c04e19f9f26e29d9"); - } - @Test(enabled = false) - public void testHCFlatContaminationCase3() { - testHCFlatContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", 0.2, "1809a33ac112d1a3bd7a071c566794dd"); - } - -} +} \ No newline at end of file