- Added per-sample AlleleBiasedDownsampling capability to HaplotypeCaller

- Added integration tests showing that providing a flat contamination value and providing the same value via a per-sample file result in the same VCF

- Overrode the default contamination value in testDefaultContamination (it now passes -contamination .05)
This commit is contained in:
Yossi Farjoun 2013-07-10 08:06:54 -04:00
parent 7ddf85c040
commit afcf7b96db
3 changed files with 79 additions and 213 deletions

View File

@ -56,6 +56,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.collections.DefaultHashMap;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.haplotype.EventMap;
@ -166,6 +167,8 @@ public class GenotypingEngine {
// Walk along each position in the key set and create each event to be outputted
final Set<Haplotype> calledHaplotypes = new HashSet<>();
final List<VariantContext> returnCalls = new ArrayList<>();
final Map<String, Double> emptyDownSamplingMap = new DefaultHashMap<>(0.0);
for( final int loc : startPosKeySet ) {
if( loc >= activeRegionWindow.getStart() && loc <= activeRegionWindow.getStop() ) { // genotyping an event inside this active region
final List<VariantContext> eventsAtThisLoc = getVCsAtThisLocation(haplotypes, loc, activeAllelesToGenotype);
@ -197,13 +200,13 @@ public class GenotypingEngine {
logger.info("Genotyping event at " + loc + " with alleles = " + mergedVC.getAlleles());
}
final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap = convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, UG_engine.getUAC().CONTAMINATION_FRACTION );
final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap = convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, UG_engine.getUAC().getSampleContamination() );
final GenotypesContext genotypes = calculateGLsForThisEvent( alleleReadMap, mergedVC );
final VariantContext call = UG_engine.calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), mergedVC.isSNP() ? GenotypeLikelihoodsCalculationModel.Model.SNP : GenotypeLikelihoodsCalculationModel.Model.INDEL);
if( call != null ) {
final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap_annotations = ( USE_FILTERED_READ_MAP_FOR_ANNOTATIONS ? alleleReadMap :
convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, 0.0 ) );
convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, emptyDownSamplingMap ) );
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = filterToOnlyOverlappingReads( genomeLocParser, alleleReadMap_annotations, perSampleFilteredReadList, call );
VariantContext annotatedCall = annotationEngine.annotateContextForActiveRegion(tracker, stratifiedReadMap, call);
@ -406,7 +409,7 @@ public class GenotypingEngine {
// BUGBUG: ugh, too complicated
protected Map<String, PerReadAlleleLikelihoodMap> convertHaplotypeReadMapToAlleleReadMap( final Map<String, PerReadAlleleLikelihoodMap> haplotypeReadMap,
final Map<Allele, List<Haplotype>> alleleMapper,
final double downsamplingFraction ) {
final Map<String,Double> perSampleDownsamplingFraction ) {
final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap = new LinkedHashMap<>();
for( final Map.Entry<String, PerReadAlleleLikelihoodMap> haplotypeReadMapEntry : haplotypeReadMap.entrySet() ) { // for each sample
@ -423,7 +426,7 @@ public class GenotypingEngine {
perReadAlleleLikelihoodMap.add(readEntry.getKey(), alleleMapperEntry.getKey(), maxLikelihood);
}
}
perReadAlleleLikelihoodMap.performPerAlleleDownsampling(downsamplingFraction); // perform contamination downsampling
perReadAlleleLikelihoodMap.performPerAlleleDownsampling(perSampleDownsamplingFraction.get(haplotypeReadMapEntry.getKey())); // perform contamination downsampling
alleleReadMap.put(haplotypeReadMapEntry.getKey(), perReadAlleleLikelihoodMap);
}

View File

@ -47,7 +47,7 @@
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
import com.google.java.contract.Ensures;
import net.sf.samtools.*;
import net.sf.samtools.SAMFileWriter;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection;
@ -55,6 +55,7 @@ import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.downsampling.AlleleBiasedDownsamplingUtils;
import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
import org.broadinstitute.sting.gatk.downsampling.DownsamplingUtils;
import org.broadinstitute.sting.gatk.filters.BadMateFilter;
@ -70,7 +71,10 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalcFactory;
import org.broadinstitute.sting.gatk.walkers.haplotypecaller.readthreading.ReadThreadingAssembler;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileState;
@ -552,14 +556,10 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
simpleUAC.exactCallsLog = null;
UG_engine_simple_genotyper = new UnifiedGenotyperEngine(getToolkit(), simpleUAC, logger, null, null, samples, GATKVariantContextUtils.DEFAULT_PLOIDY);
// Currently, per-sample contamination level is only implemented for UG
if( UAC.CONTAMINATION_FRACTION_FILE !=null) {
throw new UserException("Per-Sample contamination level not supported in Haplotype Caller at this point");
UAC.setSampleContamination(AlleleBiasedDownsamplingUtils.loadContaminationFile(UAC.CONTAMINATION_FRACTION_FILE, UAC.CONTAMINATION_FRACTION, samples, logger));
}
// when we do implement per-sample contamination for HC, this will probably be needed.
// UAC.setSampleContamination(AlleleBiasedDownsamplingUtils.loadContaminationFile(UAC.CONTAMINATION_FRACTION_FILE, samples, logger));
// initialize the output VCF header
annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());

View File

@ -48,49 +48,24 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.WalkerTest;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.collections.Pair;
import org.junit.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.File;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import java.util.Random;
public class BiasedDownsamplingIntegrationTest extends WalkerTest {
private final static String baseCommand1 = "-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -minIndelFrac 0.0 --dbsnp " + b36dbSNP129;
private final static String baseCommand2 = "-T UnifiedGenotyper -R " + hg19Reference + " --no_cmdline_in_header -glm BOTH -L 20:1,000,000-5,000,000";
private final static String baseCommand3 = "-T UnifiedGenotyper -R " + hg19Reference + " --no_cmdline_in_header -glm BOTH -L 20:4,000,000-5,000,000";
private final static String baseCommandUG = "-T UnifiedGenotyper -R " + hg19Reference + " --no_cmdline_in_header -glm BOTH -L 20:4,000,000-5,000,000";
private final static String baseCommandHC = "-T HaplotypeCaller -R " + hg19Reference + " --no_cmdline_in_header -L 20:4,000,000-5,000,000" + " --useFilteredReadsForAnnotations";
private final String ArtificalBAMLocation = privateTestDir + "ArtificallyContaminatedBams/";
// --------------------------------------------------------------------------------------------------------------
//
// testing UnifiedGenotyper contamination down-sampling
//
// --------------------------------------------------------------------------------------------------------------
@Test(enabled = false)
public void testContaminationDownsamplingFlat() {
WalkerTestSpec spec = new WalkerTestSpec(
baseCommand1 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -contamination 0.20", 1,
Arrays.asList("1f9071466fc40f4c6a0f58ac8e9135fb"));
executeTest("test contamination_percentage_to_filter 0.20", spec);
}
@Test(enabled = false)
public void testContaminationDownsamplingFlatAndPerSample() {
WalkerTestSpec spec = new WalkerTestSpec(
baseCommand1 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --contamination_fraction_per_sample_file " + ArtificalBAMLocation + "NA12878.NA19240.contam.txt --contamination_fraction_to_filter 0.10", 1,
Arrays.asList("53395814dd6990448a01a294ccd69bd2"));
executeTest("test contamination_percentage_to_filter per-sample and .20 overall", spec);
}
@Test(enabled = false)
public void testContaminationDownsamplingPerSampleOnly() {
WalkerTestSpec spec = new WalkerTestSpec(
baseCommand1 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -contaminationFile " + ArtificalBAMLocation + "NA19240.contam.txt", 1,
Arrays.asList("4af83a883ecc03a23b0aa6dd4b8f1ceb"));
executeTest("test contamination_percentage_to_filter per-sample", spec);
}
// --------------------------------------------------------------------------------------------------------------
//
@ -98,150 +73,49 @@ public class BiasedDownsamplingIntegrationTest extends WalkerTest {
//
// --------------------------------------------------------------------------------------------------------------
@Test(enabled = false)
@Test
private void testDefaultContamination() {
final String bam1 = "NA11918.with.1.NA12842.reduced.bam";
final String bam2 = "NA12842.with.1.NA11918.reduced.bam";
WalkerTestSpec spec = new WalkerTestSpec(
baseCommand2 + " -I " + ArtificalBAMLocation + bam1 + " -I " + ArtificalBAMLocation + bam2 + " -o %s ", 1,
Arrays.asList("e2e5a8dd313f8d7e382e7d49dfac59a2"));
executeTest("test contamination on Artificial Contamination (flat) on " + bam1 + " and " + bam2 + " with default downsampling.", spec);
baseCommandUG + " -I " + ArtificalBAMLocation + bam1 + " -I " + ArtificalBAMLocation + bam2 + " -o %s -contamination .05 ", 1,
Arrays.asList("b13612312ff991cf40ddc44255e76ecd"));
executeTest("test contamination on Artificial Contamination (flat) on " + bam1 + " and " + bam2 + " with .05 downsampling.", spec);
}
private void testFlatContamination(final String bam1, final String bam2, final Double downsampling, final String md5) {
WalkerTestSpec spec = new WalkerTestSpec(
baseCommand2 + " -I " + ArtificalBAMLocation + bam1 + " -I " + ArtificalBAMLocation + bam2 + " -o %s -contamination " + downsampling.toString(), 1,
Arrays.asList(md5));
executeTest("test contamination on Artificial Contamination (flat) on " + bam1 + " and " + bam2 + " downsampling " + downsampling.toString(), spec);
}
@Test(enabled = false)
public void testFlatContaminationCase1() {
testFlatContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", 0.05, "e2e5a8dd313f8d7e382e7d49dfac59a2");
}
@Test(enabled = false)
public void testFlatContaminationCase2() {
testFlatContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", 0.1, "549737002f98775fea8f46e7ea174dde");
}
@Test(enabled = false)
public void testFlatContaminationCase3() {
testFlatContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", 0.2, "529d82c2a33fcc303a5dc55de2d56979");
}
@Test(enabled = false)
public void testFlatContaminationCase4() {
testFlatContamination("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.2.NA11918.reduced.bam", 0.1, "b5689972fbb7d230a372ee5f0da1c6d7");
}
@Test(enabled = false)
public void testFlatContaminationCase5() {
testFlatContamination("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.2.NA11918.reduced.bam", 0.2, "9dceee2e921b53fbc1ce137a7e0b7b74");
}
@Test(enabled = false)
public void testFlatContaminationCase6() {
testFlatContamination("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.2.NA11918.reduced.bam", 0.3, "d6a74061033503af80dcaea065bfa075");
}
@Test(enabled = false)
public void testFlatContaminationCase7() {
testFlatContamination("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", 0.1, "7d1b5efab58a1b8f9d99fcf5af82f15a");
}
@Test(enabled = false)
public void testFlatContaminationCase8() {
testFlatContamination("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", 0.2, "a7f8d5c79626aff59d7f426f79d8816e");
}
@Test(enabled = false)
public void testFlatContaminationCase9() {
testFlatContamination("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", 0.3, "fcf482398b7c908e3e2d1e4d5da6377b");
}
private void testPerSampleContamination(String bam1, String bam2, String persampleFile, final String md5) {
WalkerTestSpec spec = new WalkerTestSpec(
baseCommand2 + " -I " + ArtificalBAMLocation + bam1 + " -I " + ArtificalBAMLocation + bam2 + " -o %s -contaminationFile " + persampleFile, 1,
Arrays.asList(md5));
executeTest("test contamination on Artificial Contamination (per-sample) on " + bam1 + " and " + bam2 + " with " + persampleFile, spec);
}
@Test(enabled = false)
public void testPerSampleContaminationCase1() {
testPerSampleContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.1.txt", "e00278527a294833259e9e411728e395");
}
@Test(enabled = false)
public void testPerSampleContaminationCase2() {
testPerSampleContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.2.txt", "a443e793f0b0e2ffce1b751634d706e2");
}
@Test(enabled = false)
public void testPerSampleContaminationCase3() {
testPerSampleContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.3.txt", "e11d83a7815ce757afbcf7689568cb25");
}
@Test(enabled = false)
public void testPerSampleContaminationCase4() {
testPerSampleContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.4.txt", "615042eeeffe042bd1c86279d34f80b6");
}
@Test(enabled = false)
public void testPerSampleContaminationCase5() {
testPerSampleContamination("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.1.txt", "9bc99fc79ca34744bf26cb19ee4ef44d");
}
@Test(enabled = false)
public void testPerSampleContaminationCase6() {
testPerSampleContamination("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.2.txt", "143626fe5fce765d6c997a64f058a813");
}
@Test(enabled = false)
public void testPerSampleContaminationCase7() {
testPerSampleContamination("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.3.txt", "f2593674cef894eda4e0be9cf3158f57");
}
@Test(enabled = false)
public void testPerSampleContaminationCase8() {
testPerSampleContamination("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.4.txt", "fb7ce0740767ae3896b3e552026da1e4");
}
private void testPerSampleEqualsFlat(final String bam1, final String bam2, final String persampleFile, final Double downsampling, final String md5) {
final String command = baseCommand3 + " -I " + ArtificalBAMLocation + bam1 + " -I " + ArtificalBAMLocation + bam2 + " -o %s ";
WalkerTestSpec spec = new WalkerTestSpec( command +" -contaminationFile " + persampleFile, 1, Arrays.asList(md5));
final Random rnd = GenomeAnalysisEngine.getRandomGenerator();
rnd.setSeed(123451); // so that the two test cases have a hope of giving the same result
executeTest("test contamination on Artificial Contamination, with per-sample file on " + bam1 + " and " + bam2 + " with " + persampleFile, spec);
spec = new WalkerTestSpec(command + "-contamination " + downsampling.toString(), 1, Arrays.asList(md5));
rnd.setSeed(123451); // so that the two test cases have a hope of giving the same result
executeTest("test contamination on Artificial Contamination, with flat contamination on " + bam1 + " and " + bam2 + " with " + downsampling.toString(), spec);
}
// Verify that supplying a per-sample file with an effectively flat contamination level is equivalent to passing the same flat contamination level on the command line
@Test(enabled = false)
public void testPerSampleEqualsFlatContaminationCase1() {
testPerSampleEqualsFlat("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.6.txt", 0.0, "");
/**
 * Supplies the cases for the UnifiedGenotyper "per-sample file equals flat value" test.
 *
 * Each row is {first BAM name, second BAM name, path to a per-sample contamination file,
 * flat contamination fraction}. Presumably each file assigns every sample the fraction
 * given in the last column -- TODO confirm against the contamination.case.*.txt files.
 *
 * @return one Object[] per test invocation
 */
@DataProvider(name="PerSampleEqualFlatContamBams")
public Object[][] makePerSampleEqualFlatContamBams() {
    // The same pair of artificially contaminated BAMs is used for every case.
    final String firstBam = "NA11918.with.2.NA12842.reduced.bam";
    final String secondBam = "NA12842.with.1.NA11918.reduced.bam";

    return new Object[][]{
            {firstBam, secondBam, ArtificalBAMLocation + "contamination.case.6.txt", 0.0},
            {firstBam, secondBam, ArtificalBAMLocation + "contamination.case.7.txt", 0.15},
            {firstBam, secondBam, ArtificalBAMLocation + "contamination.case.8.txt", 0.3}
    };
}
@Test(enabled = false)
public void testPerSampleEqualsFlatContaminationCase2() {
testPerSampleEqualsFlat("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.7.txt", 0.15, "");
}
@Test(dataProvider = "PerSampleEqualFlatContamBams")
private void testPerSampleEqualsFlat(final String bam1, final String bam2, final String persampleFile, final Double downsampling) {
final String command = baseCommandUG + " -I " + ArtificalBAMLocation + bam1 + " -I " + ArtificalBAMLocation + bam2 + " -o %s ";
@Test(enabled = false)
public void testPerSampleEqualsFlatContaminationCase3() {
testPerSampleEqualsFlat("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.8.txt", 0.3, "");
}
WalkerTestSpec spec = new WalkerTestSpec( command +" -contaminationFile " + persampleFile, 1, Arrays.asList(""));
final Random rnd = GenomeAnalysisEngine.getRandomGenerator();
rnd.setSeed(123451); // so that the two test cases have a hope of giving the same result
Pair<List<File>, List<String>> test1 = executeTest("test contamination on Artificial Contamination, with per-sample file on " + bam1 + " and " + bam2 + " with " + persampleFile, spec);
spec = new WalkerTestSpec(command + "-contamination " + downsampling.toString(), 1, Arrays.asList(""));
rnd.setSeed(123451); // so that the two test cases have a hope of giving the same result
Pair<List<File>, List<String>> test2 = executeTest("test contamination on Artificial Contamination, with flat contamination on " + bam1 + " and " + bam2 + " with " + downsampling.toString(), spec);
//verify that the md5s match up.
Assert.assertEquals(test1.getSecond().get(0),test2.getSecond().get(0));
}
// --------------------------------------------------------------------------------------------------------------
//
@ -250,50 +124,39 @@ public class BiasedDownsamplingIntegrationTest extends WalkerTest {
// --------------------------------------------------------------------------------------------------------------
@Test(enabled = false)
public void testHCContaminationDownsamplingFlat() {
final String baseCommand = "-T HaplotypeCaller -R " + b36KGReference + " --no_cmdline_in_header --dbsnp " + b36dbSNP129;
WalkerTestSpec spec = new WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -contamination 0.20", 1,
Arrays.asList("c3a253467ead7b1cfe9fd9dd310828b1"));
executeTest("HC calling with contamination_percentage_to_filter 0.20", spec);
}
// HaplotypeCaller can only (currently) use flat contamination reduction, not per-sample. Until that is implemented, this test verifies that passing a per-sample contamination file makes HC fail with a UserException.
@Test(enabled = false)
public void testHCCannotProcessPerSampleContamination() {
final String baseCommand = "-T HaplotypeCaller -R " + hg19Reference + " --no_cmdline_in_header -L 20:3,000,000-5,000,000";
final String bam1 = "NA11918.with.1.NA12842.reduced.bam";
final String perSampleFile = ArtificalBAMLocation + "contamination.case.1.txt";
WalkerTestSpec spec = new WalkerTestSpec(
baseCommand + " -I " + ArtificalBAMLocation + bam1 + " -o %s -contaminationFile " + perSampleFile, 1,
UserException.class);
executeTest("HC should fail on per-Sample contamination removal.", spec);
/**
 * Supplies the cases for the HaplotypeCaller "per-sample file equals flat value" test.
 *
 * Each row is {first BAM name, second BAM name, path to a per-sample contamination file,
 * flat contamination fraction}. Presumably each file assigns every sample the fraction
 * given in the last column -- TODO confirm against the contamination.case.*.txt files.
 *
 * @return one Object[] per test invocation
 */
@DataProvider(name="PerSampleEqualFlatContamBamsHC")
public Object[][] makePerSampleEqualFlatContamBamsHC() {
    // The same pair of artificially contaminated BAMs is used for every case.
    final String firstBam = "NA11918.with.2.NA12842.reduced.bam";
    final String secondBam = "NA12842.with.1.NA11918.reduced.bam";

    return new Object[][]{
            {firstBam, secondBam, ArtificalBAMLocation + "contamination.case.6.txt", 0.0},
            {firstBam, secondBam, ArtificalBAMLocation + "contamination.case.7.txt", 0.15},
            {firstBam, secondBam, ArtificalBAMLocation + "contamination.case.8.txt", 0.3}
    };
}
private void testHCFlatContamination(final String bam1, final String bam2, final Double downsampling, final String md5) {
final String baseCommand = "-T HaplotypeCaller -R " + hg19Reference + " --no_cmdline_in_header -L 20:3,000,000-5,000,000";
/**
 * Checks that HaplotypeCaller yields the same output whether contamination is given through
 * a per-sample file ({@code -contaminationFile}) or as a flat value ({@code -contamination}).
 *
 * The walker is run twice on the same two BAMs, once per way of specifying contamination,
 * and the md5 reported for each run's output is compared.
 *
 * @param bam1          name of the first BAM, resolved against ArtificalBAMLocation
 * @param bam2          name of the second BAM, resolved against ArtificalBAMLocation
 * @param persampleFile path of the per-sample contamination file
 * @param downsampling  flat contamination fraction passed via -contamination
 */
@Test(dataProvider = "PerSampleEqualFlatContamBamsHC")
private void testPerSampleEqualsFlatHC(final String bam1, final String bam2, final String persampleFile, final Double downsampling) {
    final String command = baseCommandHC + " -I " + ArtificalBAMLocation + bam1 + " -I " + ArtificalBAMLocation + bam2 + " -o %s ";

    // Both specs are built with an empty expected md5; the check that matters is the
    // comparison of the two runs at the end of this method.
    final WalkerTestSpec perSampleSpec = new WalkerTestSpec( command +" -contaminationFile " + persampleFile, 1, Arrays.asList(""));
    final WalkerTestSpec flatSpec = new WalkerTestSpec(command + "-contamination " + downsampling.toString(), 1, Arrays.asList(""));

    final Random rnd = GenomeAnalysisEngine.getRandomGenerator();

    rnd.setSeed(123451); // so that the two test cases have a hope of giving the same result
    final Pair<List<File>, List<String>> test1 = executeTest("test contamination on Artificial Contamination, with per-sample file on " + bam1 + " and " + bam2 + " with " + persampleFile, perSampleSpec);

    rnd.setSeed(123451); // so that the two test cases have a hope of giving the same result
    // The second run must use the flat-contamination spec. Re-running the per-sample spec
    // here would compare a run against itself and make the assertion below meaningless.
    final Pair<List<File>, List<String>> test2 = executeTest("test contamination on Artificial Contamination, with flat contamination on " + bam1 + " and " + bam2 + " with " + downsampling.toString(), flatSpec);

    //verify that the md5s match up.
    Assert.assertEquals(test1.getSecond().get(0), test2.getSecond().get(0));
}
@Test(enabled = false)
public void testHCFlatContaminationCase1() {
testHCFlatContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", 0.05, "c3e695381d8627e3922d8c642b66c3ce");
}
@Test(enabled = false)
public void testHCFlatContaminationCase2() {
testHCFlatContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", 0.1, "002d2b45336d88d7c04e19f9f26e29d9");
}
@Test(enabled = false)
public void testHCFlatContaminationCase3() {
testHCFlatContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", 0.2, "1809a33ac112d1a3bd7a071c566794dd");
}
}
}