From ba2c3b57edff4f8b4186ffacd499ba59794b0f7c Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 24 Apr 2013 11:52:26 -0400 Subject: [PATCH 1/2] Extended the allele-biased down-sampling functionality to handle reduced reads. Note that this works only in the case of pileups (i.e. coming from UG); allele-biased down-sampling for RR just cannot work for haplotypes. Added lots of unit tests for new functionality. --- .../StandardCallerArgumentCollection.java | 6 - ...elGenotypeLikelihoodsCalculationModel.java | 2 +- ...NPGenotypeLikelihoodsCalculationModel.java | 2 +- .../haplotypecaller/GenotypingEngine.java | 10 +- .../indels/PairHMMIndelErrorModel.java | 6 +- ...dGenotyperReducedReadsIntegrationTest.java | 6 +- .../PerReadAlleleLikelihoodMapUnitTest.java | 2 +- .../arguments/GATKArgumentCollection.java | 5 +- .../AlleleBiasedDownsamplingUtils.java | 181 +++++++++++------- .../genotyper/PerReadAlleleLikelihoodMap.java | 19 +- .../sting/utils/pileup/PileupElement.java | 15 +- .../sting/utils/sam/GATKSAMRecord.java | 35 +++- ...AlleleBiasedDownsamplingUtilsUnitTest.java | 56 +++++- .../utils/sam/GATKSAMRecordUnitTest.java | 34 +++- 14 files changed, 268 insertions(+), 111 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java b/protected/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java index 03698489c..63b8717c0 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java @@ -54,7 +54,6 @@ import org.broadinstitute.sting.utils.collections.DefaultHashMap; import org.broadinstitute.variant.variantcontext.VariantContext; import java.io.File; -import java.io.PrintStream; import java.util.Collections; import java.util.Map; @@ -170,10 +169,6 @@ public class StandardCallerArgumentCollection { @Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ", required = false) public AFCalcFactory.Calculation AFmodel = AFCalcFactory.Calculation.getDefaultModel(); - @Hidden - @Argument(fullName = "logRemovedReadsFromContaminationFiltering", shortName="contaminationLog", required=false) - public PrintStream contaminationLog = null; - @Hidden @Argument(shortName = "logExactCalls", doc="x", required=false) public File exactCallsLog = null; @@ -192,7 +187,6 @@ public class StandardCallerArgumentCollection { this.STANDARD_CONFIDENCE_FOR_EMITTING = SCAC.STANDARD_CONFIDENCE_FOR_EMITTING; this.CONTAMINATION_FRACTION = SCAC.CONTAMINATION_FRACTION; this.CONTAMINATION_FRACTION_FILE=SCAC.CONTAMINATION_FRACTION_FILE; - this.contaminationLog = SCAC.contaminationLog; this.exactCallsLog = SCAC.exactCallsLog; this.sampleContamination=SCAC.sampleContamination; this.AFmodel = SCAC.AFmodel; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java index 8a766ba48..c6e9ea379 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java @@ -145,7 +145,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood final ReadBackedPileup pileup = context.getBasePileup(); if (pileup != null) { final GenotypeBuilder b = new GenotypeBuilder(sample.getKey()); - final double[] genotypeLikelihoods = pairModel.computeDiploidReadHaplotypeLikelihoods(pileup, haplotypeMap, ref, eventLength, perReadAlleleLikelihoodMap.get(sample.getKey()), UAC.getSampleContamination().get(sample.getKey()), UAC.contaminationLog); + final double[] genotypeLikelihoods = pairModel.computeDiploidReadHaplotypeLikelihoods(pileup, haplotypeMap, ref, eventLength, perReadAlleleLikelihoodMap.get(sample.getKey()), UAC.getSampleContamination().get(sample.getKey())); b.PL(genotypeLikelihoods); b.DP(getFilteredDepth(pileup)); genotypes.add(b.make()); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java index 7d2f794ec..ce5f94478 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java @@ -105,7 +105,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC ReadBackedPileup pileup = AlignmentContextUtils.stratify(sample.getValue(), contextType).getBasePileup(); final Double contamination = UAC.getSampleContamination().get(sample.getKey()); if( contamination > 0.0 ) //no need to enter if no contamination reduction - pileup = perReadAlleleLikelihoodMap.createPerAlleleDownsampledBasePileup(pileup,contamination, UAC.contaminationLog); + pileup = perReadAlleleLikelihoodMap.createPerAlleleDownsampledBasePileup(pileup, contamination); if ( useBAQedPileup ) pileup = createBAQedPileup(pileup); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java index 5fe98649f..419ea378f 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java @@ -64,7 +64,6 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.variant.variantcontext.*; -import java.io.PrintStream; import java.util.*; public class GenotypingEngine { @@ -196,13 +195,13 @@ public class GenotypingEngine { logger.info("Genotyping event at " + loc + " with alleles = " + mergedVC.getAlleles()); } - final Map alleleReadMap = convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, UG_engine.getUAC().CONTAMINATION_FRACTION, UG_engine.getUAC().contaminationLog ); + final Map alleleReadMap = convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, UG_engine.getUAC().CONTAMINATION_FRACTION ); final GenotypesContext genotypes = calculateGLsForThisEvent( alleleReadMap, mergedVC ); final VariantContext call = UG_engine.calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), mergedVC.isSNP() ? GenotypeLikelihoodsCalculationModel.Model.SNP : GenotypeLikelihoodsCalculationModel.Model.INDEL); if( call != null ) { final Map alleleReadMap_annotations = ( USE_FILTERED_READ_MAP_FOR_ANNOTATIONS ? alleleReadMap : - convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, 0.0, UG_engine.getUAC().contaminationLog ) ); + convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, 0.0 ) ); final Map stratifiedReadMap = filterToOnlyOverlappingReads( genomeLocParser, alleleReadMap_annotations, perSampleFilteredReadList, call ); VariantContext annotatedCall = call; @@ -406,8 +405,7 @@ public class GenotypingEngine { // BUGBUG: ugh, too complicated protected Map convertHaplotypeReadMapToAlleleReadMap( final Map haplotypeReadMap, final Map> alleleMapper, - final double downsamplingFraction, - final PrintStream downsamplingLog ) { + final double downsamplingFraction ) { final Map alleleReadMap = new LinkedHashMap(); for( final Map.Entry haplotypeReadMapEntry : haplotypeReadMap.entrySet() ) { // for each sample @@ -424,7 +422,7 @@ public class GenotypingEngine { perReadAlleleLikelihoodMap.add(readEntry.getKey(), alleleMapperEntry.getKey(), maxLikelihood); } } - perReadAlleleLikelihoodMap.performPerAlleleDownsampling(downsamplingFraction, downsamplingLog); // perform contamination downsampling + perReadAlleleLikelihoodMap.performPerAlleleDownsampling(downsamplingFraction); // perform contamination downsampling alleleReadMap.put(haplotypeReadMapEntry.getKey(), perReadAlleleLikelihoodMap); } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java index 5702fff2a..93a015718 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java @@ -61,7 +61,6 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.variant.variantcontext.Allele; -import java.io.PrintStream; import java.util.Arrays; import java.util.LinkedHashMap; import java.util.Map; @@ -213,13 +212,12 @@ public class PairHMMIndelErrorModel { final ReferenceContext ref, final int eventLength, final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap, - final double downsamplingFraction, - final PrintStream downsamplingLog) { + final double downsamplingFraction) { final int numHaplotypes = haplotypeMap.size(); final int readCounts[] = new int[pileup.getNumberOfElements()]; final double[][] readLikelihoods = computeGeneralReadHaplotypeLikelihoods(pileup, haplotypeMap, ref, eventLength, perReadAlleleLikelihoodMap, readCounts); - perReadAlleleLikelihoodMap.performPerAlleleDownsampling(downsamplingFraction, downsamplingLog); + perReadAlleleLikelihoodMap.performPerAlleleDownsampling(downsamplingFraction); return getDiploidHaplotypeLikelihoods(numHaplotypes, readCounts, readLikelihoods); } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperReducedReadsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperReducedReadsIntegrationTest.java index 21b7d0f86..c3597b6aa 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperReducedReadsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperReducedReadsIntegrationTest.java @@ -63,18 +63,18 @@ public class UnifiedGenotyperReducedReadsIntegrationTest extends WalkerTest { public void testReducedBam() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1, - Arrays.asList("d55d37e2e86aefb91e47183d2c7dede8")); + Arrays.asList("f82389f2bc6d3f932a36be65b60af648")); executeTest("test calling on a ReducedRead BAM", spec); } @Test public void testReducedBamSNPs() { - testReducedCalling("SNP", "b424779c6609cb727a675bdd301290e6"); + testReducedCalling("SNP", "c87f89af948a554cc66bc3afa5251c3b"); } @Test public void testReducedBamINDELs() { - testReducedCalling("INDEL", "38c3d14cb9086f7355788d3db9b8ff16"); + testReducedCalling("INDEL", "ed4ddc42447ec037c1e14757b6cf0515"); } diff --git a/protected/java/test/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMapUnitTest.java b/protected/java/test/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMapUnitTest.java index 6ca49d3e5..9530ea41f 100644 --- a/protected/java/test/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMapUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMapUnitTest.java @@ -211,7 +211,7 @@ public class PerReadAlleleLikelihoodMapUnitTest extends BaseTest { Assert.assertEquals(perReadAlleleLikelihoodMap.size(),pileup.depthOfCoverage()+10); Assert.assertEquals(perReadAlleleLikelihoodMap.getAlleleStratifiedReadMap().get(base_A).size(),60); - perReadAlleleLikelihoodMap.performPerAlleleDownsampling(0.1,null); + perReadAlleleLikelihoodMap.performPerAlleleDownsampling(0.1); Assert.assertEquals(perReadAlleleLikelihoodMap.size(),(int) (0.9*(pileup.depthOfCoverage()+10))); Map> downsampledStrat = perReadAlleleLikelihoodMap.getAlleleStratifiedReadMap(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index e98dcfe9e..8d1fa4638 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -104,8 +104,9 @@ public class GATKArgumentCollection { @Argument(fullName = "nonDeterministicRandomSeed", shortName = "ndrs", doc = "Makes the GATK behave non deterministically, that is, the random numbers generated will be different in every run", required = false) public boolean nonDeterministicRandomSeed = false; - @Argument(fullName = "disableRandomization",doc="Completely eliminates randomization from nondeterministic methods. To be used mostly in the testing framework where dynamic parallelism can result in differing numbers of calls to the generator.") - public boolean disableRandomization = false; + @Hidden + @Argument(fullName = "disableDithering",doc="Completely eliminates randomized dithering from rank sum tests. To be used in the testing framework where dynamic parallelism can result in differing numbers of calls to the random generator.") + public boolean disableDithering = false; @Argument(fullName = "maxRuntime", shortName = "maxRuntime", doc="If provided, that GATK will stop execution cleanly as soon after maxRuntime has been exceeded, truncating the run but not exiting with a failure. By default the value is interpreted in minutes, but this can be changed by maxRuntimeUnits", required = false) public long maxRuntime = GenomeAnalysisEngine.NO_RUNTIME_LIMIT; diff --git a/public/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java b/public/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java index 26e9febe7..fb7a16bfd 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java @@ -25,7 +25,6 @@ package org.broadinstitute.sting.gatk.downsampling; -import net.sf.samtools.SAMReadGroupRecord; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.collections.DefaultHashMap; import org.broadinstitute.sting.utils.exceptions.StingException; @@ -38,65 +37,62 @@ import org.broadinstitute.variant.variantcontext.Allele; import java.io.File; import java.io.IOException; -import java.io.PrintStream; import java.util.*; import org.apache.log4j.Logger; public class AlleleBiasedDownsamplingUtils { + // define this class so that we can use Java generics below + private final static class PileupElementList extends ArrayList {} + /** * Computes an allele biased version of the given pileup * * @param pileup the original pileup * @param downsamplingFraction the fraction of total reads to remove per allele - * @param log logging output * @return allele biased pileup */ - public static ReadBackedPileup createAlleleBiasedBasePileup(final ReadBackedPileup pileup, final double downsamplingFraction, final PrintStream log) { + public static ReadBackedPileup createAlleleBiasedBasePileup(final ReadBackedPileup pileup, final double downsamplingFraction) { // special case removal of all or no reads if ( downsamplingFraction <= 0.0 ) return pileup; if ( downsamplingFraction >= 1.0 ) return new ReadBackedPileupImpl(pileup.getLocation(), new ArrayList()); - final ArrayList[] alleleStratifiedElements = new ArrayList[4]; + final PileupElementList[] alleleStratifiedElements = new PileupElementList[4]; for ( int i = 0; i < 4; i++ ) - alleleStratifiedElements[i] = new ArrayList(); + alleleStratifiedElements[i] = new PileupElementList(); // start by stratifying the reads by the alleles they represent at this position + boolean sawReducedRead = false; for ( final PileupElement pe : pileup ) { - // we do not want to remove a reduced read - if ( !pe.getRead().isReducedRead() ) { - final int baseIndex = BaseUtils.simpleBaseToBaseIndex(pe.getBase()); - if ( baseIndex != -1 ) - alleleStratifiedElements[baseIndex].add(pe); - } + if ( pe.getRead().isReducedRead() ) + sawReducedRead = true; + + final int baseIndex = BaseUtils.simpleBaseToBaseIndex(pe.getBase()); + if ( baseIndex != -1 ) + alleleStratifiedElements[baseIndex].add(pe); } - // make a listing of allele counts - final int[] alleleCounts = new int[4]; - for ( int i = 0; i < 4; i++ ) - alleleCounts[i] = alleleStratifiedElements[i].size(); + // make a listing of allele counts and calculate the total count + final int[] alleleCounts = calculateAlleleCounts(alleleStratifiedElements, sawReducedRead); + final int totalAlleleCount = (int)MathUtils.sum(alleleCounts); // do smart down-sampling - int numReadsToRemove = (int)(pileup.getNumberOfElements() * downsamplingFraction); // floor + final int numReadsToRemove = (int)(totalAlleleCount * downsamplingFraction); // floor final int[] targetAlleleCounts = runSmartDownsampling(alleleCounts, numReadsToRemove); final HashSet readsToRemove = new HashSet(numReadsToRemove); for ( int i = 0; i < 4; i++ ) { - final ArrayList alleleList = alleleStratifiedElements[i]; + final PileupElementList alleleList = alleleStratifiedElements[i]; // if we don't need to remove any reads, then don't - if ( alleleList.size() > targetAlleleCounts[i] ) - readsToRemove.addAll(downsampleElements(alleleList, alleleList.size() - targetAlleleCounts[i], log)); + if ( alleleCounts[i] > targetAlleleCounts[i] ) + readsToRemove.addAll(downsampleElements(alleleList, alleleCounts[i], alleleCounts[i] - targetAlleleCounts[i])); } - // clean up pointers so memory can be garbage collected if needed - for ( int i = 0; i < 4; i++ ) - alleleStratifiedElements[i].clear(); - // we need to keep the reads sorted because the FragmentUtils code will expect them in coordinate order and will fail otherwise - final List readsToKeep = new ArrayList(pileup.getNumberOfElements() - numReadsToRemove); + final List readsToKeep = new ArrayList(totalAlleleCount - numReadsToRemove); for ( final PileupElement pe : pileup ) { if ( !readsToRemove.contains(pe) ) { readsToKeep.add(pe); @@ -106,6 +102,26 @@ public class AlleleBiasedDownsamplingUtils { return new ReadBackedPileupImpl(pileup.getLocation(), new ArrayList(readsToKeep)); } + /** + * Calculates actual allele counts for each allele (which can be different than the list size when reduced reads are present) + * + * @param alleleStratifiedElements pileup elements stratified by allele + * @param sawReducedRead is at least one read a reduced read? + * @return non-null int array representing allele counts + */ + private static int[] calculateAlleleCounts(final PileupElementList[] alleleStratifiedElements, final boolean sawReducedRead) { + final int[] alleleCounts = new int[alleleStratifiedElements.length]; + for ( int i = 0; i < alleleStratifiedElements.length; i++ ) { + if ( !sawReducedRead ) { + alleleCounts[i] = alleleStratifiedElements[i].size(); + } else { + for ( final PileupElement pe : alleleStratifiedElements[i] ) + alleleCounts[i] += pe.getRepresentativeCount(); + } + } + return alleleCounts; + } + private static int scoreAlleleCounts(final int[] alleleCounts) { if ( alleleCounts.length < 2 ) return 0; @@ -128,11 +144,11 @@ public class AlleleBiasedDownsamplingUtils { } /** - * Computes an allele biased version of the given pileup + * Computes an allele biased version of the allele counts for a given pileup * - * @param alleleCounts the original pileup - * @param numReadsToRemove fraction of total reads to remove per allele - * @return allele biased pileup + * @param alleleCounts the allele counts for the original pileup + * @param numReadsToRemove number of total reads to remove per allele + * @return non-null array of new counts needed per allele */ protected static int[] runSmartDownsampling(final int[] alleleCounts, final int numReadsToRemove) { final int numAlleles = alleleCounts.length; @@ -169,36 +185,50 @@ public class AlleleBiasedDownsamplingUtils { /** * Performs allele biased down-sampling on a pileup and computes the list of elements to remove * - * @param elements original list of records + * @param elements original list of pileup elements + * @param originalElementCount original count of elements (taking reduced reads into account) * @param numElementsToRemove the number of records to remove - * @param log logging output * @return the list of pileup elements TO REMOVE */ - private static List downsampleElements(final List elements, final int numElementsToRemove, final PrintStream log) { - ArrayList elementsToRemove = new ArrayList(numElementsToRemove); - + protected static List downsampleElements(final List elements, final int originalElementCount, final int numElementsToRemove) { // are there no elements to remove? if ( numElementsToRemove == 0 ) - return elementsToRemove; + return Collections.emptyList(); + + final ArrayList elementsToRemove = new ArrayList(numElementsToRemove); // should we remove all of the elements? - final int pileupSize = elements.size(); - if ( numElementsToRemove == pileupSize ) { - logAllElements(elements, log); + if ( numElementsToRemove >= originalElementCount ) { elementsToRemove.addAll(elements); return elementsToRemove; } // create a bitset describing which elements to remove - final BitSet itemsToRemove = new BitSet(pileupSize); - for ( Integer selectedIndex : MathUtils.sampleIndicesWithoutReplacement(pileupSize, numElementsToRemove) ) { + final BitSet itemsToRemove = new BitSet(originalElementCount); + for ( final Integer selectedIndex : MathUtils.sampleIndicesWithoutReplacement(originalElementCount, numElementsToRemove) ) { itemsToRemove.set(selectedIndex); } - for ( int i = 0; i < pileupSize; i++ ) { - if ( itemsToRemove.get(i) ) { - final T element = elements.get(i); - logElement(element, log); + int currentBitSetIndex = 0; + for ( final PileupElement element : elements ) { + + final int representativeCount = element.getRepresentativeCount(); + + // if it's a reduced read, we need to be smart about how we down-sample + if ( representativeCount > 1 ) { + // count how many bits are set over the span represented by this read + int setBits = 0; + for ( int i = 0; i < representativeCount; i++ ) + setBits += itemsToRemove.get(currentBitSetIndex++) ? 1 : 0; + + // remove that count from the count of the reduced read + if ( setBits == representativeCount ) + elementsToRemove.add(element); + else + element.adjustRepresentativeCount(-1 * setBits); + } + // otherwise it's trivial: remove if the corresponding bit is set + else if ( itemsToRemove.get(currentBitSetIndex++) ) { elementsToRemove.add(element); } } @@ -211,10 +241,9 @@ public class AlleleBiasedDownsamplingUtils { * * @param alleleReadMap original list of records per allele * @param downsamplingFraction the fraction of total reads to remove per allele - * @param log logging output * @return list of reads TO REMOVE from allele biased down-sampling */ - public static List selectAlleleBiasedReads(final Map> alleleReadMap, final double downsamplingFraction, final PrintStream log) { + public static List selectAlleleBiasedReads(final Map> alleleReadMap, final double downsamplingFraction) { int totalReads = 0; for ( final List reads : alleleReadMap.values() ) totalReads += reads.size(); @@ -225,6 +254,8 @@ public class AlleleBiasedDownsamplingUtils { final List alleles = new ArrayList(alleleReadMap.keySet()); alleles.remove(Allele.NO_CALL); // ignore the no-call bin final int numAlleles = alleles.size(); + + // TODO -- if we ever decide to make this work for reduced reads, this will need to use the representative counts instead final int[] alleleCounts = new int[numAlleles]; for ( int i = 0; i < numAlleles; i++ ) alleleCounts[i] = alleleReadMap.get(alleles.get(i)).size(); @@ -234,38 +265,52 @@ public class AlleleBiasedDownsamplingUtils { final List readsToRemove = new ArrayList(numReadsToRemove); for ( int i = 0; i < numAlleles; i++ ) { - final List alleleBin = alleleReadMap.get(alleles.get(i)); - - if ( alleleBin.size() > targetAlleleCounts[i] ) { - readsToRemove.addAll(downsampleElements(alleleBin, alleleBin.size() - targetAlleleCounts[i], log)); + if ( alleleCounts[i] > targetAlleleCounts[i] ) { + readsToRemove.addAll(downsampleElements(alleleReadMap.get(alleles.get(i)), alleleCounts[i] - targetAlleleCounts[i])); } } return readsToRemove; } - private static void logAllElements(final List elements, final PrintStream log) { - if ( log != null ) { - for ( final T obj : elements ) { - logElement(obj, log); - } + /** + * Performs allele biased down-sampling on a pileup and computes the list of elements to remove + * + * @param reads original list of records + * @param numElementsToRemove the number of records to remove + * @return the list of pileup elements TO REMOVE + */ + protected static List downsampleElements(final List reads, final int numElementsToRemove) { + // are there no elements to remove? + if ( numElementsToRemove == 0 ) + return Collections.emptyList(); + + final ArrayList elementsToRemove = new ArrayList(numElementsToRemove); + final int originalElementCount = reads.size(); + + // should we remove all of the elements? + if ( numElementsToRemove >= originalElementCount ) { + elementsToRemove.addAll(reads); + return elementsToRemove; } - } - private static void logElement(final T obj, final PrintStream log) { - if ( log != null ) { - - final GATKSAMRecord read; - if ( obj instanceof PileupElement ) - read = ((PileupElement)obj).getRead(); - else - read = (GATKSAMRecord)obj; - - final SAMReadGroupRecord readGroup = read.getReadGroup(); - log.println(String.format("%s\t%s\t%s\t%s", read.getReadName(), readGroup.getSample(), readGroup.getLibrary(), readGroup.getPlatformUnit())); + // create a bitset describing which elements to remove + final BitSet itemsToRemove = new BitSet(originalElementCount); + for ( final Integer selectedIndex : MathUtils.sampleIndicesWithoutReplacement(originalElementCount, numElementsToRemove) ) { + itemsToRemove.set(selectedIndex); } - } + int currentBitSetIndex = 0; + for ( final GATKSAMRecord read : reads ) { + if ( read.isReducedRead() ) + throw new IllegalStateException("Allele-biased downsampling of reduced reads has not been implemented for a list of GATKSAMRecords"); + + if ( itemsToRemove.get(currentBitSetIndex++) ) + elementsToRemove.add(read); + } + + return elementsToRemove; + } /** * Create sample-contamination maps from file diff --git a/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java b/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java index 8134b1257..150e24c51 100644 --- a/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java +++ b/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java @@ -27,7 +27,6 @@ package org.broadinstitute.sting.utils.genotyper; import com.google.java.contract.Ensures; -import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.downsampling.AlleleBiasedDownsamplingUtils; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.pileup.PileupElement; @@ -36,7 +35,6 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.variant.variantcontext.Allele; -import java.io.PrintStream; import java.util.*; /** @@ -44,9 +42,8 @@ import java.util.*; * For each read, this holds underlying alleles represented by an aligned read, and corresponding relative likelihood. */ public class PerReadAlleleLikelihoodMap { - private final static Logger logger = Logger.getLogger(PerReadAlleleLikelihoodMap.class); - protected List alleles; - protected Map> likelihoodReadMap; + protected final List alleles; + protected final Map> likelihoodReadMap; public PerReadAlleleLikelihoodMap() { likelihoodReadMap = new LinkedHashMap>(); @@ -78,17 +75,16 @@ public class PerReadAlleleLikelihoodMap { } - public ReadBackedPileup createPerAlleleDownsampledBasePileup(final ReadBackedPileup pileup, final double downsamplingFraction, final PrintStream log) { - return AlleleBiasedDownsamplingUtils.createAlleleBiasedBasePileup(pileup, downsamplingFraction, log); + public ReadBackedPileup createPerAlleleDownsampledBasePileup(final ReadBackedPileup pileup, final double downsamplingFraction) { + return AlleleBiasedDownsamplingUtils.createAlleleBiasedBasePileup(pileup, downsamplingFraction); } /** * For each allele "a" , identify those reads whose most likely allele is "a", and remove a "downsamplingFraction" proportion * of those reads from the "likelihoodReadMap". This is used for e.g. sample contamination * @param downsamplingFraction - the fraction of supporting reads to remove from each allele. If <=0 all reads kept, if >=1 all reads tossed. - * @param log - a PrintStream to log the removed reads to (passed through to the utility function) */ - public void performPerAlleleDownsampling(final double downsamplingFraction, final PrintStream log) { + public void performPerAlleleDownsampling(final double downsamplingFraction) { // special case removal of all or no reads if ( downsamplingFraction <= 0.0 ) return; @@ -101,7 +97,7 @@ public class PerReadAlleleLikelihoodMap { final Map> alleleReadMap = getAlleleStratifiedReadMap(); // compute the reads to remove and actually remove them - final List readsToRemove = AlleleBiasedDownsamplingUtils.selectAlleleBiasedReads(alleleReadMap, downsamplingFraction, log); + final List readsToRemove = AlleleBiasedDownsamplingUtils.selectAlleleBiasedReads(alleleReadMap, downsamplingFraction); for ( final GATKSAMRecord read : readsToRemove ) likelihoodReadMap.remove(read); } @@ -117,7 +113,8 @@ public class PerReadAlleleLikelihoodMap { alleleReadMap.put(allele, new ArrayList()); for ( final Map.Entry> entry : likelihoodReadMap.entrySet() ) { - // do not remove reduced reads! + // TODO -- come up with a strategy for down-sampling reduced reads + // Currently we are unable to remove reduced reads because their representative base count differs throughout the read if ( !entry.getKey().isReducedRead() ) { final MostLikelyAllele bestAllele = getMostLikelyAllele(entry.getValue()); if ( bestAllele.isInformative() ) diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java index 51753ca5e..ba5dee34c 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java @@ -303,7 +303,7 @@ public class PileupElement implements Comparable { * this being a reduced read and a deletion, we return the average number of elements between the left * and right elements to the deletion. We assume the deletion to be left aligned. * - * @return + * @return the representative count */ public int getRepresentativeCount() { if (read.isReducedRead()) { @@ -318,6 +318,19 @@ public class PileupElement implements Comparable { } } + /** + * Adjusts the representative count of this pileup element. + * Throws an exception if this element does not represent a reduced read. + * + * @param adjustmentFactor how much to adjust the representative count (can be positive or negative) + */ + public void adjustRepresentativeCount(final int adjustmentFactor) { + if ( read.isReducedRead() ) + read.adjustReducedCount(offset, adjustmentFactor); + else + throw new IllegalArgumentException("Trying to adjust the representative count of a read that is not reduced"); + } + /** * Get the cigar element aligning this element to the genome * @return a non-null CigarElement diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index 0e672b3d7..e4a2bed44 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -400,7 +400,40 @@ public class GATKSAMRecord extends BAMRecord { * @return the number of bases corresponding to the i'th base of the reduced read */ public final byte getReducedCount(final int i) { - return getReducedReadCounts()[i]; + if ( !isReducedRead() ) + throw new IllegalArgumentException("error trying to retrieve the reduced count from a read that is not reduced"); + final byte[] reducedCounts = getReducedReadCounts(); + return reducedCounts[i]; + } + + /** + * Sets the number of bases corresponding the i'th base of the reduced read. + * + * @param i the read based coordinate inside the read + * @param count the new count + */ + public final void setReducedCount(final int i, final byte count) { + if ( count < 0 ) + throw new IllegalArgumentException("the reduced count cannot be set to a negative value"); + if ( !isReducedRead() ) + throw new IllegalArgumentException("error trying to set the reduced count for a read that is not reduced"); + final byte[] reducedCounts = getReducedReadCounts(); + reducedCounts[i] = count; + setReducedReadCountsTag(reducedCounts); + } + + /** + * Sets the number of bases corresponding the i'th base of the reduced read. + * + * @param i the read based coordinate inside the read + * @param adjustmentFactor how much to add/subtract to the current count + */ + public final void adjustReducedCount(final int i, final int adjustmentFactor) { + if ( !isReducedRead() ) + throw new IllegalArgumentException("error trying to set the reduced count for a read that is not reduced"); + final byte[] reducedCounts = getReducedReadCounts(); + final byte newCount = (byte)(reducedCounts[i] + adjustmentFactor); + setReducedCount(i, newCount); } /** diff --git a/public/java/test/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtilsUnitTest.java index 23b940491..6314d4681 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtilsUnitTest.java @@ -25,18 +25,21 @@ package org.broadinstitute.sting.gatk.downsampling; +import net.sf.samtools.CigarElement; +import net.sf.samtools.CigarOperator; +import net.sf.samtools.SAMFileHeader; import org.apache.log4j.Logger; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.io.File; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; +import java.util.*; /** @@ -115,6 +118,51 @@ public class AlleleBiasedDownsamplingUtilsUnitTest extends BaseTest { return true; } + @DataProvider(name = "BiasedDownsamplingTest") + public Object[][] makeBiasedDownsamplingTest() { + final List tests = new LinkedList(); + + final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000); + + for ( final int originalNormalCount : Arrays.asList(0, 1, 2, 10, 1000) ) { + for ( final int originalReducedCount : Arrays.asList(0, 1, 2, 10, 100) ) { + for ( final int indexToPutReducedRead : Arrays.asList(0, 2, originalNormalCount) ) { + if ( originalReducedCount == 0 || indexToPutReducedRead > originalNormalCount ) + continue; + for ( final int toRemove : Arrays.asList(0, 1, 2, 10, 1000) ) { + if ( toRemove <= originalNormalCount + originalReducedCount ) + tests.add(new Object[]{header, originalNormalCount, originalReducedCount, indexToPutReducedRead, toRemove}); + } + } + } + } + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "BiasedDownsamplingTest") + public void testBiasedDownsampling(final SAMFileHeader header, final int originalNormalCount, final int originalReducedCount, final int indexToPutReducedRead, final int toRemove) { + + final LinkedList elements = new LinkedList(); + for ( int i = 0; i < originalNormalCount; i++ ) { + final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read", 0, 1, 1); + elements.add(new PileupElement(read, 0, new CigarElement(1, CigarOperator.M), 0, 0)); + } + if ( originalReducedCount > 0 ) { + final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read", 0, 1, 1); + read.setReducedReadCountsTag(new byte[]{(byte)originalReducedCount}); + elements.add(indexToPutReducedRead, new PileupElement(read, 0, new CigarElement(1, CigarOperator.M), 0, 0)); + } + + final List result = AlleleBiasedDownsamplingUtils.downsampleElements(elements, originalNormalCount + originalReducedCount, toRemove); + int pileupCount = 0; + for ( final PileupElement pe : elements ) // reduced reads may have gotten modified + pileupCount += pe.getRepresentativeCount(); + for ( final PileupElement pe : result ) + pileupCount -= pe.getRepresentativeCount(); + + Assert.assertEquals(pileupCount, originalNormalCount + originalReducedCount - toRemove); + } @Test public void testLoadContaminationFileDetails(){ diff --git a/public/java/test/org/broadinstitute/sting/utils/sam/GATKSAMRecordUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/sam/GATKSAMRecordUnitTest.java index 57a7946ae..06cdb366e 100644 --- a/public/java/test/org/broadinstitute/sting/utils/sam/GATKSAMRecordUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/sam/GATKSAMRecordUnitTest.java @@ -39,7 +39,7 @@ public class GATKSAMRecordUnitTest extends BaseTest { final static String BASES = "ACTG"; final static String QUALS = "!+5?"; final private static byte[] REDUCED_READ_COUNTS = new byte[]{10, 20, 30, 40, 1}; - final private static byte[] REDUCED_READ_COUNTS_TAG = new byte[]{10, 10, 20, 30, -9}; // just the offsets + final private static byte[] REDUCED_READ_COUNTS_OFFSETS = new byte[]{10, 10, 20, 30, -9}; // just the offsets @BeforeClass public void init() { @@ -52,7 +52,7 @@ public class GATKSAMRecordUnitTest extends BaseTest { reducedRead = ArtificialSAMUtils.createArtificialRead(header, "reducedRead", 0, 1, BASES.length()); reducedRead.setReadBases(BASES.getBytes()); reducedRead.setBaseQualityString(QUALS); - reducedRead.setAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, REDUCED_READ_COUNTS_TAG); + reducedRead.setReducedReadCountsTag(REDUCED_READ_COUNTS_OFFSETS); } @Test @@ -66,6 +66,36 @@ public class GATKSAMRecordUnitTest extends BaseTest { } } + @Test(expectedExceptions = IllegalArgumentException.class) + public void testGetReducedCountOnNormalRead() { + read.getReducedCount(0); + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testSetReducedTagOnNormalRead() { + read.setReducedCount(0, (byte)2); + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testSetReducedCountToNegativeNumber() { + reducedRead.setReducedCount(0, (byte)1000); + } + + @Test + public void testSetReducedTagOnReducedRead() { + for (int i = 0; i < reducedRead.getReadLength(); i++) { + final byte newCount = (byte)i; + reducedRead.setReducedCount(i, newCount); + Assert.assertEquals(reducedRead.getReducedCount(i), newCount, "Reduced read count not set to the expected value at " + i); + } + + for (int i = 0; i < reducedRead.getReadLength(); i++) { + final int newCount = reducedRead.getReducedCount(i) + i; + reducedRead.adjustReducedCount(i, i); + Assert.assertEquals(reducedRead.getReducedCount(i), newCount, "Reduced read count not set to the expected value at " + i); + } + } + @Test public void testReducedReadPileupElement() { PileupElement readp = LocusIteratorByState.createPileupForReadAndOffset(read, 0); From 021adf4220392c999ec203ca567d55a5267d070e Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 25 Apr 2013 15:39:42 -0400 Subject: [PATCH 2/2] WTF - I thought we had disabled the randomized dithering of rank sum tests for integration tests?! Well, it wasn't done so I went ahead and did so. Lots of MD5 changes accordingly. --- .../gatk/walkers/annotator/RankSumTest.java | 2 +- ...dGenotyperIndelCallingIntegrationTest.java | 22 +++++----- .../UnifiedGenotyperIntegrationTest.java | 40 +++++++++---------- ...GenotyperNormalCallingIntegrationTest.java | 24 +++++------ ...dGenotyperReducedReadsIntegrationTest.java | 10 ++--- ...lexAndSymbolicVariantsIntegrationTest.java | 6 +-- .../HaplotypeCallerIntegrationTest.java | 16 ++++---- 7 files changed, 60 insertions(+), 60 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java index ec107512a..ef456824e 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java @@ -183,6 +183,6 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements ActiveR * @param headerLines */ public void initialize ( AnnotatorCompatible walker, GenomeAnalysisEngine toolkit, Set headerLines ) { - useDithering = ! toolkit.getArguments().disableRandomization; + useDithering = ! toolkit.getArguments().disableDithering; } } \ No newline at end of file diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java index ece92f50f..c33e89b99 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java @@ -56,8 +56,8 @@ import java.util.List; public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { - private final static String baseCommandIndels = "-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm INDEL -mbq 20 -minIndelFrac 0.0 --dbsnp " + b36dbSNP129; - private final static String baseCommandIndelsb37 = "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -mbq 20 --dbsnp " + b37dbSNP132; + private final static String baseCommandIndels = "-T UnifiedGenotyper --disableDithering -R " + b36KGReference + " --no_cmdline_in_header -glm INDEL -mbq 20 -minIndelFrac 0.0 --dbsnp " + b36dbSNP129; + private final static String baseCommandIndelsb37 = "-T UnifiedGenotyper --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -mbq 20 --dbsnp " + b37dbSNP132; // -------------------------------------------------------------------------------------------------------------- // @@ -73,7 +73,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("118ed5b54fc9ce1cde89f06a20afebef")); + Arrays.asList("d8b0c5be39ec6b239641c2f2646d2bc3")); executeTest(String.format("test indel caller in SLX"), spec); } @@ -88,7 +88,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { " -minIndelCnt 1" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("6ef59013331bc031ea37807b325d7d2c")); + Arrays.asList("d9572a227ccb13a6baa6dc4fb65bc1e5")); executeTest(String.format("test indel caller in SLX with low min allele count"), spec); } @@ -101,7 +101,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("dd3ee4675377191e34aaf67335e0219a")); + Arrays.asList("54e13f696f56eb742bf449ad11d0dc5f")); executeTest(String.format("test indel calling, multiple technologies"), spec); } @@ -111,7 +111,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("bb06ef8262f91664b7d2fe7e1e5df195")); + Arrays.asList("16d975480ff1e689113171805b916b62")); executeTest("test MultiSample Pilot2 indels with alleles passed in", spec); } @@ -121,7 +121,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("0a2a8cc2d1a79e84624836a31de5491c")); + Arrays.asList("60ed3f8d5bc3f765e6ce3fa698b68bb7")); executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec); } @@ -136,7 +136,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L " + result.get(0).getAbsolutePath(), 1, - Arrays.asList("939f80c6d2dfb592956aed3bdeaf319d")); + Arrays.asList("e87f5c76661527ef7aa44e528fe19573")); executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2); } @@ -176,7 +176,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { public void testMinIndelFraction0() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( assessMinIndelFraction + " -minIndelFrac 0.0", 1, - Arrays.asList("fc937f92e59dfe07b894411b5dfc166a")); + Arrays.asList("264325878b988acc11d8e5d9d2ba0b7f")); executeTest("test minIndelFraction 0.0", spec); } @@ -184,7 +184,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { public void testMinIndelFraction25() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( assessMinIndelFraction + " -minIndelFrac 0.25", 1, - Arrays.asList("41ad9e0edca4b9987390ba5c07f39e4a")); + Arrays.asList("98abcfb0a008050eba8b9c285a25b2a0")); executeTest("test minIndelFraction 0.25", spec); } @@ -200,7 +200,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { @Test public void testHaplotype0Length() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -I " + privateTestDir + "haplotype0.bam -L 20:47507681 -R " + b37KGReference + " -baq CALCULATE_AS_NECESSARY -glm BOTH -o /dev/null", + "-T UnifiedGenotyper --disableDithering -I " + privateTestDir + "haplotype0.bam -L 20:47507681 -R " + b37KGReference + " -baq CALCULATE_AS_NECESSARY -glm BOTH -o /dev/null", 0, Collections.emptyList()); executeTest("testHaplotype0Length", spec); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index c89b1dfbf..605be3b2d 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -60,8 +60,8 @@ import java.util.Collections; public class UnifiedGenotyperIntegrationTest extends WalkerTest { - private final static String baseCommand = "-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -minIndelFrac 0.0 --dbsnp " + b36dbSNP129; - private final static String baseCommandNoCmdLineHeaderStdout = "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam"; + private final static String baseCommand = "-T UnifiedGenotyper --disableDithering -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -minIndelFrac 0.0 --dbsnp " + b36dbSNP129; + private final static String baseCommandNoCmdLineHeaderStdout = "-T UnifiedGenotyper --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam"; // -------------------------------------------------------------------------------------------------------------- // @@ -73,15 +73,15 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMinBaseQualityScore() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --min_base_quality_score 26", 1, - Arrays.asList("6ee6537e9ebc1bfc7c6cf8f04b1582ff")); + Arrays.asList("30be17df00acc8a92223f51fe7c1bdf7")); executeTest("test min_base_quality_score 26", spec); } @Test public void testSLOD() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R " + b36KGReference + " --computeSLOD --no_cmdline_in_header -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1, - Arrays.asList("55760482335497086458b09e415ecf54")); + "-T UnifiedGenotyper --disableDithering -R " + b36KGReference + " --computeSLOD --no_cmdline_in_header -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1, + Arrays.asList("4aa226c00a242047cf427d0919003048")); executeTest("test SLOD", spec); } @@ -89,15 +89,15 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testNDA() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " --annotateNDA -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1, - Arrays.asList("938e888a40182878be4c3cc4859adb69")); + Arrays.asList("17f65eca1e6c1f06919a58f230b6d8d3")); executeTest("test NDA", spec); } @Test public void testCompTrack() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -comp:FOO " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1, - Arrays.asList("7dc186d420487e4e156a24ec8dea0951")); + "-T UnifiedGenotyper --disableDithering -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -comp:FOO " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1, + Arrays.asList("50937942e3d228614d2531c3be237709")); executeTest("test using comp track", spec); } @@ -111,17 +111,17 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testOutputParameterSitesOnly() { - testOutputParameters("-sites_only", "f99c7471127a6fb6f72e136bc873b2c9"); + testOutputParameters("-sites_only", "48cd40d3994911a6f2609bfd375e1d2d"); } @Test public void testOutputParameterAllConfident() { - testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "5649f72de04e1391e0f2bb86843d3d72"); + testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "28f40ce47651f504158fc4e5bb58df4b"); } @Test public void testOutputParameterAllSites() { - testOutputParameters("--output_mode EMIT_ALL_SITES", "cb151bb9e90680b12714d481091ed209"); + testOutputParameters("--output_mode EMIT_ALL_SITES", "5259dafaa1b57d9489003b16a48e35f8"); } private void testOutputParameters(final String args, final String md5) { @@ -135,7 +135,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testConfidence() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1, - Arrays.asList("4af83a883ecc03a23b0aa6dd4b8f1ceb")); + Arrays.asList("918109938ef355d759dafc3ebb47d8a5")); executeTest("test confidence 1", spec1); } @@ -143,7 +143,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testNoPrior() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 -noPrior", 1, - Arrays.asList("422656266117f8d01e17e5c491c49a24")); + Arrays.asList("7ac60bdc355d97c0939e644b58de47d7")); executeTest("test no prior 1", spec1); } @@ -155,12 +155,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // -------------------------------------------------------------------------------------------------------------- @Test public void testHeterozyosity1() { - testHeterozosity( 0.01, "ffc1f83a045dc09360e11de7a8efd159" ); + testHeterozosity( 0.01, "3b66f82dbb746875638e076bf51a1583" ); } @Test public void testHeterozyosity2() { - testHeterozosity( 1.0 / 1850, "5426a98df9f5fd70aef295d889c4e4f1" ); + testHeterozosity( 1.0 / 1850, "714c1795334c7c62c046a75479381ae6" ); } private void testHeterozosity(final double arg, final String md5) { @@ -176,7 +176,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // // -------------------------------------------------------------------------------------------------------------- - private final static String COMPRESSED_OUTPUT_MD5 = "d5a7326fdcf6d441b73c381912ad3a2a"; + private final static String COMPRESSED_OUTPUT_MD5 = "6f79205f7ed8006470f056f6805db6c8"; @Test public void testCompressedOutput() { @@ -232,7 +232,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("0a4a78da876bfa3d42170249a94357b4")); + Arrays.asList("d995b76adc3766889f7c2c88da14055c")); executeTest(String.format("test multiple technologies"), spec); } @@ -251,7 +251,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -L 1:10,000,000-10,100,000" + " -baq CALCULATE_AS_NECESSARY", 1, - Arrays.asList("89182fd4d9532ab4b2a0a84bfb557089")); + Arrays.asList("9669e1643d22d5ad047b3941aeefd6db")); executeTest(String.format("test calling with BAQ"), spec); } @@ -281,8 +281,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testNsInCigar() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "testWithNs.bam -o %s -L 8:141813600-141813700 -out_mode EMIT_ALL_SITES", 1, - Arrays.asList("4d36969d4f8f1094f1fb6e7e085c19f6")); + "-T UnifiedGenotyper --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "testWithNs.bam -o %s -L 8:141813600-141813700 -out_mode EMIT_ALL_SITES", 1, + Arrays.asList("2ae3fd39c53a6954d32faed8703adfe8")); executeTest("test calling on reads with Ns in CIGAR", spec); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java index a58d3f3a8..a35cb1ecc 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java @@ -53,7 +53,7 @@ import java.util.Arrays; public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ - private final static String baseCommand = "-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -minIndelFrac 0.0 --dbsnp " + b36dbSNP129; + private final static String baseCommand = "-T UnifiedGenotyper --disableDithering -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -minIndelFrac 0.0 --dbsnp " + b36dbSNP129; // -------------------------------------------------------------------------------------------------------------- // @@ -64,7 +64,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testMultiSamplePilot1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1, - Arrays.asList("52b6086f4597da5b35ab902bea4066fc")); + Arrays.asList("e3efd1917192ea743ac1e9958aa0a98f")); executeTest("test MultiSample Pilot1", spec); } @@ -72,7 +72,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testWithAllelesPassedIn1() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1, - Arrays.asList("5b31b811072a4df04524e13604015f9b")); + Arrays.asList("ebfcc3dd8c1788929cb50050c5d456df")); executeTest("test MultiSample Pilot2 with alleles passed in", spec1); } @@ -80,7 +80,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testWithAllelesPassedIn2() { WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1, - Arrays.asList("d9992e55381afb43742cc9b30fcd7538")); + Arrays.asList("698e54aeae3130779d246b9480a4052c")); executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2); } @@ -88,22 +88,22 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testSingleSamplePilot2() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1, - Arrays.asList("33ab66c2f062cfa1f7fcc077165f778c")); + Arrays.asList("aaadb2a355d87344eabb6ac4495a11e4")); executeTest("test SingleSample Pilot2", spec); } @Test public void testMultipleSNPAlleles() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1, - Arrays.asList("28bfbff3da3af43d6a1eff673e5cb0f8")); + "-T UnifiedGenotyper --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1, + Arrays.asList("09a1a4d4bf0289bcc5e8a958f783a989")); executeTest("test Multiple SNP alleles", spec); } @Test public void testBadRead() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH -I " + privateTestDir + "badRead.test.bam -o %s -L 1:22753424-22753464", 1, + "-T UnifiedGenotyper --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH -I " + privateTestDir + "badRead.test.bam -o %s -L 1:22753424-22753464", 1, Arrays.asList("d915535c1458733f09f82670092fcab6")); executeTest("test bad read", spec); } @@ -111,16 +111,16 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ @Test public void testReverseTrim() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1, - Arrays.asList("a9edd04374ee9c42970291f39a50c191")); + "-T UnifiedGenotyper --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1, + Arrays.asList("57a1bb44967988f2b7ae7779127990ae")); executeTest("test reverse trim", spec); } @Test public void testMismatchedPLs() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1, - Arrays.asList("6fc32ca9de769060f3c2a3d94f8f2f91")); + "-T UnifiedGenotyper --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1, + Arrays.asList("3011c20165951ca43c8a4e86a5835dbd")); executeTest("test mismatched PLs", spec); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperReducedReadsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperReducedReadsIntegrationTest.java index c3597b6aa..191bf3c27 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperReducedReadsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperReducedReadsIntegrationTest.java @@ -62,25 +62,25 @@ public class UnifiedGenotyperReducedReadsIntegrationTest extends WalkerTest { @Test public void testReducedBam() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1, - Arrays.asList("f82389f2bc6d3f932a36be65b60af648")); + "-T UnifiedGenotyper --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1, + Arrays.asList("da3fd775c8add1f7962baabf06b7d372")); executeTest("test calling on a ReducedRead BAM", spec); } @Test public void testReducedBamSNPs() { - testReducedCalling("SNP", "c87f89af948a554cc66bc3afa5251c3b"); + testReducedCalling("SNP", "76244ab1be60814f1412e6cd09e546cc"); } @Test public void testReducedBamINDELs() { - testReducedCalling("INDEL", "ed4ddc42447ec037c1e14757b6cf0515"); + testReducedCalling("INDEL", "9a986b98ed014576ce923e07452447f4"); } private void testReducedCalling(final String model, final String md5) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.reduced.bam -o %s -L 20:10,000,000-10,500,000 -glm " + model, 1, + "-T UnifiedGenotyper --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.reduced.bam -o %s -L 20:10,000,000-10,500,000 -glm " + model, 1, Arrays.asList(md5)); executeTest("test calling on a ReducedRead BAM with " + model, spec); } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java index 5fe4e6dfa..d3f3a9936 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java @@ -57,7 +57,7 @@ import static org.broadinstitute.sting.gatk.walkers.haplotypecaller.HaplotypeCal public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends WalkerTest { private void HCTestComplexVariants(String bam, String args, String md5) { - final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, bam) + " -L 20:10028767-10028967 -L 20:10431524-10431924 -L 20:10723661-10724061 -L 20:10903555-10903955 --no_cmdline_in_header -o %s -minPruning 4"; + final String base = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, bam) + " -L 20:10028767-10028967 -L 20:10431524-10431924 -L 20:10723661-10724061 -L 20:10903555-10903955 --no_cmdline_in_header -o %s -minPruning 4"; final WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(base + " " + args, Arrays.asList(md5)); executeTest("testHaplotypeCallerComplexVariants: args=" + args, spec); } @@ -68,7 +68,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa } private void HCTestSymbolicVariants(String bam, String args, String md5) { - final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, bam) + " -L 20:5947969-5948369 -L 20:61091236-61091636 --no_cmdline_in_header -o %s -minPruning 1"; + final String base = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, bam) + " -L 20:5947969-5948369 -L 20:61091236-61091636 --no_cmdline_in_header -o %s -minPruning 1"; final WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(base + " " + args, Arrays.asList(md5)); executeTest("testHaplotypeCallerSymbolicVariants: args=" + args, spec); } @@ -80,7 +80,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa } private void HCTestComplexGGA(String bam, String args, String md5) { - final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, bam) + " --no_cmdline_in_header -o %s -minPruning 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf"; + final String base = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, bam) + " --no_cmdline_in_header -o %s -minPruning 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf"; final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5)); executeTest("testHaplotypeCallerComplexGGA: args=" + args, spec); } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index fff1c0bb9..02567f188 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -73,7 +73,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { final static String INTERVALS_FILE = validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.test.intervals"; private void HCTest(String bam, String args, String md5) { - final String base = String.format("-T HaplotypeCaller -R %s -I %s -L %s", REF, bam, INTERVALS_FILE) + " --no_cmdline_in_header -o %s -minPruning 3"; + final String base = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s -L %s", REF, bam, INTERVALS_FILE) + " --no_cmdline_in_header -o %s -minPruning 3"; final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5)); executeTest("testHaplotypeCaller: args=" + args, spec); } @@ -105,7 +105,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { } private void HCTestIndelQualityScores(String bam, String args, String md5) { - final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, bam) + " -L 20:10,005,000-10,025,000 --no_cmdline_in_header -o %s -minPruning 2"; + final String base = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, bam) + " -L 20:10,005,000-10,025,000 --no_cmdline_in_header -o %s -minPruning 2"; final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5)); executeTest("testHaplotypeCallerIndelQualityScores: args=" + args, spec); } @@ -120,7 +120,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { final IndexedFastaSequenceFile fasta = new IndexedFastaSequenceFile(new File(b37KGReference)); final GenomeLocParser parser = new GenomeLocParser(fasta.getSequenceDictionary()); - final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, bam) + " -L 20:10,001,603-10,001,642 -L 20:10,001,653-10,001,742 --no_cmdline_in_header -o %s"; + final String base = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, bam) + " -L 20:10,001,603-10,001,642 -L 20:10,001,653-10,001,742 --no_cmdline_in_header -o %s"; final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5)); for( final File vcf : executeTest("testHaplotypeCallerNearbySmallIntervals: args=" + args, spec).getFirst() ) { if( containsDuplicateRecord(vcf, parser) ) { @@ -158,14 +158,14 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { // any of the calls in that region because it is so messy. @Test public void HCTestProblematicReadsModifiedInActiveRegions() { - final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965"; + final String base = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965"; final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("0689d2c202849fd05617648eaf429b9a")); executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec); } @Test public void HCTestStructuralIndels() { - final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730"; + final String base = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730"; final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("cb190c935541ebb9f660f713a882b922")); executeTest("HCTestStructuralIndels: ", spec); } @@ -173,7 +173,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void HCTestDoesNotFailOnBadRefBase() { // don't care about the output - just want to make sure it doesn't fail - final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "NA12878.readsOverBadBase.chr3.bam") + " --no_cmdline_in_header -o /dev/null -L 3:60830000-60840000 --minPruning 3 -stand_call_conf 2 -stand_emit_conf 2"; + final String base = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, privateTestDir + "NA12878.readsOverBadBase.chr3.bam") + " --no_cmdline_in_header -o /dev/null -L 3:60830000-60840000 --minPruning 3 -stand_call_conf 2 -stand_emit_conf 2"; final WalkerTestSpec spec = new WalkerTestSpec(base, Collections.emptyList()); executeTest("HCTestDoesNotFailOnBadRefBase: ", spec); } @@ -187,7 +187,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void HCTestReducedBam() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1, + "-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1, Arrays.asList("3c87eb93ffe3a0166aca753050b981e1")); executeTest("HC calling on a ReducedRead BAM", spec); } @@ -195,7 +195,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testReducedBamWithReadsNotFullySpanningDeletion() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "reduced.readNotFullySpanningDeletion.bam -o %s -L 1:167871297", 1, + "-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "reduced.readNotFullySpanningDeletion.bam -o %s -L 1:167871297", 1, Arrays.asList("8adfa8a27a312760dab50787da595c57")); executeTest("test calling on a ReducedRead BAM where the reads do not fully span a deletion", spec); }