Merge branch 'master' of ssh://gsa2.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Ami Levy Moonshine 2012-11-14 09:42:35 -05:00
commit 49869344b8
10 changed files with 284 additions and 65 deletions

View File

@ -67,7 +67,6 @@ public class AlleleBiasedDownsamplingUtils {
alleleStratifiedElements[baseIndex].add(pe);
}
// Down-sample *each* allele by the contamination fraction applied to the entire pileup.
// Unfortunately, we need to maintain the original pileup ordering of reads or FragmentUtils will complain later.
int numReadsToRemove = (int)(pileup.getNumberOfElements() * downsamplingFraction); // floor
final TreeSet<PileupElement> elementsToKeep = new TreeSet<PileupElement>(new Comparator<PileupElement>() {
@ -78,12 +77,21 @@ public class AlleleBiasedDownsamplingUtils {
}
});
// make a listing of allele counts
final int[] alleleCounts = new int[4];
for ( int i = 0; i < 4; i++ )
alleleCounts[i] = alleleStratifiedElements[i].size();
// do smart down-sampling
final int[] targetAlleleCounts = runSmartDownsampling(alleleCounts, numReadsToRemove);
for ( int i = 0; i < 4; i++ ) {
final ArrayList<PileupElement> alleleList = alleleStratifiedElements[i];
if ( alleleList.size() <= numReadsToRemove )
logAllElements(alleleList, log);
// if we don't need to remove any reads, keep them all
if ( alleleList.size() <= targetAlleleCounts[i] )
elementsToKeep.addAll(alleleList);
else
elementsToKeep.addAll(downsampleElements(alleleList, numReadsToRemove, log));
elementsToKeep.addAll(downsampleElements(alleleList, alleleList.size() - targetAlleleCounts[i], log));
}
// clean up pointers so memory can be garbage collected if needed
@ -93,6 +101,66 @@ public class AlleleBiasedDownsamplingUtils {
return new ReadBackedPileupImpl(pileup.getLocation(), new ArrayList<PileupElement>(elementsToKeep));
}
private static int scoreAlleleCounts(final int[] alleleCounts) {
if ( alleleCounts.length < 2 )
return 0;
// sort the counts (in ascending order)
final int[] alleleCountsCopy = alleleCounts.clone();
Arrays.sort(alleleCountsCopy);
final int maxCount = alleleCountsCopy[alleleCounts.length - 1];
final int nextBestCount = alleleCountsCopy[alleleCounts.length - 2];
int remainderCount = 0;
for ( int i = 0; i < alleleCounts.length - 2; i++ )
remainderCount += alleleCountsCopy[i];
// try to get the best score:
// - in the het case the counts should be equal with nothing else
// - in the hom case the non-max should be zero
return Math.min(maxCount - nextBestCount + remainderCount, Math.abs(nextBestCount + remainderCount));
}
/**
* Computes an allele biased version of the given pileup
*
* @param alleleCounts the original pileup
* @param numReadsToRemove fraction of total reads to remove per allele
* @return allele biased pileup
*/
protected static int[] runSmartDownsampling(final int[] alleleCounts, final int numReadsToRemove) {
final int numAlleles = alleleCounts.length;
int maxScore = scoreAlleleCounts(alleleCounts);
int[] alleleCountsOfMax = alleleCounts;
final int numReadsToRemovePerAllele = numReadsToRemove / 2;
for ( int i = 0; i < numAlleles; i++ ) {
for ( int j = i; j < numAlleles; j++ ) {
final int[] newCounts = alleleCounts.clone();
// split these cases so we don't lose on the floor (since we divided by 2)
if ( i == j ) {
newCounts[i] = Math.max(0, newCounts[i] - numReadsToRemove);
} else {
newCounts[i] = Math.max(0, newCounts[i] - numReadsToRemovePerAllele);
newCounts[j] = Math.max(0, newCounts[j] - numReadsToRemovePerAllele);
}
final int score = scoreAlleleCounts(newCounts);
if ( score < maxScore ) {
maxScore = score;
alleleCountsOfMax = newCounts;
}
}
}
return alleleCountsOfMax;
}
/**
* Performs allele biased down-sampling on a pileup and computes the list of elements to keep
*
@ -102,7 +170,15 @@ public class AlleleBiasedDownsamplingUtils {
* @return the list of pileup elements TO KEEP
*/
private static List<PileupElement> downsampleElements(final ArrayList<PileupElement> elements, final int numElementsToRemove, final PrintStream log) {
if ( numElementsToRemove == 0 )
return elements;
final int pileupSize = elements.size();
if ( numElementsToRemove == pileupSize ) {
logAllElements(elements, log);
return new ArrayList<PileupElement>(0);
}
final BitSet itemsToRemove = new BitSet(pileupSize);
for ( Integer selectedIndex : MathUtils.sampleIndicesWithoutReplacement(pileupSize, numElementsToRemove) ) {
itemsToRemove.set(selectedIndex);
@ -132,15 +208,25 @@ public class AlleleBiasedDownsamplingUtils {
for ( final List<GATKSAMRecord> reads : alleleReadMap.values() )
totalReads += reads.size();
// Down-sample *each* allele by the contamination fraction applied to the entire pileup.
int numReadsToRemove = (int)(totalReads * downsamplingFraction);
final List<GATKSAMRecord> readsToRemove = new ArrayList<GATKSAMRecord>(numReadsToRemove * alleleReadMap.size());
for ( final List<GATKSAMRecord> reads : alleleReadMap.values() ) {
if ( reads.size() <= numReadsToRemove ) {
readsToRemove.addAll(reads);
logAllReads(reads, log);
} else {
readsToRemove.addAll(downsampleReads(reads, numReadsToRemove, log));
// make a listing of allele counts
final List<Allele> alleles = new ArrayList<Allele>(alleleReadMap.keySet());
alleles.remove(Allele.NO_CALL); // ignore the no-call bin
final int numAlleles = alleles.size();
final int[] alleleCounts = new int[numAlleles];
for ( int i = 0; i < numAlleles; i++ )
alleleCounts[i] = alleleReadMap.get(alleles.get(i)).size();
// do smart down-sampling
final int[] targetAlleleCounts = runSmartDownsampling(alleleCounts, numReadsToRemove);
final List<GATKSAMRecord> readsToRemove = new ArrayList<GATKSAMRecord>(numReadsToRemove);
for ( int i = 0; i < numAlleles; i++ ) {
final List<GATKSAMRecord> alleleBin = alleleReadMap.get(alleles.get(i));
if ( alleleBin.size() > targetAlleleCounts[i] ) {
readsToRemove.addAll(downsampleReads(alleleBin, alleleBin.size() - targetAlleleCounts[i], log));
}
}
@ -156,13 +242,22 @@ public class AlleleBiasedDownsamplingUtils {
* @return the list of pileup elements TO REMOVE
*/
private static List<GATKSAMRecord> downsampleReads(final List<GATKSAMRecord> reads, final int numElementsToRemove, final PrintStream log) {
final ArrayList<GATKSAMRecord> readsToRemove = new ArrayList<GATKSAMRecord>(numElementsToRemove);
if ( numElementsToRemove == 0 )
return readsToRemove;
final int pileupSize = reads.size();
if ( numElementsToRemove == pileupSize ) {
logAllReads(reads, log);
return reads;
}
final BitSet itemsToRemove = new BitSet(pileupSize);
for ( Integer selectedIndex : MathUtils.sampleIndicesWithoutReplacement(pileupSize, numElementsToRemove) ) {
itemsToRemove.set(selectedIndex);
}
ArrayList<GATKSAMRecord> readsToRemove = new ArrayList<GATKSAMRecord>(pileupSize - numElementsToRemove);
for ( int i = 0; i < pileupSize; i++ ) {
if ( itemsToRemove.get(i) ) {
final GATKSAMRecord read = reads.get(i);

View File

@ -0,0 +1,108 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.downsampling;
import org.broadinstitute.sting.BaseTest;
import org.testng.Assert;
import org.testng.annotations.Test;
/**
* Basic unit test for AlleleBiasedDownsamplingUtils
*/
public class AlleleBiasedDownsamplingUtilsUnitTest extends BaseTest {
@Test
public void testSmartDownsampling() {
final int[] idealHetAlleleCounts = new int[]{0, 50, 0, 50};
final int[] idealHomAlleleCounts = new int[]{0, 100, 0, 0};
// no contamination, no removal
testOneCase(0, 0, 0, 0, 0.1, 100, idealHetAlleleCounts, idealHetAlleleCounts);
testOneCase(0, 0, 0, 0, 0.1, 100, idealHomAlleleCounts, idealHomAlleleCounts);
// hom sample, het contaminant, different alleles
testOneCase(5, 0, 0, 0, 0.1, 100, idealHomAlleleCounts, idealHomAlleleCounts);
testOneCase(0, 0, 5, 0, 0.1, 100, idealHomAlleleCounts, idealHomAlleleCounts);
testOneCase(0, 0, 0, 5, 0.1, 100, idealHomAlleleCounts, idealHomAlleleCounts);
// hom sample, hom contaminant, different alleles
testOneCase(10, 0, 0, 0, 0.1, 100, idealHomAlleleCounts, idealHomAlleleCounts);
testOneCase(0, 0, 10, 0, 0.1, 100, idealHomAlleleCounts, idealHomAlleleCounts);
testOneCase(0, 0, 0, 10, 0.1, 100, idealHomAlleleCounts, idealHomAlleleCounts);
// het sample, het contaminant, different alleles
testOneCase(5, 0, 0, 0, 0.1, 100, idealHetAlleleCounts, idealHetAlleleCounts);
testOneCase(0, 0, 5, 0, 0.1, 100, idealHetAlleleCounts, idealHetAlleleCounts);
// het sample, hom contaminant, different alleles
testOneCase(10, 0, 0, 0, 0.1, 100, idealHetAlleleCounts, idealHetAlleleCounts);
testOneCase(0, 0, 10, 0, 0.1, 100, idealHetAlleleCounts, idealHetAlleleCounts);
// hom sample, het contaminant, overlapping alleles
final int[] enhancedHomAlleleCounts = new int[]{0, 105, 0, 0};
testOneCase(5, 5, 0, 0, 0.1, 100, idealHomAlleleCounts, enhancedHomAlleleCounts);
testOneCase(0, 5, 5, 0, 0.1, 100, idealHomAlleleCounts, enhancedHomAlleleCounts);
testOneCase(0, 5, 0, 5, 0.1, 100, idealHomAlleleCounts, enhancedHomAlleleCounts);
// hom sample, hom contaminant, overlapping alleles
testOneCase(0, 10, 0, 0, 0.1, 100, idealHomAlleleCounts, new int[]{0, 110, 0, 0});
// het sample, het contaminant, overlapping alleles
testOneCase(5, 5, 0, 0, 0.1, 100, idealHetAlleleCounts, idealHetAlleleCounts);
testOneCase(0, 5, 5, 0, 0.1, 100, idealHetAlleleCounts, idealHetAlleleCounts);
testOneCase(0, 5, 0, 5, 0.1, 100, idealHetAlleleCounts, new int[]{0, 55, 0, 55});
testOneCase(5, 0, 0, 5, 0.1, 100, idealHetAlleleCounts, idealHetAlleleCounts);
testOneCase(0, 0, 5, 5, 0.1, 100, idealHetAlleleCounts, idealHetAlleleCounts);
// het sample, hom contaminant, overlapping alleles
testOneCase(0, 10, 0, 0, 0.1, 100, idealHetAlleleCounts, idealHetAlleleCounts);
testOneCase(0, 0, 0, 10, 0.1, 100, idealHetAlleleCounts, idealHetAlleleCounts);
}
private static void testOneCase(final int addA, final int addC, final int addG, final int addT, final double contaminationFraction,
final int pileupSize, final int[] initialCounts, final int[] targetCounts) {
final int[] actualCounts = initialCounts.clone();
actualCounts[0] += addA;
actualCounts[1] += addC;
actualCounts[2] += addG;
actualCounts[3] += addT;
final int[] results = AlleleBiasedDownsamplingUtils.runSmartDownsampling(actualCounts, (int)(pileupSize * contaminationFraction));
Assert.assertTrue(countsAreEqual(results, targetCounts));
}
private static boolean countsAreEqual(final int[] counts1, final int[] counts2) {
for ( int i = 0; i < 4; i++ ) {
if ( counts1[i] != counts2[i] )
return false;
}
return true;
}
}

View File

@ -90,6 +90,21 @@ public class BQSRIntegrationTest extends WalkerTest {
executeTest("testBQSRFailWithoutDBSNP", spec);
}
@Test
public void testBQSRCSV() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
" -T BaseRecalibrator" +
" -R " + b36KGReference +
" -I " + validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam" +
" -knownSites " + b36dbSNP129 +
" -L 1:10,000,000-10,200,000" +
" -o /dev/null" +
" --plot_pdf_file /dev/null" +
" --intermediate_csv_file %s",
Arrays.asList("d1c38a3418979400630e2bca1140689c"));
executeTest("testBQSR-CSVfile", spec);
}
@Test
public void testBQSRFailWithSolidNoCall() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(

View File

@ -30,7 +30,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
Arrays.asList("cdec335abc9ad8e59335e39a73e0e95a"));
Arrays.asList("847605f4efafef89529fe0e496315edd"));
executeTest("test MultiSample Pilot1", spec);
}
@ -38,7 +38,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testWithAllelesPassedIn1() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
Arrays.asList("efddb5e258f97fd4f6661cff9eaa57de"));
Arrays.asList("5b31b811072a4df04524e13604015f9b"));
executeTest("test MultiSample Pilot2 with alleles passed in", spec1);
}
@ -46,7 +46,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testWithAllelesPassedIn2() {
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
Arrays.asList("24532eb381724cd74e99370da28d49ed"));
Arrays.asList("d9992e55381afb43742cc9b30fcd7538"));
executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
}
@ -54,7 +54,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testSingleSamplePilot2() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
Arrays.asList("062a946160eec1d0fc135d58ca654ff4"));
Arrays.asList("fea530fdc8677e10be4cc11625fa5376"));
executeTest("test SingleSample Pilot2", spec);
}
@ -62,7 +62,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultipleSNPAlleles() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
Arrays.asList("94dc17d76d841f1d3a36160767ffa034"));
Arrays.asList("704888987baacff8c7b273b8ab9938d0"));
executeTest("test Multiple SNP alleles", spec);
}
@ -78,7 +78,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testReverseTrim() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1,
Arrays.asList("9106d01ca0d0a8fedd068e72d509f380"));
Arrays.asList("e14c9b1f9f34d6c16de445bfa385be89"));
executeTest("test reverse trim", spec);
}
@ -86,7 +86,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMismatchedPLs() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1,
Arrays.asList("d847acf841ba8ba653f996ce4869f439"));
Arrays.asList("fb204e821a24d03bd3a671b6e01c449a"));
executeTest("test mismatched PLs", spec);
}
@ -96,7 +96,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
//
// --------------------------------------------------------------------------------------------------------------
private final static String COMPRESSED_OUTPUT_MD5 = "6792419c482e767a3deb28913ed2b1ad";
private final static String COMPRESSED_OUTPUT_MD5 = "5b8f477c287770b5769b05591e35bc2d";
@Test
public void testCompressedOutput() {
@ -149,7 +149,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMinBaseQualityScore() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --min_base_quality_score 26", 1,
Arrays.asList("56157d930da6ccd224bce1ca93f11e41"));
Arrays.asList("6ee6537e9ebc1bfc7c6cf8f04b1582ff"));
executeTest("test min_base_quality_score 26", spec);
}
@ -157,7 +157,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testSLOD() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b36KGReference + " --computeSLOD --no_cmdline_in_header -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
Arrays.asList("6ccb9bd88934e4272d0ce362dd35e603"));
Arrays.asList("55760482335497086458b09e415ecf54"));
executeTest("test SLOD", spec);
}
@ -165,7 +165,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testNDA() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " --annotateNDA -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
Arrays.asList("480437dd6e2760f4ab3194431519f331"));
Arrays.asList("938e888a40182878be4c3cc4859adb69"));
executeTest("test NDA", spec);
}
@ -173,7 +173,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testCompTrack() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -comp:FOO " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
Arrays.asList("22c039412fd387dde6125b07c9a74a25"));
Arrays.asList("7dc186d420487e4e156a24ec8dea0951"));
executeTest("test using comp track", spec);
}
@ -187,17 +187,17 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testOutputParameterSitesOnly() {
testOutputParameters("-sites_only", "40aeb4c9e31fe7046b72afc58e7599cb");
testOutputParameters("-sites_only", "f99c7471127a6fb6f72e136bc873b2c9");
}
@Test
public void testOutputParameterAllConfident() {
testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "c706ca93b25ff83613cb4e95dcac567c");
testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "9dbc9389db39cf9697e93e0bf529314f");
}
@Test
public void testOutputParameterAllSites() {
testOutputParameters("--output_mode EMIT_ALL_SITES", "8a263fd0a94463ce1de9990f2b8ec841");
testOutputParameters("--output_mode EMIT_ALL_SITES", "81fff490c0f59890f1e75dc290833434");
}
private void testOutputParameters(final String args, final String md5) {
@ -211,7 +211,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testConfidence() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
Arrays.asList("df524e98903d96ab9353bee7c16a69de"));
Arrays.asList("4af83a883ecc03a23b0aa6dd4b8f1ceb"));
executeTest("test confidence 1", spec1);
}
@ -222,12 +222,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
// --------------------------------------------------------------------------------------------------------------
@Test
public void testHeterozyosity1() {
testHeterozosity( 0.01, "8e61498ca03a8d805372a64c466b3b42" );
testHeterozosity( 0.01, "8dd37249e0a80afa86594c3f1e720760" );
}
@Test
public void testHeterozyosity2() {
testHeterozosity( 1.0 / 1850, "668d06b5173cf3b97d052726988e1d7b" );
testHeterozosity( 1.0 / 1850, "040d169e20fda56f8de009a6015eb384" );
}
private void testHeterozosity(final double arg, final String md5) {
@ -251,7 +251,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,100,000",
1,
Arrays.asList("908eb5e21fa39e7fb377cf4a9c4c7835"));
Arrays.asList("0e4713e4aa44f4f8fcfea7138295a627"));
executeTest(String.format("test multiple technologies"), spec);
}
@ -270,7 +270,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -L 1:10,000,000-10,100,000" +
" -baq CALCULATE_AS_NECESSARY",
1,
Arrays.asList("c814558bb0ed2e19b12e1a2bf4465d52"));
Arrays.asList("46ea5d1ceb8eed1d0db63c3577915d6c"));
executeTest(String.format("test calling with BAQ"), spec);
}
@ -289,7 +289,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("3593495aab5f6204c65de0b073a6ff65"));
Arrays.asList("50329e15e5139be9e3b643f0b3ba8a53"));
executeTest(String.format("test indel caller in SLX"), spec);
}
@ -304,7 +304,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -minIndelCnt 1" +
" -L 1:10,000,000-10,100,000",
1,
Arrays.asList("8b486a098029d5a106b0a37eff541c15"));
Arrays.asList("2b85e3bd6bf981afaf7324666740d74b"));
executeTest(String.format("test indel caller in SLX with low min allele count"), spec);
}
@ -317,7 +317,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("18efedc50cae2aacaba372265e38310b"));
Arrays.asList("a6fd46eff78827060451a62cffd698a7"));
executeTest(String.format("test indel calling, multiple technologies"), spec);
}
@ -327,7 +327,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("3ff8c7c80a518aa3eb8671a21479de5f"));
Arrays.asList("b8129bf754490cc3c76191d8cc4ec93f"));
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec);
}
@ -337,7 +337,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
+ privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("578c0540f4f2052a634a829bcb9cc27d"));
Arrays.asList("591332fa0b5b22778cf820ee257049d2"));
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec);
}
@ -345,13 +345,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSampleIndels1() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
Arrays.asList("f7d0d0aee603df25c1f0525bb8df189e"));
Arrays.asList("a4761d7f25e7a62f34494801c98a0da7"));
List<File> result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst();
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
Arrays.asList("fc91d457a16b4ca994959c2b5f3f0352"));
Arrays.asList("c526c234947482d1cd2ffc5102083a08"));
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
}
@ -407,7 +407,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMinIndelFraction0() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
assessMinIndelFraction + " -minIndelFrac 0.0", 1,
Arrays.asList("857b8e5df444463ac27f665c4f67fbe2"));
Arrays.asList("90adefd39ed67865b0cb275ad0f07383"));
executeTest("test minIndelFraction 0.0", spec);
}
@ -415,7 +415,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMinIndelFraction25() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
assessMinIndelFraction + " -minIndelFrac 0.25", 1,
Arrays.asList("81d4c7d9010fd6733b2997bc378e7471"));
Arrays.asList("2fded43949e258f8e9f68893c61c1bdd"));
executeTest("test minIndelFraction 0.25", spec);
}
@ -437,7 +437,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testNsInCigar() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + validationDataLocation + "testWithNs.bam -o %s -L 8:141799600-141814700", 1,
Arrays.asList("bd7984a374f0ae5d277bd5fc5065f64f"));
Arrays.asList("d6d40bacd540a41f305420dfea35e04a"));
executeTest("test calling on reads with Ns in CIGAR", spec);
}
@ -451,18 +451,18 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testReducedBam() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
Arrays.asList("9a7cd58b9e3d5b72608c0d529321deba"));
Arrays.asList("c1077662411164182c5f75478344f83d"));
executeTest("test calling on a ReducedRead BAM", spec);
}
@Test
public void testReducedBamSNPs() {
testReducedCalling("SNP", "e7fc11baf208a1bca7b462d3148c936e");
testReducedCalling("SNP", "f5ccbc96d0d66832dd9b3c5cb6507db4");
}
@Test
public void testReducedBamINDELs() {
testReducedCalling("INDEL", "132a4e0ccf9230b5bb4b56c649e2bdd5");
testReducedCalling("INDEL", "3c02ee5187933bed44dc416a2e28511f");
}
@ -483,7 +483,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testContaminationDownsampling() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --contamination_fraction_to_filter 0.20", 1,
Arrays.asList("27dd04159e06d9524fb8a4eef41f96ae"));
Arrays.asList("1f9071466fc40f4c6a0f58ac8e9135fb"));
executeTest("test contamination_percentage_to_filter 0.20", spec);
}

View File

@ -21,17 +21,17 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSample() {
HCTest(CEUTRIO_BAM, "", "aa1df35d6e64d7ca93feb4d2dd15dd0e");
HCTest(CEUTRIO_BAM, "", "56aa4b84606b6b0b7dc78a383974d1b3");
}
@Test
public void testHaplotypeCallerSingleSample() {
HCTest(NA12878_BAM, "", "186c7f322978283c01249c6de2829215");
HCTest(NA12878_BAM, "", "baabae06c85d416920be434939124d7f");
}
@Test
public void testHaplotypeCallerMultiSampleGGA() {
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "de9e78a52207fe62144dba5337965469");
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "39da622b309597d7a0b082c8aa1748c9");
}
private void HCTestComplexVariants(String bam, String args, String md5) {
@ -42,7 +42,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSampleComplex() {
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "000dbb1b48f94d017cfec127c6cabe8f");
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "966d338f423c86a390d685aa6336ec69");
}
private void HCTestSymbolicVariants(String bam, String args, String md5) {
@ -53,7 +53,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerSingleSampleSymbolic() {
HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "d86fae2d1b504b422b7b0cfbbdecc2c4");
HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "7fbc6b9e27e374f2ffe4be952d88c7c6");
}
private void HCTestIndelQualityScores(String bam, String args, String md5) {
@ -64,20 +64,20 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerSingleSampleIndelQualityScores() {
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "b369c2a6cb5c99a424551b33bae16f3b");
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "2581e760279291a3901a506d060bfac8");
}
@Test
public void HCTestProblematicReadsModifiedInActiveRegions() {
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965";
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("f6326adfdf5bc147626b30a89ce06d56"));
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("788176e1717bd28fc7cbc8e3efbb6100"));
executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec);
}
@Test
public void HCTestStructuralIndels() {
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("b6c67ee8e99cc8f53a6587bb26028047"));
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("96ab8253d242b851ccfc218759f79784"));
executeTest("HCTestStructuralIndels: ", spec);
}
@ -91,7 +91,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void HCTestReducedBam() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
Arrays.asList("4beb9f87ab3f316a9384c3d0dca6ebe9"));
Arrays.asList("425f1a0fb00d7145edf1c55e54346fae"));
executeTest("HC calling on a ReducedRead BAM", spec);
}
}

View File

@ -75,8 +75,9 @@ public class RecalibrationArgumentCollection {
/**
* If not provided, then a temporary file is created and then deleted upon completion.
* For advanced users only.
*/
@Hidden
@Advanced
@Argument(fullName = "intermediate_csv_file", shortName = "intermediate", doc = "The intermediate csv file to create", required = false)
public File RECAL_CSV_FILE = null;

View File

@ -191,7 +191,7 @@ public class CallableLoci extends LocusWalker<CallableLoci.CallableBaseState, Ca
*/
@Advanced
@Argument(fullName = "format", shortName = "format", doc = "Output format", required = false)
OutputFormat outputFormat;
OutputFormat outputFormat = OutputFormat.BED;
public enum OutputFormat {
/**
@ -297,7 +297,7 @@ public class CallableLoci extends LocusWalker<CallableLoci.CallableBaseState, Ca
}
public String toString() {
return String.format("%s %d %d %s", loc.getContig(), loc.getStart(), loc.getStop(), state);
return String.format("%s\t%d\t%d\t%s", loc.getContig(), loc.getStart()-1, loc.getStop(), state);
}
}

View File

@ -199,7 +199,7 @@ public class RecalDatum {
@Override
public String toString() {
return String.format("%.2f,%,2f,%.2f", getNumObservations(), getNumMismatches(), getEmpiricalQuality());
return String.format("%.2f,%.2f,%.2f", getNumObservations(), getNumMismatches(), getEmpiricalQuality());
}
public String stringForCSV() {

View File

@ -646,7 +646,7 @@ public class AlignmentUtils {
// get the indel element and move it left one base
CigarElement ce = cigar.getCigarElement(indexOfIndel - 1);
elements.add(new CigarElement(ce.getLength() - 1, ce.getOperator()));
elements.add(new CigarElement(Math.max(ce.getLength() - 1, 0), ce.getOperator()));
elements.add(cigar.getCigarElement(indexOfIndel));
if (indexOfIndel + 1 < cigar.numCigarElements()) {
ce = cigar.getCigarElement(indexOfIndel + 1);

View File

@ -38,7 +38,7 @@ public class CallableLociWalkerIntegrationTest extends WalkerTest {
public void testCallableLociWalkerBed() {
String gatk_args = commonArgs + " -format BED -L 1:10,000,000-11,000,000 -summary %s";
WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 2,
Arrays.asList("9e4ec9c23f21a8162d27a39ab057398c", SUMMARY_MD5));
Arrays.asList("42e86c06c167246a28bffdacaca75ffb", SUMMARY_MD5));
executeTest("formatBed", spec);
}
@ -46,7 +46,7 @@ public class CallableLociWalkerIntegrationTest extends WalkerTest {
public void testCallableLociWalkerPerBase() {
String gatk_args = commonArgs + " -format STATE_PER_BASE -L 1:10,000,000-11,000,000 -summary %s";
WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 2,
Arrays.asList("e6044b4495ef24f542403e6a94437068", SUMMARY_MD5));
Arrays.asList("d66c525d9c70f62df8156261d3e535ad", SUMMARY_MD5));
executeTest("format_state_per_base", spec);
}
@ -54,7 +54,7 @@ public class CallableLociWalkerIntegrationTest extends WalkerTest {
public void testCallableLociWalker2() {
String gatk_args = commonArgs + " -format BED -L 1:10,000,000-10,000,100 -L 1:10,000,110-10,000,120 -summary %s";
WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 2,
Arrays.asList("c671f65712d9575b8b3e1f1dbedc146e", "d287510eac04acf5a56f5cde2cba0e4a"));
Arrays.asList("330f476085533db92a9dbdb3a127c041", "d287510eac04acf5a56f5cde2cba0e4a"));
executeTest("formatBed by interval", spec);
}
@ -62,7 +62,7 @@ public class CallableLociWalkerIntegrationTest extends WalkerTest {
public void testCallableLociWalker3() {
String gatk_args = commonArgs + " -format BED -L 1:10,000,000-11,000,000 -minDepth 10 -maxDepth 100 --minBaseQuality 10 --minMappingQuality 20 -summary %s";
WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 2,
Arrays.asList("b7d26a470ef906590249f2fa45fd6bdd", "da431d393f7c2b2b3e27556b86c1dbc7"));
Arrays.asList("46a53379aaaf9803276a0a34b234f6ab", "da431d393f7c2b2b3e27556b86c1dbc7"));
executeTest("formatBed lots of arguments", spec);
}
}