diff --git a/build.xml b/build.xml
index 47e4eeb47..3aead62d6 100644
--- a/build.xml
+++ b/build.xml
@@ -111,10 +111,9 @@
-
+
-
@@ -273,19 +272,19 @@
-
-
-
-
-
-
+
+
-
-
-
-
-
-
+
+
@@ -1031,6 +1030,14 @@
+
+
+
+
+
+
+
+
@@ -1038,7 +1045,7 @@
-
+
@@ -1135,12 +1142,35 @@
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -1148,6 +1178,7 @@
+
@@ -1247,7 +1278,7 @@
-
+
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java
index cdd31a5ef..5cdc15e5e 100644
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java
@@ -55,36 +55,36 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest {
@Test(enabled = true)
public void testSNP_ACS_Pools() {
- PC_LSV_Test_short(" -maxAltAlleles 1 -ploidy 6 -out_mode EMIT_ALL_CONFIDENT_SITES","LSV_SNP_ACS","SNP","df0e67c975ef74d593f1c704daab1705");
+ PC_LSV_Test_short(" -maxAltAlleles 1 -ploidy 6 -out_mode EMIT_ALL_CONFIDENT_SITES","LSV_SNP_ACS","SNP","651469eeacdb3ab9e2690cfb71f6a634");
}
@Test(enabled = true)
public void testBOTH_GGA_Pools() {
- PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","7e5b28c9e21cc7e45c58c41177d8a0fc");
+ PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","be7dc20bdb5f200d189706bcf1aeb7ee");
}
@Test(enabled = true)
public void testINDEL_GGA_Pools() {
- PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","ae6c276cc46785a794acff6f7d10ecf7");
+ PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","25e5ea86d87b7d7ddaad834a6ed7481d");
}
@Test(enabled = true)
public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() {
- PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","6987b89e04dcb604d3743bb09aa9587d");
+ PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","cdbf268d282e57189a88fb83f0e1fd72");
}
@Test(enabled = true)
public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() {
- PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","d0780f70365ed1b431099fd3b4cec449");
+ PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","2ed40925cd112c1a45470d215b7ec4b3");
}
@Test(enabled = true)
public void testMT_SNP_DISCOVERY_sp4() {
- PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","3fc6f4d458313616727c60e49c0e852b");
+ PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","33695a998bcc906cabcc758727004387");
}
@Test(enabled = true)
public void testMT_SNP_GGA_sp10() {
- PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "1bebbc0f28bff6fd64736ccca8839df8");
+ PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "b2725242114bf9cc9bca14679705ba40");
}
}
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
index a8ba92634..e40a7ed38 100755
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
@@ -30,7 +30,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
- Arrays.asList("847605f4efafef89529fe0e496315edd"));
+ Arrays.asList("2ba9af34d2a4d55caf152265a30ead46"));
executeTest("test MultiSample Pilot1", spec);
}
@@ -38,7 +38,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testWithAllelesPassedIn1() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
- Arrays.asList("5b31b811072a4df04524e13604015f9b"));
+ Arrays.asList("0630c35c070d7a7e0cf22b3cce797f22"));
executeTest("test MultiSample Pilot2 with alleles passed in", spec1);
}
@@ -46,7 +46,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testWithAllelesPassedIn2() {
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
- Arrays.asList("d9992e55381afb43742cc9b30fcd7538"));
+ Arrays.asList("5857dcb4e6a8422ae0813e42d433b122"));
executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
}
@@ -54,7 +54,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testSingleSamplePilot2() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
- Arrays.asList("fea530fdc8677e10be4cc11625fa5376"));
+ Arrays.asList("489deda5d3276545364a06b7385f8bd9"));
executeTest("test SingleSample Pilot2", spec);
}
@@ -62,7 +62,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultipleSNPAlleles() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
- Arrays.asList("b41b95aaa2c453c9b75b3b29a9c2718e"));
+ Arrays.asList("595ba44c75d08dab98df222b8e61ab70"));
executeTest("test Multiple SNP alleles", spec);
}
@@ -70,7 +70,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testBadRead() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH -I " + privateTestDir + "badRead.test.bam -o %s -L 1:22753424-22753464", 1,
- Arrays.asList("d915535c1458733f09f82670092fcab6"));
+ Arrays.asList("360f9795facdaa14c0cb4b05207142e4"));
executeTest("test bad read", spec);
}
@@ -78,7 +78,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testReverseTrim() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1,
- Arrays.asList("e14c9b1f9f34d6c16de445bfa385be89"));
+ Arrays.asList("4b4a62429f8eac1e2f27ba5e2edea9e5"));
executeTest("test reverse trim", spec);
}
@@ -86,7 +86,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMismatchedPLs() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1,
- Arrays.asList("935ee705ffe8cc6bf1d9efcceea271c8"));
+ Arrays.asList("cc892c91a93dbd8dbdf645803f35a0ee"));
executeTest("test mismatched PLs", spec);
}
@@ -96,7 +96,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
//
// --------------------------------------------------------------------------------------------------------------
- private final static String COMPRESSED_OUTPUT_MD5 = "af8187e2baf516dde1cddea787a52b8a";
+ private final static String COMPRESSED_OUTPUT_MD5 = "3fc7d2681ff753e2d68605d7cf8b63e3";
@Test
public void testCompressedOutput() {
@@ -149,7 +149,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMinBaseQualityScore() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --min_base_quality_score 26", 1,
- Arrays.asList("6ee6537e9ebc1bfc7c6cf8f04b1582ff"));
+ Arrays.asList("04dc83d7dfb42b8cada91647bd9f32f1"));
executeTest("test min_base_quality_score 26", spec);
}
@@ -157,7 +157,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testSLOD() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b36KGReference + " --computeSLOD --no_cmdline_in_header -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
- Arrays.asList("55760482335497086458b09e415ecf54"));
+ Arrays.asList("4429a665a1048f958db3c204297cdb9f"));
executeTest("test SLOD", spec);
}
@@ -165,7 +165,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testNDA() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " --annotateNDA -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
- Arrays.asList("938e888a40182878be4c3cc4859adb69"));
+ Arrays.asList("f063e3573c513eaa9ce7d7df22143362"));
executeTest("test NDA", spec);
}
@@ -173,7 +173,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testCompTrack() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -comp:FOO " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
- Arrays.asList("7dc186d420487e4e156a24ec8dea0951"));
+ Arrays.asList("d76e93e2676354dde832f08a508c6f88"));
executeTest("test using comp track", spec);
}
@@ -187,17 +187,17 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testOutputParameterSitesOnly() {
- testOutputParameters("-sites_only", "f99c7471127a6fb6f72e136bc873b2c9");
+ testOutputParameters("-sites_only", "1a65172b9bd7a2023d48bc758747b34a");
}
@Test
public void testOutputParameterAllConfident() {
- testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "9dbc9389db39cf9697e93e0bf529314f");
+ testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "3f1fa34d8440f6f21654ce60c0ba8f28");
}
@Test
public void testOutputParameterAllSites() {
- testOutputParameters("--output_mode EMIT_ALL_SITES", "8b26088a035e579c4afd3b46737291e4");
+ testOutputParameters("--output_mode EMIT_ALL_SITES", "f240434b4d3c234f6f9e349e9ec05f4e");
}
private void testOutputParameters(final String args, final String md5) {
@@ -211,7 +211,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testConfidence() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
- Arrays.asList("4af83a883ecc03a23b0aa6dd4b8f1ceb"));
+ Arrays.asList("aec378bed312b3557c6dd7ec740c8091"));
executeTest("test confidence 1", spec1);
}
@@ -222,12 +222,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
// --------------------------------------------------------------------------------------------------------------
@Test
public void testHeterozyosity1() {
- testHeterozosity( 0.01, "8dd37249e0a80afa86594c3f1e720760" );
+ testHeterozosity( 0.01, "5da6b24033a6b02f466836443d49560e" );
}
@Test
public void testHeterozyosity2() {
- testHeterozosity( 1.0 / 1850, "040d169e20fda56f8de009a6015eb384" );
+ testHeterozosity( 1.0 / 1850, "1f284c4af967a3c26687164f9441fb16" );
}
private void testHeterozosity(final double arg, final String md5) {
@@ -251,7 +251,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,100,000",
1,
- Arrays.asList("0e4713e4aa44f4f8fcfea7138295a627"));
+ Arrays.asList("cff553c53de970f64051ed5711407038"));
executeTest(String.format("test multiple technologies"), spec);
}
@@ -270,7 +270,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -L 1:10,000,000-10,100,000" +
" -baq CALCULATE_AS_NECESSARY",
1,
- Arrays.asList("46ea5d1ceb8eed1d0db63c3577915d6c"));
+ Arrays.asList("f960a91963e614a6c8d8cda57836df24"));
executeTest(String.format("test calling with BAQ"), spec);
}
@@ -289,7 +289,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
- Arrays.asList("f6f8fbf733f20fbc1dd9ebaf8faefe6c"));
+ Arrays.asList("46a6d24c82ebb99d305462960fa09b7c"));
executeTest(String.format("test indel caller in SLX"), spec);
}
@@ -304,7 +304,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -minIndelCnt 1" +
" -L 1:10,000,000-10,100,000",
1,
- Arrays.asList("4438ad0f03bbdd182d9bb59b15af0fa5"));
+ Arrays.asList("2be25321bbc6a963dba7ecba5dd76802"));
executeTest(String.format("test indel caller in SLX with low min allele count"), spec);
}
@@ -317,7 +317,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
- Arrays.asList("27b4ace2ad5a83d8cccb040f97f29183"));
+ Arrays.asList("d6b2657cd5a4a949968cdab50efce515"));
executeTest(String.format("test indel calling, multiple technologies"), spec);
}
@@ -327,7 +327,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
- Arrays.asList("b8129bf754490cc3c76191d8cc4ec93f"));
+ Arrays.asList("9cff66a321284c362f393bc4db21f756"));
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec);
}
@@ -337,7 +337,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
+ privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
- Arrays.asList("591332fa0b5b22778cf820ee257049d2"));
+ Arrays.asList("90c8cfcf65152534c16ed81104fc3bcd"));
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec);
}
@@ -345,13 +345,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSampleIndels1() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
- Arrays.asList("d3d518448b01bf0f751824b3d946cd04"));
+ Arrays.asList("457b8f899cf1665de61e75084dbb79d0"));
List result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst();
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
- Arrays.asList("2ea18a3e8480718a80a415d3fea79f54"));
+ Arrays.asList("a13fe7aa3b9e8e091b3cf3442a056ec1"));
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
}
@@ -361,7 +361,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + privateTestDir + vcf + " -I " + validationDataLocation +
"NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam -o %s -L " + validationDataLocation + vcf, 1,
- Arrays.asList("d76eacc4021b78ccc0a9026162e814a7"));
+ Arrays.asList("d075ad318739c8c56bdce857da1e48b9"));
executeTest("test GENOTYPE_GIVEN_ALLELES with no evidence in reads", spec);
}
@@ -373,7 +373,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 20:10,000,000-10,100,000",
1,
- Arrays.asList("1e0d2c15546c3b0959b00ffb75488b56"));
+ Arrays.asList("91c632ab17a1dd89ed19ebb20324f905"));
executeTest(String.format("test UG with base indel quality scores"), spec);
}
@@ -407,7 +407,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMinIndelFraction0() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
assessMinIndelFraction + " -minIndelFrac 0.0", 1,
- Arrays.asList("90adefd39ed67865b0cb275ad0f07383"));
+ Arrays.asList("1d80e135d611fe19e1fb1882aa588a73"));
executeTest("test minIndelFraction 0.0", spec);
}
@@ -415,7 +415,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMinIndelFraction25() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
assessMinIndelFraction + " -minIndelFrac 0.25", 1,
- Arrays.asList("2fded43949e258f8e9f68893c61c1bdd"));
+ Arrays.asList("752139616752902fca13c312d8fe5e22"));
executeTest("test minIndelFraction 0.25", spec);
}
@@ -423,7 +423,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMinIndelFraction100() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
assessMinIndelFraction + " -minIndelFrac 1", 1,
- Arrays.asList("3f07efb768e08650a7ce333edd4f9a52"));
+ Arrays.asList("d66b9decf26e1704abda1a919ac149cd"));
executeTest("test minIndelFraction 1.0", spec);
}
@@ -437,7 +437,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testNsInCigar() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "testWithNs.bam -o %s -L 8:141813600-141813700 -out_mode EMIT_ALL_SITES", 1,
- Arrays.asList("4d36969d4f8f1094f1fb6e7e085c19f6"));
+ Arrays.asList("b62ba9777efc05af4c36e2d4ce3ee67c"));
executeTest("test calling on reads with Ns in CIGAR", spec);
}
@@ -451,18 +451,18 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testReducedBam() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
- Arrays.asList("092e42a712afb660ec79ff11c55933e2"));
+ Arrays.asList("f72ecd00b2913f63788faa7dabb1d102"));
executeTest("test calling on a ReducedRead BAM", spec);
}
@Test
public void testReducedBamSNPs() {
- testReducedCalling("SNP", "c0de74ab8f4f14eb3a2c5d55c200ac5f");
+ testReducedCalling("SNP", "f059743858004ceee325f2a7761a2362");
}
@Test
public void testReducedBamINDELs() {
- testReducedCalling("INDEL", "9d5418ddf1b227ae4d463995507f2b1c");
+ testReducedCalling("INDEL", "04845ba1ec7d8d8b0eab2ca6bdb9c1a6");
}
@@ -483,7 +483,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testContaminationDownsampling() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --contamination_fraction_to_filter 0.20", 1,
- Arrays.asList("1f9071466fc40f4c6a0f58ac8e9135fb"));
+ Arrays.asList("b500ad5959bce69f888a2fac024647e5"));
executeTest("test contamination_percentage_to_filter 0.20", spec);
}
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
index 8422d856e..bb9efe15d 100644
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
@@ -21,19 +21,19 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSample() {
- HCTest(CEUTRIO_BAM, "", "839de31b41d4186e2b12a5601525e894");
+ HCTest(CEUTRIO_BAM, "", "7122d4f0ef94c5274aa3047cfebe08ed");
}
@Test
public void testHaplotypeCallerSingleSample() {
- HCTest(NA12878_BAM, "", "2b68faa0e0493d92491d74b8f731963a");
+ HCTest(NA12878_BAM, "", "6cd6e6787521c07a7bae98766fd628ab");
}
// TODO -- add more tests for GGA mode, especially with input alleles that are complex variants and/or not trimmed
@Test
public void testHaplotypeCallerMultiSampleGGA() {
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
- "a2d56179cd19a41f8bfb995e225320bb");
+ "44df2a9da4fbd2162ae44c3f2a6ef01f");
}
private void HCTestComplexVariants(String bam, String args, String md5) {
@@ -44,7 +44,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSampleComplex() {
- HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "fd8d2ae8db9d98e932b0a7f345631eec");
+ HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "4a413eeb7a75cab0ab5370b4c08dcf8e");
}
private void HCTestSymbolicVariants(String bam, String args, String md5) {
@@ -55,7 +55,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerSingleSampleSymbolic() {
- HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "0761ff5cbf279be467833fa6708bf360");
+ HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "77cf5b5273828dd1605bb23a5aeafcaa");
}
private void HCTestIndelQualityScores(String bam, String args, String md5) {
@@ -66,20 +66,20 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerSingleSampleIndelQualityScores() {
- HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "6380e25c1ec79c6ae2f891ced15bf4e1");
+ HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "87ca97f90e74caee35c35616c065821c");
}
@Test
public void HCTestProblematicReadsModifiedInActiveRegions() {
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965";
- final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("3a096d6139d15dcab82f5b091d08489d"));
+ final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("3df42d0550b51eb9b55aac61e8b3c452"));
executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec);
}
@Test
public void HCTestStructuralIndels() {
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
- final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("a518c7436544f2b5f71c9d9427ce1cce"));
+ final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("4dbc72b72e3e2d9d812d5a398490e213"));
executeTest("HCTestStructuralIndels: ", spec);
}
@@ -93,7 +93,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void HCTestReducedBam() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
- Arrays.asList("8a400b0c46f41447fcc35a907e34f384"));
+ Arrays.asList("f8c2745bf71f2659a57494fcaa2c103b"));
executeTest("HC calling on a ReducedRead BAM", spec);
}
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java
index 8d0cefaa4..f8aec1489 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java
@@ -284,7 +284,7 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
protected boolean abortExecution() {
final boolean abort = engine.exceedsRuntimeLimit(progressMeter.getRuntimeInNanoseconds(), TimeUnit.NANOSECONDS);
if ( abort ) {
- final AutoFormattingTime aft = new AutoFormattingTime(TimeUnit.SECONDS.convert(engine.getRuntimeLimitInNanoseconds(), TimeUnit.NANOSECONDS), 1, 4);
+ final AutoFormattingTime aft = new AutoFormattingTime(engine.getRuntimeLimitInNanoseconds(), -1, 4);
logger.info("Aborting execution (cleanly) because the runtime has exceeded the requested maximum " + aft);
}
return abort;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java
index af27d9c6f..24bac9deb 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java
@@ -13,6 +13,7 @@ import org.broadinstitute.variant.vcf.VCFInfoHeaderLine;
import org.broadinstitute.variant.variantcontext.Genotype;
import org.broadinstitute.variant.variantcontext.GenotypesContext;
import org.broadinstitute.variant.variantcontext.VariantContext;
+import org.broadinstitute.variant.variantcontext.Allele;
import java.util.Arrays;
import java.util.HashMap;
@@ -68,16 +69,54 @@ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotati
return null;
double QD = -10.0 * vc.getLog10PError() / (double)depth;
-
Map map = new HashMap();
+
+        if ( ! vc.isSNP() && ! vc.isSymbolic() ) {
+            // Normalize QD by the average event length, weighted by the called chromosome
+            // count of each alternate allele, and report that average length as AAL.
+            int averageLengthNum = 0;
+            int averageLengthDenom = 0;
+            final int refLength = vc.getReference().length();
+            final byte[] ref_bases = vc.getReference().getBases();
+            for ( Allele a : vc.getAlternateAlleles() ) {
+                int numAllele = vc.getCalledChrCount(a);
+                int alleleSize;
+                if ( a.length() == refLength ) {
+                    // SNP or MNP: the event length is the number of mismatching bases
+                    byte[] a_bases = a.getBases();
+                    int n_mismatch = 0;
+                    for ( int idx = 0; idx < a_bases.length; idx++ ) {
+                        if ( a_bases[idx] != ref_bases[idx] )
+                            n_mismatch++;
+                    }
+                    alleleSize = n_mismatch;
+                }
+                else if ( a.isSymbolic() ) {
+                    alleleSize = 1;
+                } else {
+                    alleleSize = Math.abs(refLength-a.length());
+                }
+                averageLengthNum += alleleSize*numAllele;
+                averageLengthDenom += numAllele;
+            }
+            // With no called alternate chromosomes the ratio is 0/0 (NaN), and a zero total length
+            // would make QD infinite; in both cases leave QD unadjusted and omit AAL.
+            if ( averageLengthNum > 0 ) {
+                double averageLength = ( (double) averageLengthNum )/averageLengthDenom;
+                QD /= averageLength;
+                map.put(getKeyNames().get(1),String.format("%.2f",averageLength));
+            }
+        }
+
map.put(getKeyNames().get(0), String.format("%.2f", QD));
return map;
}
- public List getKeyNames() { return Arrays.asList("QD"); }
+ public List getKeyNames() { return Arrays.asList("QD","AAL"); }
public List getDescriptions() {
- return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Variant Confidence/Quality by Depth"));
+ return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Variant Confidence/Quality by Depth"),
+ new VCFInfoHeaderLine(getKeyNames().get(1), 1, VCFHeaderLineType.Float, "Average Allele Length"));
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java
index c9270a6a7..3681451c6 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java
@@ -50,7 +50,7 @@ public class ExactCallLogger implements Cloneable {
return String.format("ExactCall %s:%d alleles=%s nSamples=%s orig.pNonRef=%.2f orig.runtime=%s",
vc.getChr(), vc.getStart(), vc.getAlleles(), vc.getNSamples(),
originalCall.getLog10PosteriorOfAFGT0(),
- new AutoFormattingTime(runtime / 1e9).toString());
+ new AutoFormattingTime(runtime).toString());
}
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SAMRecordCoordinateComparatorWithUnmappedReads.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SAMRecordCoordinateComparatorWithUnmappedReads.java
deleted file mode 100644
index 3854a4a8c..000000000
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SAMRecordCoordinateComparatorWithUnmappedReads.java
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * The MIT License
- *
- * Copyright (c) 2009 The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-package org.broadinstitute.sting.gatk.walkers.indels;
-
-import net.sf.samtools.SAMRecord;
-import net.sf.samtools.SAMRecordCoordinateComparator;
-
-/**
- * Extends Picard's Comparator for sorting SAMRecords by coordinate. This one actually deals with unmapped reads
- * (among other things) sitting at the same position as their mates (so that they both can be put into the same set).
- */
-public class SAMRecordCoordinateComparatorWithUnmappedReads extends SAMRecordCoordinateComparator {
- public int compare(final SAMRecord samRecord1, final SAMRecord samRecord2) {
- int cmp = fileOrderCompare(samRecord1, samRecord2);
- if ( cmp != 0 )
- return cmp;
-
- // deal with unmapped reads
- if ( samRecord1.getReadUnmappedFlag() != samRecord2.getReadUnmappedFlag() )
- return (samRecord1.getReadUnmappedFlag()? 1: -1);
-
- if ( samRecord1.getReadNegativeStrandFlag() != samRecord2.getReadNegativeStrandFlag() )
- return (samRecord1.getReadNegativeStrandFlag()? 1: -1);
-
- // even the names can be the same
- cmp = samRecord1.getReadName().compareTo(samRecord2.getReadName());
- if ( cmp != 0 )
- return cmp;
-
- if ( samRecord1.getDuplicateReadFlag() != samRecord2.getDuplicateReadFlag() )
- return (samRecord1.getDuplicateReadFlag()? -1: 1);
-
- if ( samRecord1.getReadPairedFlag() && samRecord2.getReadPairedFlag() && samRecord1.getFirstOfPairFlag() != samRecord2.getFirstOfPairFlag() )
- return (samRecord1.getFirstOfPairFlag()? -1: 1);
-
- // such a case was actually observed
- return samRecord1.getMappingQuality() - samRecord2.getMappingQuality();
- }
-}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetector.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetector.java
deleted file mode 100644
index 68be1629c..000000000
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetector.java
+++ /dev/null
@@ -1,2291 +0,0 @@
-/*
- * Copyright (c) 2010 The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
- * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-package org.broadinstitute.sting.gatk.walkers.indels;
-
-import net.sf.samtools.*;
-import org.apache.commons.jexl2.Expression;
-import org.apache.commons.jexl2.JexlContext;
-import org.apache.commons.jexl2.JexlEngine;
-import org.apache.commons.jexl2.MapContext;
-import org.broad.tribble.Feature;
-import org.broadinstitute.sting.commandline.*;
-import org.broadinstitute.sting.gatk.CommandLineGATK;
-import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
-import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
-import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
-import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter;
-import org.broadinstitute.sting.gatk.filters.Platform454Filter;
-import org.broadinstitute.sting.gatk.filters.PlatformUnitFilter;
-import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
-import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
-import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
-import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
-import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
-import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
-import org.broadinstitute.sting.gatk.walkers.ReadFilters;
-import org.broadinstitute.sting.gatk.walkers.ReadWalker;
-import org.broadinstitute.sting.utils.GenomeLoc;
-import org.broadinstitute.sting.utils.GenomeLocSortedSet;
-import org.broadinstitute.sting.utils.SampleUtils;
-import org.broadinstitute.sting.utils.codecs.refseq.RefSeqCodec;
-import org.broadinstitute.sting.utils.codecs.refseq.RefSeqFeature;
-import org.broadinstitute.sting.utils.codecs.refseq.Transcript;
-import org.broadinstitute.variant.vcf.*;
-import org.broadinstitute.sting.utils.collections.CircularArray;
-import org.broadinstitute.sting.utils.collections.PrimitivePair;
-import org.broadinstitute.sting.utils.exceptions.StingException;
-import org.broadinstitute.sting.utils.exceptions.UserException;
-import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
-import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
-import org.broadinstitute.sting.utils.interval.IntervalUtils;
-import org.broadinstitute.sting.utils.interval.OverlappingIntervalIterator;
-import org.broadinstitute.sting.utils.sam.AlignmentUtils;
-import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
-import org.broadinstitute.variant.variantcontext.*;
-import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
-
-import java.io.*;
-import java.util.*;
-
-
-/**
- * Tool for calling indels in Tumor-Normal paired sample mode; this tool supports single-sample mode as well,
- * but this latter functionality is now superceded by UnifiedGenotyper.
- *
- *
- * This is a simple, counts-and-cutoffs based tool for calling indels from aligned (preferrably MSA cleaned) sequencing
- * data. Supported output formats are: BED format, extended verbose output (tab separated), and VCF. The latter two outputs
- * include additional statistics such as mismatches and base qualitites around the calls, read strandness (how many
- * forward/reverse reads support ref and indel alleles) etc. It is highly recommended to use these additional
- * statistics to perform post-filtering of the calls as the tool is tuned for sensitivity (in other words it will
- * attempt to "call" anything remotely reasonable based only on read counts and will generate all the additional
- * metrics for the post-processing tools to make the final decision). The calls are performed by default
- * from a matched tumor-normal pair of samples. In this case, two (sets of) input bam files must be specified using tagged -I
- * command line arguments: normal and tumor bam(s) must be passed with -I:normal and -I:tumor arguments,
- * respectively. Indels are called from the tumor sample and annotated as germline
- * if even a weak evidence for the same indel, not necessarily a confident call, exists in the normal sample, or as somatic
- * if normal sample has coverage at the site but no indication for an indel. Note that strictly speaking the calling
- * is not even attempted in normal sample: if there is an indel in normal that is not detected/does not pass a threshold
- * in tumor sample, it will not be reported.
- *
- * To make indel calls and associated metrics for a single sample, this tool can be run with --unpaired flag (input
- * bam tagging is not required in this case, and tags are completely ignored if still used: all input bams will be merged
- * on the fly and assumed to represent a single sample - this tool does not check for sample id in the read groups).
- *
- * Which (putative) calls will make it into the output file(s) is controlled by an expression/list of expressions passed with -filter
- * flag: if any of the expressions evaluate to TRUE, the site will be discarded. Otherwise the putative call and all the
- * associated statistics will be printed into the output. Expressions recognize the following variables(in paired-sample
- * somatic mode variables are prefixed with T_ and N_ for Tumor and Normal, e.g. N_COV and T_COV are defined instead of COV):
- * COV for coverage at the site, INDEL_F for fraction of reads supporting consensus indel at the site (wrt total coverage),
- * INDEL_CF for fraction of reads with consensus indel wrt all reads with an indel at the site, CONS_CNT for the count of
- * reads supporting the consensus indel at the site. Conventional arithmetic and logical operations are supported. For instance,
- * N_COV<4||T_COV<6||T_INDEL_F<0.3||T_INDEL_CF<0.7 instructs the tool to only output indel calls with at least 30% observed
- * allelic fraction and with consensus indel making at least 70% of all indel observations at the site, and only at the sites
- * where tumor coverage and normal coverage are at least 6 and 4, respectively.
- *
Input
- *
- * Tumor and normal bam files (or single sample bam file(s) in --unpaired mode).
- *
- *
- */
-
-@DocumentedGATKFeature( groupName = "Cancer-specific Variant Discovery Tools", extraDocs = {CommandLineGATK.class} )
-@ReadFilters({Platform454Filter.class, MappingQualityZeroFilter.class, PlatformUnitFilter.class})
-public class SomaticIndelDetector extends ReadWalker {
-// @Output
-// PrintStream out;
- @Output(doc="File to write variants (indels) in VCF format",required=true)
- protected VariantContextWriter vcf_writer = null;
-
- @Argument(fullName="outputFile", shortName="O", doc="output file name (BED format). DEPRECATED> Use --bed", required=true)
- @Deprecated
- java.io.File output_file;
-
- @Argument(fullName = "metrics_file", shortName = "metrics", doc = "File to print callability metrics output", required = false)
- public PrintStream metricsWriter = null;
-
-// @Argument(fullName="vcf_format", shortName="vcf", doc="generate output file in VCF format", required=false)
-// boolean FORMAT_VCF = false;
-
- @Hidden
- @Input(fullName = "genotype_intervals", shortName = "genotype",
- doc = "Calls will be made at each position within the specified interval(s), whether there is an indel or not", required = false)
- public IntervalBinding genotypeIntervalsFile = null;
-
- @Hidden
- @Argument(fullName="unpaired", shortName="unpaired",
- doc="Perform unpaired calls (no somatic status detection)", required=false)
- boolean call_unpaired = false;
- boolean call_somatic ;
-
- @Argument(fullName="verboseOutput", shortName="verbose",
- doc="Verbose output file in text format", required=false)
- java.io.File verboseOutput = null;
-
- @Argument(fullName="bedOutput", shortName="bed",
- doc="Lightweight bed output file (only positions and events, no stats/annotations)", required=false)
- java.io.File bedOutput = null;
-
- @Deprecated
- @Argument(fullName="minCoverage", shortName="minCoverage",
- doc="indel calls will be made only at sites with tumor coverage of minCoverage or more reads; "+
- "with --unpaired (single sample) option, this value is used for minimum sample coverage. "+
- "INSTEAD USE: T_COV FILTER_EXPRESSIONS = new ArrayList();
-
-//@Argument(fullName="blacklistedLanes", shortName="BL",
-// doc="Name of lanes (platform units) that should be ignored. Reads coming from these lanes will never be seen "+
-// "by this application, so they will not contribute indels to consider and will not be counted.", required=false)
-//PlatformUnitFilterHelper dummy;
-
- @Hidden
- @Argument(fullName="indel_debug", shortName="idebug", doc="Detailed printout for debugging, do not turn this on",
- required=false) Boolean DEBUG = false;
- @Argument(fullName="window_size", shortName="ws", doc="Size (bp) of the sliding window used for accumulating the coverage. "+
- "May need to be increased to accomodate longer reads or longer deletions. A read can be fit into the "+
- "window if its length on the reference (i.e. read length + length of deletion gap(s) if any) is smaller "+
- "than the window size. Reads that do not fit will be ignored, so long deletions can not be called "+
- "if window is too small",required=false) int WINDOW_SIZE = 200;
- @Argument(fullName="maxNumberOfReads",shortName="mnr",doc="Maximum number of reads to cache in the window; if number of reads exceeds this number,"+
- " the window will be skipped and no calls will be made from it",required=false) int MAX_READ_NUMBER = 10000;
-
-
-
- private WindowContext tumor_context;
- private WindowContext normal_context;
- private int currentContigIndex = -1;
- private int contigLength = -1; // we see to much messy data with reads hanging out of contig ends...
- private int currentPosition = -1; // position of the last read we've seen on the current contig
- private String refName = null;
- private java.io.Writer output = null;
- private GenomeLoc location = null;
- private long normalCallsMade = 0L, tumorCallsMade = 0L;
-
- boolean outOfContigUserWarned = false;
-
- private LocationAwareSeekableRODIterator refseqIterator=null;
-
-// private Set normalReadGroups; // we are going to remember which read groups are normals and which are tumors in order to be able
-// private Set tumorReadGroups ; // to properly assign the reads coming from a merged stream
- private Set normalSamples; // we are going to remember which samples are normal and which are tumor:
- private Set tumorSamples ; // these are used only to generate genotypes for vcf output
-
- private int NQS_WIDTH = 5; // 5 bases on each side of the indel for NQS-style statistics
-
- private Writer bedWriter = null;
- private Writer verboseWriter = null;
-
-
- private static String annGenomic = "GENOMIC\t";
- private static String annIntron = "INTRON";
- private static String annUTR = "UTR";
- private static String annCoding = "CODING";
- private static String annUnknown = "UNKNOWN";
-
- enum CallType {
- NOCOVERAGE,
- BADCOVERAGE,
- NOEVIDENCE,
- GERMLINE,
- SOMATIC
- };
-
- private SAMRecord lastRead;
- private byte[] refBases;
- private ReferenceDataSource refData;
- private Iterator genotypeIntervalIterator = null;
-
- // the current interval in the list of intervals, for which we want to do full genotyping
- private GenomeLoc currentGenotypeInterval = null;
- private long lastGenotypedPosition = -1; // last position on the currentGenotypeInterval, for which a call was already printed;
- // can be 1 base before lastGenotyped start
-
- private JexlEngine jexlEngine = new JexlEngine();
- private ArrayList jexlExpressions = new ArrayList();
-
- // the following arrays store indel source-specific (normal/tumor) metric names
- // for fast access when populating JEXL expression contexts (see IndelPrecall.fillContext())
- private final static String[] normalMetricsCassette = new String[4];
- private final static String[] tumorMetricsCassette = new String[4];
- private final static String[] singleMetricsCassette = new String[4];
- private final static int C_COV=0;
- private final static int C_CONS_CNT=1;
- private final static int C_INDEL_F=2;
- private final static int C_INDEL_CF=3;
- static {
- normalMetricsCassette[C_COV] = "N_COV";
- tumorMetricsCassette[C_COV] = "T_COV";
- singleMetricsCassette[C_COV] = "COV";
- normalMetricsCassette[C_CONS_CNT] = "N_CONS_CNT";
- tumorMetricsCassette[C_CONS_CNT] = "T_CONS_CNT";
- singleMetricsCassette[C_CONS_CNT] = "CONS_CNT";
- normalMetricsCassette[C_INDEL_F] = "N_INDEL_F";
- tumorMetricsCassette[C_INDEL_F] = "T_INDEL_F";
- singleMetricsCassette[C_INDEL_F] = "INDEL_F";
- normalMetricsCassette[C_INDEL_CF] = "N_INDEL_CF";
- tumorMetricsCassette[C_INDEL_CF] = "T_INDEL_CF";
- singleMetricsCassette[C_INDEL_CF] = "INDEL_CF";
- }
-
- // "/humgen/gsa-scr1/GATK_Data/refGene.sorted.txt"
-
- private Set getVCFHeaderInfo() {
- Set headerInfo = new HashSet();
-
- // first, the basic info
- headerInfo.add(new VCFHeaderLine("source", "SomaticIndelDetector"));
- headerInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
- headerInfo.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_KEY));
-
- // FORMAT and INFO fields
-// headerInfo.addAll(VCFUtils.getSupportedHeaderStrings());
-
- headerInfo.addAll(VCFIndelAttributes.getAttributeHeaderLines());
- if ( call_somatic ) {
- headerInfo.add(new VCFInfoHeaderLine(VCFConstants.SOMATIC_KEY, 0, VCFHeaderLineType.Flag, "Somatic event"));
- } else {
- }
-
- // all of the arguments from the argument collection
- Set