Handle merge conflicts
This commit is contained in:
commit
52067f0549
|
|
@ -55,36 +55,36 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test(enabled = true)
|
||||
public void testSNP_ACS_Pools() {
|
||||
PC_LSV_Test_short(" -maxAltAlleles 1 -ploidy 6 -out_mode EMIT_ALL_CONFIDENT_SITES","LSV_SNP_ACS","SNP","651469eeacdb3ab9e2690cfb71f6a634");
|
||||
PC_LSV_Test_short(" -maxAltAlleles 1 -ploidy 6 -out_mode EMIT_ALL_CONFIDENT_SITES","LSV_SNP_ACS","SNP","df0e67c975ef74d593f1c704daab1705");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testBOTH_GGA_Pools() {
|
||||
PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","be7dc20bdb5f200d189706bcf1aeb7ee");
|
||||
PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","d1c113a17e36762d27eb27fd12528e52");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testINDEL_GGA_Pools() {
|
||||
PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","25e5ea86d87b7d7ddaad834a6ed7481d");
|
||||
PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","ab043eed87fadbe5761a55a4912b19ac");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() {
|
||||
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","cdbf268d282e57189a88fb83f0e1fd72");
|
||||
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","95d48e0680019d5406ff9adb8f2ff3ca");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() {
|
||||
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","2ed40925cd112c1a45470d215b7ec4b3");
|
||||
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","8a4ddd64c4e9c42b4a8622582fcfa9c9");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testMT_SNP_DISCOVERY_sp4() {
|
||||
PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","33695a998bcc906cabcc758727004387");
|
||||
PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","3fc6f4d458313616727c60e49c0e852b");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testMT_SNP_GGA_sp10() {
|
||||
PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "b2725242114bf9cc9bca14679705ba40");
|
||||
PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "1bebbc0f28bff6fd64736ccca8839df8");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot1() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
|
||||
Arrays.asList("2ba9af34d2a4d55caf152265a30ead46"));
|
||||
Arrays.asList("847605f4efafef89529fe0e496315edd"));
|
||||
executeTest("test MultiSample Pilot1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -38,7 +38,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testWithAllelesPassedIn1() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||
Arrays.asList("0630c35c070d7a7e0cf22b3cce797f22"));
|
||||
Arrays.asList("5b31b811072a4df04524e13604015f9b"));
|
||||
executeTest("test MultiSample Pilot2 with alleles passed in", spec1);
|
||||
}
|
||||
|
||||
|
|
@ -46,7 +46,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testWithAllelesPassedIn2() {
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||
Arrays.asList("5857dcb4e6a8422ae0813e42d433b122"));
|
||||
Arrays.asList("d9992e55381afb43742cc9b30fcd7538"));
|
||||
executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -54,7 +54,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testSingleSamplePilot2() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("489deda5d3276545364a06b7385f8bd9"));
|
||||
Arrays.asList("dff4412a074940d26994f9552476b209"));
|
||||
executeTest("test SingleSample Pilot2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -62,7 +62,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultipleSNPAlleles() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
|
||||
Arrays.asList("595ba44c75d08dab98df222b8e61ab70"));
|
||||
Arrays.asList("b41b95aaa2c453c9b75b3b29a9c2718e"));
|
||||
executeTest("test Multiple SNP alleles", spec);
|
||||
}
|
||||
|
||||
|
|
@ -70,7 +70,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testBadRead() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH -I " + privateTestDir + "badRead.test.bam -o %s -L 1:22753424-22753464", 1,
|
||||
Arrays.asList("360f9795facdaa14c0cb4b05207142e4"));
|
||||
Arrays.asList("d915535c1458733f09f82670092fcab6"));
|
||||
executeTest("test bad read", spec);
|
||||
}
|
||||
|
||||
|
|
@ -78,7 +78,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testReverseTrim() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1,
|
||||
Arrays.asList("4b4a62429f8eac1e2f27ba5e2edea9e5"));
|
||||
Arrays.asList("44e9f6cf11b4efecb454cd3de8de9877"));
|
||||
executeTest("test reverse trim", spec);
|
||||
}
|
||||
|
||||
|
|
@ -86,7 +86,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMismatchedPLs() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1,
|
||||
Arrays.asList("cc892c91a93dbd8dbdf645803f35a0ee"));
|
||||
Arrays.asList("935ee705ffe8cc6bf1d9efcceea271c8"));
|
||||
executeTest("test mismatched PLs", spec);
|
||||
}
|
||||
|
||||
|
|
@ -96,7 +96,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
private final static String COMPRESSED_OUTPUT_MD5 = "3fc7d2681ff753e2d68605d7cf8b63e3";
|
||||
private final static String COMPRESSED_OUTPUT_MD5 = "e6e33f0ebabab027eabed51fe9a08da9";
|
||||
|
||||
@Test
|
||||
public void testCompressedOutput() {
|
||||
|
|
@ -149,7 +149,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMinBaseQualityScore() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --min_base_quality_score 26", 1,
|
||||
Arrays.asList("04dc83d7dfb42b8cada91647bd9f32f1"));
|
||||
Arrays.asList("6ee6537e9ebc1bfc7c6cf8f04b1582ff"));
|
||||
executeTest("test min_base_quality_score 26", spec);
|
||||
}
|
||||
|
||||
|
|
@ -157,7 +157,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testSLOD() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b36KGReference + " --computeSLOD --no_cmdline_in_header -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
|
||||
Arrays.asList("4429a665a1048f958db3c204297cdb9f"));
|
||||
Arrays.asList("55760482335497086458b09e415ecf54"));
|
||||
executeTest("test SLOD", spec);
|
||||
}
|
||||
|
||||
|
|
@ -165,7 +165,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testNDA() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --annotateNDA -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
|
||||
Arrays.asList("f063e3573c513eaa9ce7d7df22143362"));
|
||||
Arrays.asList("938e888a40182878be4c3cc4859adb69"));
|
||||
executeTest("test NDA", spec);
|
||||
}
|
||||
|
||||
|
|
@ -173,7 +173,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testCompTrack() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -comp:FOO " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
|
||||
Arrays.asList("d76e93e2676354dde832f08a508c6f88"));
|
||||
Arrays.asList("7dc186d420487e4e156a24ec8dea0951"));
|
||||
executeTest("test using comp track", spec);
|
||||
}
|
||||
|
||||
|
|
@ -187,17 +187,17 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testOutputParameterSitesOnly() {
|
||||
testOutputParameters("-sites_only", "1a65172b9bd7a2023d48bc758747b34a");
|
||||
testOutputParameters("-sites_only", "f99c7471127a6fb6f72e136bc873b2c9");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOutputParameterAllConfident() {
|
||||
testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "3f1fa34d8440f6f21654ce60c0ba8f28");
|
||||
testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "9dbc9389db39cf9697e93e0bf529314f");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOutputParameterAllSites() {
|
||||
testOutputParameters("--output_mode EMIT_ALL_SITES", "f240434b4d3c234f6f9e349e9ec05f4e");
|
||||
testOutputParameters("--output_mode EMIT_ALL_SITES", "8b26088a035e579c4afd3b46737291e4");
|
||||
}
|
||||
|
||||
private void testOutputParameters(final String args, final String md5) {
|
||||
|
|
@ -211,7 +211,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testConfidence() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
|
||||
Arrays.asList("aec378bed312b3557c6dd7ec740c8091"));
|
||||
Arrays.asList("4af83a883ecc03a23b0aa6dd4b8f1ceb"));
|
||||
executeTest("test confidence 1", spec1);
|
||||
}
|
||||
|
||||
|
|
@ -222,12 +222,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
// --------------------------------------------------------------------------------------------------------------
|
||||
@Test
|
||||
public void testHeterozyosity1() {
|
||||
testHeterozosity( 0.01, "5da6b24033a6b02f466836443d49560e" );
|
||||
testHeterozosity( 0.01, "bdc8760d7ae1e01c0510b12c1e6fcfa3" );
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHeterozyosity2() {
|
||||
testHeterozosity( 1.0 / 1850, "1f284c4af967a3c26687164f9441fb16" );
|
||||
testHeterozosity( 1.0 / 1850, "f508f06a47305e11e62776615cb14fe3" );
|
||||
}
|
||||
|
||||
private void testHeterozosity(final double arg, final String md5) {
|
||||
|
|
@ -251,7 +251,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("cff553c53de970f64051ed5711407038"));
|
||||
Arrays.asList("13d91059f58fb50a07a6a34b9438a45b"));
|
||||
|
||||
executeTest(String.format("test multiple technologies"), spec);
|
||||
}
|
||||
|
|
@ -270,7 +270,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -L 1:10,000,000-10,100,000" +
|
||||
" -baq CALCULATE_AS_NECESSARY",
|
||||
1,
|
||||
Arrays.asList("f960a91963e614a6c8d8cda57836df24"));
|
||||
Arrays.asList("07d8b77a5f6697f3a47a4f1efb0dcf50"));
|
||||
|
||||
executeTest(String.format("test calling with BAQ"), spec);
|
||||
}
|
||||
|
|
@ -289,7 +289,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("46a6d24c82ebb99d305462960fa09b7c"));
|
||||
Arrays.asList("0f026d2e568172cf32813cc54ea7ba23"));
|
||||
|
||||
executeTest(String.format("test indel caller in SLX"), spec);
|
||||
}
|
||||
|
|
@ -304,7 +304,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -minIndelCnt 1" +
|
||||
" -L 1:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("2be25321bbc6a963dba7ecba5dd76802"));
|
||||
Arrays.asList("e7ad858e9d6617534761918561f3ed4c"));
|
||||
|
||||
executeTest(String.format("test indel caller in SLX with low min allele count"), spec);
|
||||
}
|
||||
|
|
@ -317,7 +317,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("d6b2657cd5a4a949968cdab50efce515"));
|
||||
Arrays.asList("39c7a813fd6ee82d3604f2a868b35b2a"));
|
||||
|
||||
executeTest(String.format("test indel calling, multiple technologies"), spec);
|
||||
}
|
||||
|
|
@ -327,7 +327,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("9cff66a321284c362f393bc4db21f756"));
|
||||
Arrays.asList("9430fe36789a791fcff6162f768ae563"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec);
|
||||
}
|
||||
|
||||
|
|
@ -337,7 +337,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
|
||||
+ privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("90c8cfcf65152534c16ed81104fc3bcd"));
|
||||
Arrays.asList("8d8dbf483526b0b309f5728619a74a86"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec);
|
||||
}
|
||||
|
||||
|
|
@ -345,13 +345,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSampleIndels1() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
|
||||
Arrays.asList("457b8f899cf1665de61e75084dbb79d0"));
|
||||
Arrays.asList("5667a699a3a13474f2d1cd2d6b01cd5b"));
|
||||
List<File> result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst();
|
||||
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
|
||||
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
|
||||
Arrays.asList("a13fe7aa3b9e8e091b3cf3442a056ec1"));
|
||||
Arrays.asList("b6c1d5cd28ff584c5f5037afef4e883a"));
|
||||
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -361,7 +361,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + privateTestDir + vcf + " -I " + validationDataLocation +
|
||||
"NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam -o %s -L " + validationDataLocation + vcf, 1,
|
||||
Arrays.asList("d075ad318739c8c56bdce857da1e48b9"));
|
||||
Arrays.asList("d76eacc4021b78ccc0a9026162e814a7"));
|
||||
executeTest("test GENOTYPE_GIVEN_ALLELES with no evidence in reads", spec);
|
||||
}
|
||||
|
||||
|
|
@ -373,7 +373,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 20:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("91c632ab17a1dd89ed19ebb20324f905"));
|
||||
Arrays.asList("1e0d2c15546c3b0959b00ffb75488b56"));
|
||||
|
||||
executeTest(String.format("test UG with base indel quality scores"), spec);
|
||||
}
|
||||
|
|
@ -407,7 +407,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMinIndelFraction0() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
assessMinIndelFraction + " -minIndelFrac 0.0", 1,
|
||||
Arrays.asList("1d80e135d611fe19e1fb1882aa588a73"));
|
||||
Arrays.asList("db3026c49a3de7a5cb9a3d77635d0706"));
|
||||
executeTest("test minIndelFraction 0.0", spec);
|
||||
}
|
||||
|
||||
|
|
@ -415,7 +415,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMinIndelFraction25() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
assessMinIndelFraction + " -minIndelFrac 0.25", 1,
|
||||
Arrays.asList("752139616752902fca13c312d8fe5e22"));
|
||||
Arrays.asList("7ab8e5ee15ab98d6756b0eea0f4d3798"));
|
||||
executeTest("test minIndelFraction 0.25", spec);
|
||||
}
|
||||
|
||||
|
|
@ -423,7 +423,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMinIndelFraction100() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
assessMinIndelFraction + " -minIndelFrac 1", 1,
|
||||
Arrays.asList("d66b9decf26e1704abda1a919ac149cd"));
|
||||
Arrays.asList("3f07efb768e08650a7ce333edd4f9a52"));
|
||||
executeTest("test minIndelFraction 1.0", spec);
|
||||
}
|
||||
|
||||
|
|
@ -437,7 +437,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testNsInCigar() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "testWithNs.bam -o %s -L 8:141813600-141813700 -out_mode EMIT_ALL_SITES", 1,
|
||||
Arrays.asList("b62ba9777efc05af4c36e2d4ce3ee67c"));
|
||||
Arrays.asList("4d36969d4f8f1094f1fb6e7e085c19f6"));
|
||||
executeTest("test calling on reads with Ns in CIGAR", spec);
|
||||
}
|
||||
|
||||
|
|
@ -451,18 +451,18 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testReducedBam() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
|
||||
Arrays.asList("f72ecd00b2913f63788faa7dabb1d102"));
|
||||
Arrays.asList("092e42a712afb660ec79ff11c55933e2"));
|
||||
executeTest("test calling on a ReducedRead BAM", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReducedBamSNPs() {
|
||||
testReducedCalling("SNP", "f059743858004ceee325f2a7761a2362");
|
||||
testReducedCalling("SNP", "c0de74ab8f4f14eb3a2c5d55c200ac5f");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReducedBamINDELs() {
|
||||
testReducedCalling("INDEL", "04845ba1ec7d8d8b0eab2ca6bdb9c1a6");
|
||||
testReducedCalling("INDEL", "1c9aaf65ffaa12bb766855265a1c3f8e");
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -483,7 +483,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testContaminationDownsampling() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --contamination_fraction_to_filter 0.20", 1,
|
||||
Arrays.asList("b500ad5959bce69f888a2fac024647e5"));
|
||||
Arrays.asList("1f9071466fc40f4c6a0f58ac8e9135fb"));
|
||||
executeTest("test contamination_percentage_to_filter 0.20", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -21,19 +21,19 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSample() {
|
||||
HCTest(CEUTRIO_BAM, "", "7122d4f0ef94c5274aa3047cfebe08ed");
|
||||
HCTest(CEUTRIO_BAM, "", "47fdbe5f01d3ce5e53056eea8c488e45");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSample() {
|
||||
HCTest(NA12878_BAM, "", "6cd6e6787521c07a7bae98766fd628ab");
|
||||
HCTest(NA12878_BAM, "", "a2c63f6e6e51a01019bdbd23125bdb15");
|
||||
}
|
||||
|
||||
// TODO -- add more tests for GGA mode, especially with input alleles that are complex variants and/or not trimmed
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGA() {
|
||||
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
|
||||
"44df2a9da4fbd2162ae44c3f2a6ef01f");
|
||||
"54b7cc3da3d8349ff4302f99883ab188");
|
||||
}
|
||||
|
||||
private void HCTestComplexVariants(String bam, String args, String md5) {
|
||||
|
|
@ -44,7 +44,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleComplex() {
|
||||
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "4a413eeb7a75cab0ab5370b4c08dcf8e");
|
||||
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "6c0c441b71848c2eea38ab5e2afe1120");
|
||||
}
|
||||
|
||||
private void HCTestSymbolicVariants(String bam, String args, String md5) {
|
||||
|
|
@ -55,7 +55,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSampleSymbolic() {
|
||||
HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "77cf5b5273828dd1605bb23a5aeafcaa");
|
||||
HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "0761ff5cbf279be467833fa6708bf360");
|
||||
}
|
||||
|
||||
private void HCTestIndelQualityScores(String bam, String args, String md5) {
|
||||
|
|
@ -66,20 +66,20 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSampleIndelQualityScores() {
|
||||
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "87ca97f90e74caee35c35616c065821c");
|
||||
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "29f1125df5ab27cc937a144ae08ac735");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void HCTestProblematicReadsModifiedInActiveRegions() {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("3df42d0550b51eb9b55aac61e8b3c452"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("ece627de486aee69d02872891c6cb0ff"));
|
||||
executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void HCTestStructuralIndels() {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("4dbc72b72e3e2d9d812d5a398490e213"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("add0f4f51969b7caeea99005a7ba1aa4"));
|
||||
executeTest("HCTestStructuralIndels: ", spec);
|
||||
}
|
||||
|
||||
|
|
@ -93,7 +93,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void HCTestReducedBam() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
|
||||
Arrays.asList("f8c2745bf71f2659a57494fcaa2c103b"));
|
||||
Arrays.asList("8a400b0c46f41447fcc35a907e34f384"));
|
||||
executeTest("HC calling on a ReducedRead BAM", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -570,32 +570,11 @@ public class GenomeAnalysisEngine {
|
|||
else if(walker instanceof ActiveRegionWalker) {
|
||||
if (readsDataSource.getSortOrder() != SAMFileHeader.SortOrder.coordinate)
|
||||
throw new UserException.MissortedBAM(SAMFileHeader.SortOrder.coordinate, "Active region walkers can only traverse coordinate-sorted data. Please resort your input BAM file(s) or set the Sort Order tag in the header appropriately.");
|
||||
|
||||
switch(argCollection.activeRegionShardType) {
|
||||
case LOCUSSHARD:
|
||||
if(intervals == null)
|
||||
return readsDataSource.createShardIteratorOverMappedReads(referenceDataSource.getReference().getSequenceDictionary(),new LocusShardBalancer());
|
||||
else
|
||||
return readsDataSource.createShardIteratorOverIntervals(((ActiveRegionWalker)walker).extendIntervals(intervals, this.genomeLocParser, this.getReferenceDataSource().getReference()), new LocusShardBalancer());
|
||||
case READSHARD:
|
||||
// Use the legacy ReadShardBalancer if legacy downsampling is enabled
|
||||
ShardBalancer readShardBalancer = downsamplingMethod != null && downsamplingMethod.useLegacyDownsampler ?
|
||||
new LegacyReadShardBalancer() :
|
||||
new ReadShardBalancer();
|
||||
|
||||
if(intervals == null)
|
||||
return readsDataSource.createShardIteratorOverMappedReads(referenceDataSource.getReference().getSequenceDictionary(), readShardBalancer);
|
||||
else
|
||||
return readsDataSource.createShardIteratorOverIntervals(((ActiveRegionWalker)walker).extendIntervals(intervals, this.genomeLocParser, this.getReferenceDataSource().getReference()), readShardBalancer);
|
||||
case ACTIVEREGIONSHARD:
|
||||
if(intervals == null)
|
||||
return readsDataSource.createShardIteratorOverMappedReads(referenceDataSource.getReference().getSequenceDictionary(),new ActiveRegionShardBalancer());
|
||||
else
|
||||
return readsDataSource.createShardIteratorOverIntervals(((ActiveRegionWalker)walker).extendIntervals(intervals, this.genomeLocParser, this.getReferenceDataSource().getReference()), new ActiveRegionShardBalancer());
|
||||
default:
|
||||
throw new UserException.CommandLineException("Invalid active region shard type.");
|
||||
}
|
||||
}
|
||||
if(intervals == null)
|
||||
return readsDataSource.createShardIteratorOverMappedReads(referenceDataSource.getReference().getSequenceDictionary(),new LocusShardBalancer());
|
||||
else
|
||||
return readsDataSource.createShardIteratorOverIntervals(((ActiveRegionWalker)walker).extendIntervals(intervals, this.genomeLocParser, this.getReferenceDataSource().getReference()), new LocusShardBalancer());
|
||||
}
|
||||
else if(walker instanceof ReadWalker || walker instanceof ReadPairWalker || walker instanceof DuplicateWalker) {
|
||||
// Apply special validation to read pair walkers.
|
||||
if(walker instanceof ReadPairWalker) {
|
||||
|
|
|
|||
|
|
@ -37,7 +37,6 @@ import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
|
|||
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
|
||||
import org.broadinstitute.sting.gatk.samples.PedigreeValidationType;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.activeregion.ExperimentalActiveRegionShardType;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
|
||||
|
|
@ -449,14 +448,5 @@ public class GATKArgumentCollection {
|
|||
@Hidden
|
||||
public boolean generateShadowBCF = false;
|
||||
// TODO -- remove all code tagged with TODO -- remove me when argument generateShadowBCF is removed
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Experimental Active Region Traversal modes
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
@Argument(fullName = "active_region_traversal_shard_type", shortName = "active_region_traversal_shard_type", doc = "Choose an experimental shard type for active region traversal, instead of the default LocusShard", required = false)
|
||||
public ExperimentalActiveRegionShardType activeRegionShardType = ExperimentalActiveRegionShardType.LOCUSSHARD;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,58 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
import java.util.Collection;
|
||||
|
||||
/**
|
||||
* @author Joel Thibault
|
||||
*/
|
||||
public class ActiveRegionShardDataProvider extends ShardDataProvider {
|
||||
final private ReadShardDataProvider readProvider;
|
||||
final private LocusShardDataProvider locusProvider;
|
||||
|
||||
public ActiveRegionShardDataProvider(Shard shard, ReadProperties sourceInfo, GenomeLocParser genomeLocParser, StingSAMIterator reads, GenomeLoc locus, LocusIterator locusIterator, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods) {
|
||||
super(shard, genomeLocParser, reference, rods); // TODO: necessary?
|
||||
readProvider = new ReadShardDataProvider(shard, genomeLocParser, reads, reference, rods);
|
||||
locusProvider = new LocusShardDataProvider(shard, sourceInfo, genomeLocParser, locus, locusIterator, reference, rods);
|
||||
}
|
||||
|
||||
public ReadShardDataProvider getReadShardDataProvider() {
|
||||
return readProvider;
|
||||
}
|
||||
|
||||
public LocusShardDataProvider getLocusShardDataProvider(LocusIterator iterator) {
|
||||
return locusProvider;
|
||||
}
|
||||
}
|
||||
|
|
@ -44,22 +44,6 @@ public class LocusShardDataProvider extends ShardDataProvider {
|
|||
this.locusIterator = locusIterator;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a data provider based on an input provider
|
||||
* Used only by ExperimentalReadShardTraverseActiveRegions
|
||||
* @param dataProvider
|
||||
* @param sourceInfo
|
||||
* @param genomeLocParser
|
||||
* @param locus
|
||||
* @param locusIterator
|
||||
*/
|
||||
public LocusShardDataProvider(ShardDataProvider dataProvider, ReadProperties sourceInfo, GenomeLocParser genomeLocParser, GenomeLoc locus, LocusIterator locusIterator) {
|
||||
super(dataProvider.getShard(),genomeLocParser,dataProvider.getReference(),dataProvider.getReferenceOrderedData());
|
||||
this.sourceInfo = sourceInfo;
|
||||
this.locus = locus;
|
||||
this.locusIterator = locusIterator;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns information about the source of the reads.
|
||||
* @return Info about the source of the reads.
|
||||
|
|
|
|||
|
|
@ -1,41 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||
|
||||
import net.sf.samtools.SAMFileSpan;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* @author Joel Thibault
|
||||
*/
|
||||
public class ActiveRegionShard extends ReadShard {
|
||||
public ActiveRegionShard(GenomeLocParser parser, SAMDataSource readsDataSource, Map<SAMReaderID,SAMFileSpan> fileSpans, List<GenomeLoc> loci, boolean isUnmapped) {
|
||||
super(parser, readsDataSource, fileSpans, loci, isUnmapped);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,32 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||
|
||||
/**
|
||||
* @author Joel Thibault
|
||||
*/
|
||||
public class ActiveRegionShardBalancer extends ReadShardBalancer {
|
||||
// TODO ?
|
||||
}
|
||||
|
|
@ -40,9 +40,7 @@ import java.util.Map;
|
|||
*/
|
||||
public abstract class Shard implements HasGenomeLocation {
|
||||
public enum ShardType {
|
||||
READ,
|
||||
LOCUS,
|
||||
ACTIVEREGION // Used only by ExperimentalActiveRegionShardTraverseActiveRegions
|
||||
READ, LOCUS
|
||||
}
|
||||
|
||||
protected final GenomeLocParser parser; // incredibly annoying!
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.executive;
|
|||
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ActiveRegionShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
|
||||
|
|
@ -12,8 +11,6 @@ import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
|||
import org.broadinstitute.sting.gatk.io.DirectOutputTracker;
|
||||
import org.broadinstitute.sting.gatk.io.OutputTracker;
|
||||
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
|
||||
import org.broadinstitute.sting.gatk.traversals.ExperimentalActiveRegionShardTraverseActiveRegions;
|
||||
import org.broadinstitute.sting.gatk.traversals.ExperimentalReadShardTraverseActiveRegions;
|
||||
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
|
||||
import org.broadinstitute.sting.gatk.traversals.TraverseActiveRegions;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
|
|
@ -81,18 +78,6 @@ public class LinearMicroScheduler extends MicroScheduler {
|
|||
}
|
||||
windowMaker.close();
|
||||
}
|
||||
else if(shard.getShardType() == Shard.ShardType.ACTIVEREGION) {
|
||||
WindowMaker windowMaker = new WindowMaker(shard, engine.getGenomeLocParser(),
|
||||
getReadIterator(shard), shard.getGenomeLocs(), SampleUtils.getSAMFileSamples(engine));
|
||||
for(WindowMaker.WindowMakerIterator iterator: windowMaker) {
|
||||
ShardDataProvider dataProvider = new ActiveRegionShardDataProvider(shard,iterator.getSourceInfo(),engine.getGenomeLocParser(),getReadIterator(shard),iterator.getLocus(),iterator,reference,rods);
|
||||
Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit());
|
||||
accumulator.accumulate(dataProvider,result);
|
||||
dataProvider.close();
|
||||
if ( walker.isDone() ) break;
|
||||
}
|
||||
windowMaker.close();
|
||||
}
|
||||
else {
|
||||
ShardDataProvider dataProvider = new ReadShardDataProvider(shard,engine.getGenomeLocParser(),getReadIterator(shard),reference,rods);
|
||||
Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit());
|
||||
|
|
@ -108,14 +93,6 @@ public class LinearMicroScheduler extends MicroScheduler {
|
|||
final Object result = ((TraverseActiveRegions) traversalEngine).endTraversal(walker, accumulator.getReduceInit());
|
||||
accumulator.accumulate(null, result); // Assumes only used with StandardAccumulator
|
||||
}
|
||||
else if( traversalEngine instanceof ExperimentalReadShardTraverseActiveRegions ) {
|
||||
final Object result = ((ExperimentalReadShardTraverseActiveRegions) traversalEngine).endTraversal(walker, accumulator.getReduceInit());
|
||||
accumulator.accumulate(null, result); // Assumes only used with StandardAccumulator
|
||||
}
|
||||
else if( traversalEngine instanceof ExperimentalActiveRegionShardTraverseActiveRegions) {
|
||||
final Object result = ((ExperimentalActiveRegionShardTraverseActiveRegions) traversalEngine).endTraversal(walker, accumulator.getReduceInit());
|
||||
accumulator.accumulate(null, result); // Assumes only used with StandardAccumulator
|
||||
}
|
||||
|
||||
Object result = accumulator.finishTraversal();
|
||||
|
||||
|
|
|
|||
|
|
@ -41,7 +41,6 @@ import org.broadinstitute.sting.gatk.traversals.*;
|
|||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.AutoFormattingTime;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.activeregion.ExperimentalActiveRegionShardType;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.progressmeter.ProgressMeter;
|
||||
|
|
@ -246,12 +245,7 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
|
|||
} else if (walker instanceof ReadPairWalker) {
|
||||
return new TraverseReadPairs();
|
||||
} else if (walker instanceof ActiveRegionWalker) {
|
||||
switch (engine.getArguments().activeRegionShardType) {
|
||||
case LOCUSSHARD: return new TraverseActiveRegions();
|
||||
case READSHARD: return new ExperimentalReadShardTraverseActiveRegions();
|
||||
case ACTIVEREGIONSHARD: return new ExperimentalActiveRegionShardTraverseActiveRegions();
|
||||
default: throw new UnsupportedOperationException("Unable to determine traversal type, the walker is an unknown type of ActiveRegionWalker.");
|
||||
}
|
||||
return new TraverseActiveRegions();
|
||||
} else {
|
||||
throw new UnsupportedOperationException("Unable to determine traversal type, the walker is an unknown type.");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,309 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.traversals;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.WalkerManager;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.*;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
||||
import org.broadinstitute.sting.gatk.executive.WindowMaker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ActiveRegionExtension;
|
||||
import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfile;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
public class ExperimentalActiveRegionShardTraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegionWalker<M,T>,ActiveRegionShardDataProvider> {
|
||||
/**
|
||||
* our log, which we want to capture anything from this class
|
||||
*/
|
||||
protected final static Logger logger = Logger.getLogger(TraversalEngine.class);
|
||||
|
||||
private final LinkedList<ActiveRegion> workQueue = new LinkedList<ActiveRegion>();
|
||||
private final LinkedList<GATKSAMRecord> myReads = new LinkedList<GATKSAMRecord>();
|
||||
|
||||
@Override
|
||||
public String getTraversalUnits() {
|
||||
return "active regions";
|
||||
}
|
||||
|
||||
@Override
|
||||
public T traverse( final ActiveRegionWalker<M,T> walker,
|
||||
final ActiveRegionShardDataProvider dataProvider,
|
||||
T sum) {
|
||||
logger.debug(String.format("ExperimentalActiveRegionShardTraverseActiveRegions.traverse: Shard is %s", dataProvider));
|
||||
|
||||
ReadShardDataProvider readDataProvider = dataProvider.getReadShardDataProvider();
|
||||
|
||||
final int activeRegionExtension = walker.getClass().getAnnotation(ActiveRegionExtension.class).extension();
|
||||
final int maxRegionSize = walker.getClass().getAnnotation(ActiveRegionExtension.class).maxRegion();
|
||||
|
||||
final ReadView readView = new ReadView(readDataProvider);
|
||||
|
||||
final List<ActiveRegion> activeRegions = new LinkedList<ActiveRegion>();
|
||||
ActivityProfile profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions());
|
||||
|
||||
Shard readShard = readDataProvider.getShard();
|
||||
SAMFileHeader header = readShard.getReadProperties().getHeader();
|
||||
WindowMaker windowMaker = new WindowMaker(readShard, engine.getGenomeLocParser(),
|
||||
readView.iterator(), readShard.getGenomeLocs(), SampleUtils.getSAMFileSamples(header));
|
||||
|
||||
for(WindowMaker.WindowMakerIterator iterator: windowMaker) {
|
||||
LocusShardDataProvider locusDataProvider = dataProvider.getLocusShardDataProvider(iterator);
|
||||
final LocusView locusView = new AllLocusView(locusDataProvider);
|
||||
final LocusReferenceView referenceView = new LocusReferenceView( walker, locusDataProvider );
|
||||
ReferenceOrderedView referenceOrderedDataView = getReferenceOrderedView(walker, locusDataProvider, locusView);
|
||||
|
||||
// We keep processing while the next reference location is within the interval
|
||||
GenomeLoc prevLoc = null;
|
||||
while( locusView.hasNext() ) {
|
||||
final AlignmentContext locus = locusView.next();
|
||||
final GenomeLoc location = locus.getLocation();
|
||||
|
||||
if ( prevLoc != null && location.getStart() != prevLoc.getStop() + 1 ) {
|
||||
// we've move across some interval boundary, restart profile
|
||||
profile = incorporateActiveRegions(profile, activeRegions, activeRegionExtension, maxRegionSize);
|
||||
}
|
||||
|
||||
readDataProvider.getShard().getReadMetrics().incrementNumIterations();
|
||||
|
||||
// create reference context. Note that if we have a pileup of "extended events", the context will
|
||||
// hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
|
||||
final ReferenceContext refContext = referenceView.getReferenceContext(location);
|
||||
|
||||
// Iterate forward to get all reference ordered data covering this location
|
||||
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext);
|
||||
|
||||
// Call the walkers isActive function for this locus and add them to the list to be integrated later
|
||||
profile.add(walkerActiveProb(walker, tracker, refContext, locus, location));
|
||||
|
||||
prevLoc = location;
|
||||
|
||||
printProgress(locus.getLocation());
|
||||
}
|
||||
|
||||
locusDataProvider.close();
|
||||
}
|
||||
|
||||
windowMaker.close();
|
||||
|
||||
updateCumulativeMetrics(readDataProvider.getShard());
|
||||
|
||||
if ( ! profile.isEmpty() )
|
||||
incorporateActiveRegions(profile, activeRegions, activeRegionExtension, maxRegionSize);
|
||||
|
||||
// add active regions to queue of regions to process
|
||||
// first check if can merge active regions over shard boundaries
|
||||
if( !activeRegions.isEmpty() ) {
|
||||
if( !workQueue.isEmpty() ) {
|
||||
final ActiveRegion last = workQueue.getLast();
|
||||
final ActiveRegion first = activeRegions.get(0);
|
||||
if( last.isActive == first.isActive && last.getLocation().contiguousP(first.getLocation()) && last.getLocation().size() + first.getLocation().size() <= maxRegionSize ) {
|
||||
workQueue.removeLast();
|
||||
activeRegions.remove(first);
|
||||
workQueue.addLast(new ActiveRegion(last.getLocation().union(first.getLocation()), first.isActive, this.engine.getGenomeLocParser(), activeRegionExtension));
|
||||
}
|
||||
}
|
||||
workQueue.addAll( activeRegions );
|
||||
}
|
||||
|
||||
logger.debug("Integrated " + profile.size() + " isActive calls into " + activeRegions.size() + " regions." );
|
||||
|
||||
// now process the active regions, where possible
|
||||
boolean emptyQueue = false;
|
||||
sum = processActiveRegions(walker, sum, emptyQueue);
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* Take the individual isActive calls and integrate them into contiguous active regions and
|
||||
* add these blocks of work to the work queue
|
||||
* band-pass filter the list of isActive probabilities and turn into active regions
|
||||
*
|
||||
* @param profile
|
||||
* @param activeRegions
|
||||
* @param activeRegionExtension
|
||||
* @param maxRegionSize
|
||||
* @return
|
||||
*/
|
||||
private ActivityProfile incorporateActiveRegions(final ActivityProfile profile,
|
||||
final List<ActiveRegion> activeRegions,
|
||||
final int activeRegionExtension,
|
||||
final int maxRegionSize) {
|
||||
if ( profile.isEmpty() )
|
||||
throw new IllegalStateException("trying to incorporate an empty active profile " + profile);
|
||||
|
||||
final ActivityProfile bandPassFiltered = profile.bandPassFilter();
|
||||
activeRegions.addAll(bandPassFiltered.createActiveRegions( activeRegionExtension, maxRegionSize ));
|
||||
return new ActivityProfile( engine.getGenomeLocParser(), profile.hasPresetRegions() );
|
||||
}
|
||||
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// simple utility functions
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
private final ActivityProfileResult walkerActiveProb(final ActiveRegionWalker<M,T> walker,
|
||||
final RefMetaDataTracker tracker, final ReferenceContext refContext,
|
||||
final AlignmentContext locus, final GenomeLoc location) {
|
||||
if ( walker.hasPresetActiveRegions() ) {
|
||||
return new ActivityProfileResult(location, walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0);
|
||||
} else {
|
||||
return walker.isActive( tracker, refContext, locus );
|
||||
}
|
||||
}
|
||||
|
||||
private ReferenceOrderedView getReferenceOrderedView( final ActiveRegionWalker<M,T> walker,
|
||||
final LocusShardDataProvider dataProvider,
|
||||
final LocusView locusView) {
|
||||
if ( WalkerManager.getWalkerDataSource(walker) != DataSource.REFERENCE_ORDERED_DATA )
|
||||
return new ManagingReferenceOrderedView( dataProvider );
|
||||
else
|
||||
return (RodLocusView)locusView;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// code to handle processing active regions
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
private T processActiveRegions( final ActiveRegionWalker<M,T> walker, T sum, boolean emptyQueue ) {
|
||||
if( walker.activeRegionOutStream != null ) {
|
||||
writeActiveRegionsToStream(walker);
|
||||
return sum;
|
||||
} else {
|
||||
return callWalkerMapOnActiveRegions(walker, sum, emptyQueue);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Write out each active region to the walker activeRegionOutStream
|
||||
*
|
||||
* @param walker
|
||||
*/
|
||||
private void writeActiveRegionsToStream( final ActiveRegionWalker<M,T> walker ) {
|
||||
// Just want to output the active regions to a file, not actually process them
|
||||
for( final ActiveRegion activeRegion : workQueue ) {
|
||||
if( activeRegion.isActive ) {
|
||||
walker.activeRegionOutStream.println( activeRegion.getLocation() );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private T callWalkerMapOnActiveRegions( final ActiveRegionWalker<M,T> walker, T sum, boolean emptyQueue ) {
|
||||
final int lastRegionStart = workQueue.getLast().getLocation().getStart();
|
||||
final String lastRegionContig = workQueue.getLast().getLocation().getContig();
|
||||
|
||||
// If we've traversed sufficiently past the beginning of the workQueue we can unload those regions and process them
|
||||
// TODO can implement parallel traversal here
|
||||
while( workQueue.peekFirst() != null ) {
|
||||
ActiveRegion firstRegion = workQueue.getFirst();
|
||||
final String firstRegionContig = firstRegion.getLocation().getContig();
|
||||
if (emptyQueue || firstRegionContig != lastRegionContig) {
|
||||
sum = processFirstActiveRegion(sum, walker);
|
||||
}
|
||||
else {
|
||||
final int firstRegionMaxReadStop = walker.wantsExtendedReads() ? firstRegion.getMaxReadStop() : firstRegion.getExtendedMaxReadStop();
|
||||
if (lastRegionStart > firstRegionMaxReadStop) {
|
||||
sum = processFirstActiveRegion( sum, walker );
|
||||
}
|
||||
else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* Process the first active region and all remaining reads which overlap
|
||||
*
|
||||
* Remove the first active region from the queue
|
||||
* (NB: some reads associated with this active region may have already been processed)
|
||||
*
|
||||
* Remove all of these reads from the queue
|
||||
* (NB: some may be associated with other active regions)
|
||||
*
|
||||
* @param sum
|
||||
* @param walker
|
||||
* @return
|
||||
*/
|
||||
private T processFirstActiveRegion( final T sum, final ActiveRegionWalker<M,T> walker ) {
|
||||
final ActiveRegion firstRegion = workQueue.removeFirst();
|
||||
|
||||
GATKSAMRecord firstRead = myReads.peekFirst(); // don't remove because it may not be placed here
|
||||
GenomeLoc firstReadLoc = this.engine.getGenomeLocParser().createGenomeLoc( firstRead );
|
||||
|
||||
while ( firstRegion.getLocation().overlapsP( firstReadLoc ) ||
|
||||
(walker.wantsExtendedReads() && firstRegion.getExtendedLoc().overlapsP( firstReadLoc ))) {
|
||||
if( firstRegion.getLocation().overlapsP( firstReadLoc ) ) {
|
||||
// The region which the highest amount of overlap is chosen as the primary region for the read (tie breaking is done as right most region)
|
||||
long maxOverlap = firstRegion.getLocation().sizeOfOverlap( firstReadLoc );
|
||||
ActiveRegion bestRegion = firstRegion;
|
||||
for( final ActiveRegion otherRegionToTest : workQueue ) {
|
||||
if( otherRegionToTest.getLocation().sizeOfOverlap(firstReadLoc) >= maxOverlap ) {
|
||||
maxOverlap = otherRegionToTest.getLocation().sizeOfOverlap( firstReadLoc );
|
||||
bestRegion = otherRegionToTest;
|
||||
}
|
||||
}
|
||||
bestRegion.add( firstRead );
|
||||
|
||||
// The read is also added to all other regions in which it overlaps but marked as non-primary
|
||||
if( walker.wantsNonPrimaryReads() ) {
|
||||
if( !bestRegion.equals(firstRegion) ) {
|
||||
firstRegion.add(firstRead);
|
||||
}
|
||||
for( final ActiveRegion otherRegionToTest : workQueue ) {
|
||||
if( !bestRegion.equals(otherRegionToTest) ) {
|
||||
// check for non-primary vs. extended
|
||||
if ( otherRegionToTest.getLocation().overlapsP( firstReadLoc ) ) {
|
||||
otherRegionToTest.add( firstRead );
|
||||
} else if ( walker.wantsExtendedReads() && otherRegionToTest.getExtendedLoc().overlapsP( firstReadLoc ) ) {
|
||||
otherRegionToTest.add( firstRead );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// check for non-primary vs. extended
|
||||
} else if( firstRegion.getLocation().overlapsP( firstReadLoc ) ) {
|
||||
if ( walker.wantsNonPrimaryReads() ) {
|
||||
firstRegion.add( firstRead );
|
||||
}
|
||||
} else if( walker.wantsExtendedReads() && firstRegion.getExtendedLoc().overlapsP( firstReadLoc )) {
|
||||
firstRegion.add( firstRead );
|
||||
}
|
||||
|
||||
myReads.removeFirst();
|
||||
firstRead = myReads.peekFirst();
|
||||
firstReadLoc = this.engine.getGenomeLocParser().createGenomeLoc( firstRead );
|
||||
}
|
||||
|
||||
logger.debug(">> Map call with " + firstRegion.getReads().size() + " " + (firstRegion.isActive ? "active" : "inactive") + " reads @ " + firstRegion.getLocation() + " with full extent: " + firstRegion.getReferenceLoc());
|
||||
final M x = walker.map( firstRegion, null );
|
||||
return walker.reduce(x, sum);
|
||||
}
|
||||
|
||||
/**
|
||||
* Special function called in LinearMicroScheduler to empty out the work queue.
|
||||
* Ugly for now but will be cleaned up when we push this functionality more into the engine
|
||||
*/
|
||||
public T endTraversal( final Walker<M,T> walker, T sum) {
|
||||
boolean emptyQueue = true;
|
||||
return processActiveRegions((ActiveRegionWalker<M,T>)walker, sum, emptyQueue);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,309 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.traversals;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.WalkerManager;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.*;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
||||
import org.broadinstitute.sting.gatk.executive.WindowMaker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ActiveRegionExtension;
|
||||
import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfile;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
public class ExperimentalReadShardTraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegionWalker<M,T>,ReadShardDataProvider> {
|
||||
/**
|
||||
* our log, which we want to capture anything from this class
|
||||
*/
|
||||
protected final static Logger logger = Logger.getLogger(TraversalEngine.class);
|
||||
|
||||
private final LinkedList<ActiveRegion> workQueue = new LinkedList<ActiveRegion>();
|
||||
private final LinkedList<GATKSAMRecord> myReads = new LinkedList<GATKSAMRecord>();
|
||||
|
||||
@Override
|
||||
public String getTraversalUnits() {
|
||||
return "active regions";
|
||||
}
|
||||
|
||||
@Override
|
||||
public T traverse( final ActiveRegionWalker<M,T> walker,
|
||||
final ReadShardDataProvider readDataProvider,
|
||||
T sum) {
|
||||
logger.debug(String.format("ExperimentalReadShardTraverseActiveRegions.traverse: Read Shard is %s", readDataProvider));
|
||||
|
||||
final int activeRegionExtension = walker.getClass().getAnnotation(ActiveRegionExtension.class).extension();
|
||||
final int maxRegionSize = walker.getClass().getAnnotation(ActiveRegionExtension.class).maxRegion();
|
||||
|
||||
final ReadView readView = new ReadView(readDataProvider);
|
||||
|
||||
final List<ActiveRegion> activeRegions = new LinkedList<ActiveRegion>();
|
||||
ActivityProfile profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions());
|
||||
|
||||
Shard readShard = readDataProvider.getShard();
|
||||
SAMFileHeader header = readShard.getReadProperties().getHeader();
|
||||
WindowMaker windowMaker = new WindowMaker(readShard, engine.getGenomeLocParser(),
|
||||
readView.iterator(), readShard.getGenomeLocs(), SampleUtils.getSAMFileSamples(header));
|
||||
|
||||
for(WindowMaker.WindowMakerIterator iterator: windowMaker) {
|
||||
LocusShardDataProvider locusDataProvider = new LocusShardDataProvider(readDataProvider,
|
||||
iterator.getSourceInfo(), engine.getGenomeLocParser(), iterator.getLocus(), iterator);
|
||||
|
||||
final LocusView locusView = new AllLocusView(locusDataProvider);
|
||||
final LocusReferenceView referenceView = new LocusReferenceView( walker, locusDataProvider );
|
||||
ReferenceOrderedView referenceOrderedDataView = getReferenceOrderedView(walker, locusDataProvider, locusView);
|
||||
|
||||
// We keep processing while the next reference location is within the interval
|
||||
GenomeLoc prevLoc = null;
|
||||
while( locusView.hasNext() ) {
|
||||
final AlignmentContext locus = locusView.next();
|
||||
final GenomeLoc location = locus.getLocation();
|
||||
|
||||
if ( prevLoc != null && location.getStart() != prevLoc.getStop() + 1 ) {
|
||||
// we've move across some interval boundary, restart profile
|
||||
profile = incorporateActiveRegions(profile, activeRegions, activeRegionExtension, maxRegionSize);
|
||||
}
|
||||
|
||||
readDataProvider.getShard().getReadMetrics().incrementNumIterations();
|
||||
|
||||
// create reference context. Note that if we have a pileup of "extended events", the context will
|
||||
// hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
|
||||
final ReferenceContext refContext = referenceView.getReferenceContext(location);
|
||||
|
||||
// Iterate forward to get all reference ordered data covering this location
|
||||
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext);
|
||||
|
||||
// Call the walkers isActive function for this locus and add them to the list to be integrated later
|
||||
profile.add(walkerActiveProb(walker, tracker, refContext, locus, location));
|
||||
|
||||
prevLoc = location;
|
||||
|
||||
printProgress(locus.getLocation());
|
||||
}
|
||||
|
||||
locusDataProvider.close();
|
||||
}
|
||||
|
||||
windowMaker.close();
|
||||
|
||||
updateCumulativeMetrics(readDataProvider.getShard());
|
||||
|
||||
if ( ! profile.isEmpty() )
|
||||
incorporateActiveRegions(profile, activeRegions, activeRegionExtension, maxRegionSize);
|
||||
|
||||
// add active regions to queue of regions to process
|
||||
// first check if can merge active regions over shard boundaries
|
||||
if( !activeRegions.isEmpty() ) {
|
||||
if( !workQueue.isEmpty() ) {
|
||||
final ActiveRegion last = workQueue.getLast();
|
||||
final ActiveRegion first = activeRegions.get(0);
|
||||
if( last.isActive == first.isActive && last.getLocation().contiguousP(first.getLocation()) && last.getLocation().size() + first.getLocation().size() <= maxRegionSize ) {
|
||||
workQueue.removeLast();
|
||||
activeRegions.remove(first);
|
||||
workQueue.addLast(new ActiveRegion(last.getLocation().union(first.getLocation()), first.isActive, this.engine.getGenomeLocParser(), activeRegionExtension));
|
||||
}
|
||||
}
|
||||
workQueue.addAll( activeRegions );
|
||||
}
|
||||
|
||||
logger.debug("Integrated " + profile.size() + " isActive calls into " + activeRegions.size() + " regions." );
|
||||
|
||||
// now process the active regions, where possible
|
||||
boolean emptyQueue = false;
|
||||
sum = processActiveRegions(walker, sum, emptyQueue);
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* Take the individual isActive calls and integrate them into contiguous active regions and
|
||||
* add these blocks of work to the work queue
|
||||
* band-pass filter the list of isActive probabilities and turn into active regions
|
||||
*
|
||||
* @param profile
|
||||
* @param activeRegions
|
||||
* @param activeRegionExtension
|
||||
* @param maxRegionSize
|
||||
* @return
|
||||
*/
|
||||
private ActivityProfile incorporateActiveRegions(final ActivityProfile profile,
|
||||
final List<ActiveRegion> activeRegions,
|
||||
final int activeRegionExtension,
|
||||
final int maxRegionSize) {
|
||||
if ( profile.isEmpty() )
|
||||
throw new IllegalStateException("trying to incorporate an empty active profile " + profile);
|
||||
|
||||
final ActivityProfile bandPassFiltered = profile.bandPassFilter();
|
||||
activeRegions.addAll(bandPassFiltered.createActiveRegions( activeRegionExtension, maxRegionSize ));
|
||||
return new ActivityProfile( engine.getGenomeLocParser(), profile.hasPresetRegions() );
|
||||
}
|
||||
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// simple utility functions
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
private final ActivityProfileResult walkerActiveProb(final ActiveRegionWalker<M,T> walker,
|
||||
final RefMetaDataTracker tracker, final ReferenceContext refContext,
|
||||
final AlignmentContext locus, final GenomeLoc location) {
|
||||
if ( walker.hasPresetActiveRegions() ) {
|
||||
return new ActivityProfileResult(location, walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0);
|
||||
} else {
|
||||
return walker.isActive( tracker, refContext, locus );
|
||||
}
|
||||
}
|
||||
|
||||
private ReferenceOrderedView getReferenceOrderedView( final ActiveRegionWalker<M,T> walker,
|
||||
final LocusShardDataProvider dataProvider,
|
||||
final LocusView locusView) {
|
||||
if ( WalkerManager.getWalkerDataSource(walker) != DataSource.REFERENCE_ORDERED_DATA )
|
||||
return new ManagingReferenceOrderedView( dataProvider );
|
||||
else
|
||||
return (RodLocusView)locusView;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// code to handle processing active regions
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
private T processActiveRegions( final ActiveRegionWalker<M,T> walker, T sum, boolean emptyQueue ) {
|
||||
if( walker.activeRegionOutStream != null ) {
|
||||
writeActiveRegionsToStream(walker);
|
||||
return sum;
|
||||
} else {
|
||||
return callWalkerMapOnActiveRegions(walker, sum, emptyQueue);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Write out each active region to the walker activeRegionOutStream
|
||||
*
|
||||
* @param walker
|
||||
*/
|
||||
private void writeActiveRegionsToStream( final ActiveRegionWalker<M,T> walker ) {
|
||||
// Just want to output the active regions to a file, not actually process them
|
||||
for( final ActiveRegion activeRegion : workQueue ) {
|
||||
if( activeRegion.isActive ) {
|
||||
walker.activeRegionOutStream.println( activeRegion.getLocation() );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private T callWalkerMapOnActiveRegions( final ActiveRegionWalker<M,T> walker, T sum, boolean emptyQueue ) {
|
||||
final int lastRegionStart = workQueue.getLast().getLocation().getStart();
|
||||
final String lastRegionContig = workQueue.getLast().getLocation().getContig();
|
||||
|
||||
// If we've traversed sufficiently past the beginning of the workQueue we can unload those regions and process them
|
||||
// TODO can implement parallel traversal here
|
||||
while( workQueue.peekFirst() != null ) {
|
||||
ActiveRegion firstRegion = workQueue.getFirst();
|
||||
final String firstRegionContig = firstRegion.getLocation().getContig();
|
||||
if (emptyQueue || firstRegionContig != lastRegionContig) {
|
||||
sum = processFirstActiveRegion(sum, walker);
|
||||
}
|
||||
else {
|
||||
final int firstRegionMaxReadStop = walker.wantsExtendedReads() ? firstRegion.getMaxReadStop() : firstRegion.getExtendedMaxReadStop();
|
||||
if (lastRegionStart > firstRegionMaxReadStop) {
|
||||
sum = processFirstActiveRegion( sum, walker );
|
||||
}
|
||||
else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* Process the first active region and all remaining reads which overlap
|
||||
*
|
||||
* Remove the first active region from the queue
|
||||
* (NB: some reads associated with this active region may have already been processed)
|
||||
*
|
||||
* Remove all of these reads from the queue
|
||||
* (NB: some may be associated with other active regions)
|
||||
*
|
||||
* @param sum
|
||||
* @param walker
|
||||
* @return
|
||||
*/
|
||||
private T processFirstActiveRegion( final T sum, final ActiveRegionWalker<M,T> walker ) {
|
||||
final ActiveRegion firstRegion = workQueue.removeFirst();
|
||||
|
||||
GATKSAMRecord firstRead = myReads.peekFirst(); // don't remove because it may not be placed here
|
||||
GenomeLoc firstReadLoc = this.engine.getGenomeLocParser().createGenomeLoc( firstRead );
|
||||
|
||||
while ( firstRegion.getLocation().overlapsP( firstReadLoc ) ||
|
||||
(walker.wantsExtendedReads() && firstRegion.getExtendedLoc().overlapsP( firstReadLoc ))) {
|
||||
if( firstRegion.getLocation().overlapsP( firstReadLoc ) ) {
|
||||
// The region which the highest amount of overlap is chosen as the primary region for the read (tie breaking is done as right most region)
|
||||
long maxOverlap = firstRegion.getLocation().sizeOfOverlap( firstReadLoc );
|
||||
ActiveRegion bestRegion = firstRegion;
|
||||
for( final ActiveRegion otherRegionToTest : workQueue ) {
|
||||
if( otherRegionToTest.getLocation().sizeOfOverlap(firstReadLoc) >= maxOverlap ) {
|
||||
maxOverlap = otherRegionToTest.getLocation().sizeOfOverlap( firstReadLoc );
|
||||
bestRegion = otherRegionToTest;
|
||||
}
|
||||
}
|
||||
bestRegion.add( firstRead );
|
||||
|
||||
// The read is also added to all other regions in which it overlaps but marked as non-primary
|
||||
if( walker.wantsNonPrimaryReads() ) {
|
||||
if( !bestRegion.equals(firstRegion) ) {
|
||||
firstRegion.add(firstRead);
|
||||
}
|
||||
for( final ActiveRegion otherRegionToTest : workQueue ) {
|
||||
if( !bestRegion.equals(otherRegionToTest) ) {
|
||||
// check for non-primary vs. extended
|
||||
if ( otherRegionToTest.getLocation().overlapsP( firstReadLoc ) ) {
|
||||
otherRegionToTest.add( firstRead );
|
||||
} else if ( walker.wantsExtendedReads() && otherRegionToTest.getExtendedLoc().overlapsP( firstReadLoc ) ) {
|
||||
otherRegionToTest.add( firstRead );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// check for non-primary vs. extended
|
||||
} else if( firstRegion.getLocation().overlapsP( firstReadLoc ) ) {
|
||||
if ( walker.wantsNonPrimaryReads() ) {
|
||||
firstRegion.add( firstRead );
|
||||
}
|
||||
} else if( walker.wantsExtendedReads() && firstRegion.getExtendedLoc().overlapsP( firstReadLoc )) {
|
||||
firstRegion.add( firstRead );
|
||||
}
|
||||
|
||||
myReads.removeFirst();
|
||||
firstRead = myReads.peekFirst();
|
||||
firstReadLoc = this.engine.getGenomeLocParser().createGenomeLoc( firstRead );
|
||||
}
|
||||
|
||||
logger.debug(">> Map call with " + firstRegion.getReads().size() + " " + (firstRegion.isActive ? "active" : "inactive") + " reads @ " + firstRegion.getLocation() + " with full extent: " + firstRegion.getReferenceLoc());
|
||||
final M x = walker.map( firstRegion, null );
|
||||
return walker.reduce(x, sum);
|
||||
}
|
||||
|
||||
/**
|
||||
* Special function called in LinearMicroScheduler to empty out the work queue.
|
||||
* Ugly for now but will be cleaned up when we push this functionality more into the engine
|
||||
*/
|
||||
public T endTraversal( final Walker<M,T> walker, T sum) {
|
||||
boolean emptyQueue = true;
|
||||
return processActiveRegions((ActiveRegionWalker<M,T>)walker, sum, emptyQueue);
|
||||
}
|
||||
}
|
||||
|
|
@ -197,7 +197,7 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
|
|||
}
|
||||
|
||||
updateCumulativeMetrics(dataProvider.getShard());
|
||||
printProgress(site);
|
||||
printProgress(site.getStopLocation());
|
||||
done = walker.isDone();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,92 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.variant.variantcontext.Allele;
|
||||
import org.broadinstitute.variant.variantcontext.Genotype;
|
||||
import org.broadinstitute.variant.variantcontext.GenotypesContext;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.variant.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.variant.vcf.VCFInfoHeaderLine;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: 1/3/13
|
||||
* Time: 11:36 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class AverageAltAlleleLength extends InfoFieldAnnotation implements ActiveRegionBasedAnnotation, ExperimentalAnnotation {
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() {
|
||||
return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Average Allele Length"));
|
||||
}
|
||||
|
||||
public List<String> getKeyNames() { return Arrays.asList("AAL"); }
|
||||
|
||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap ) {
|
||||
if ( !vc.hasLog10PError() )
|
||||
return null;
|
||||
|
||||
final GenotypesContext genotypes = vc.getGenotypes();
|
||||
if ( genotypes == null || genotypes.size() == 0 )
|
||||
return null;
|
||||
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
|
||||
double length = getMeanAltAlleleLength(vc);
|
||||
map.put(getKeyNames().get(0),String.format("%.2f",length));
|
||||
return map;
|
||||
}
|
||||
|
||||
public static double getMeanAltAlleleLength(VariantContext vc) {
|
||||
double averageLength = 1.0;
|
||||
if ( ! vc.isSNP() && ! vc.isSymbolic() ) {
|
||||
// adjust for the event length
|
||||
int averageLengthNum = 0;
|
||||
int averageLengthDenom = 0;
|
||||
int refLength = vc.getReference().length();
|
||||
for ( Allele a : vc.getAlternateAlleles() ) {
|
||||
int numAllele = vc.getCalledChrCount(a);
|
||||
int alleleSize;
|
||||
if ( a.length() == refLength ) {
|
||||
// SNP or MNP
|
||||
byte[] a_bases = a.getBases();
|
||||
byte[] ref_bases = vc.getReference().getBases();
|
||||
int n_mismatch = 0;
|
||||
for ( int idx = 0; idx < a_bases.length; idx++ ) {
|
||||
if ( a_bases[idx] != ref_bases[idx] )
|
||||
n_mismatch++;
|
||||
}
|
||||
alleleSize = n_mismatch;
|
||||
}
|
||||
else if ( a.isSymbolic() ) {
|
||||
alleleSize = 1;
|
||||
} else {
|
||||
alleleSize = Math.abs(refLength-a.length());
|
||||
}
|
||||
averageLengthNum += alleleSize*numAllele;
|
||||
averageLengthDenom += numAllele;
|
||||
}
|
||||
averageLength = ( (double) averageLengthNum )/averageLengthDenom;
|
||||
}
|
||||
|
||||
return averageLength;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,5 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import com.sun.org.apache.bcel.internal.generic.AALOAD;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -68,50 +69,17 @@ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotati
|
|||
if ( depth == 0 )
|
||||
return null;
|
||||
|
||||
double QD = -10.0 * vc.getLog10PError() / (double)depth;
|
||||
double altAlleleLength = AverageAltAlleleLength.getMeanAltAlleleLength(vc);
|
||||
double QD = -10.0 * vc.getLog10PError() / ((double)depth * altAlleleLength);
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
|
||||
if ( ! vc.isSNP() && ! vc.isSymbolic() ) {
|
||||
// adjust for the event length
|
||||
int averageLengthNum = 0;
|
||||
int averageLengthDenom = 0;
|
||||
int refLength = vc.getReference().length();
|
||||
for ( Allele a : vc.getAlternateAlleles() ) {
|
||||
int numAllele = vc.getCalledChrCount(a);
|
||||
int alleleSize;
|
||||
if ( a.length() == refLength ) {
|
||||
// SNP or MNP
|
||||
byte[] a_bases = a.getBases();
|
||||
byte[] ref_bases = vc.getReference().getBases();
|
||||
int n_mismatch = 0;
|
||||
for ( int idx = 0; idx < a_bases.length; idx++ ) {
|
||||
if ( a_bases[idx] != ref_bases[idx] )
|
||||
n_mismatch++;
|
||||
}
|
||||
alleleSize = n_mismatch;
|
||||
}
|
||||
else if ( a.isSymbolic() ) {
|
||||
alleleSize = 1;
|
||||
} else {
|
||||
alleleSize = Math.abs(refLength-a.length());
|
||||
}
|
||||
averageLengthNum += alleleSize*numAllele;
|
||||
averageLengthDenom += numAllele;
|
||||
}
|
||||
double averageLength = ( (double) averageLengthNum )/averageLengthDenom;
|
||||
QD /= averageLength;
|
||||
map.put(getKeyNames().get(1),String.format("%.2f",averageLength));
|
||||
}
|
||||
|
||||
map.put(getKeyNames().get(0), String.format("%.2f", QD));
|
||||
return map;
|
||||
}
|
||||
|
||||
public List<String> getKeyNames() { return Arrays.asList("QD","AAL"); }
|
||||
public List<String> getKeyNames() { return Arrays.asList("QD"); }
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() {
|
||||
return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Variant Confidence/Quality by Depth"),
|
||||
new VCFInfoHeaderLine(getKeyNames().get(1), 1, VCFHeaderLineType.Float, "Average Allele Length"));
|
||||
return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Variant Confidence/Quality by Depth"));
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -27,10 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
|
|||
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
|
||||
import org.broadinstitute.sting.utils.recalibration.EventType;
|
||||
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalDatum;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
|
||||
import org.broadinstitute.sting.utils.recalibration.*;
|
||||
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
|
|
@ -128,29 +125,19 @@ public class RecalibrationEngine {
|
|||
final byte qual = recalInfo.getQual(eventType, offset);
|
||||
final double isError = recalInfo.getErrorFraction(eventType, offset);
|
||||
|
||||
incrementDatumOrPutIfNecessary(qualityScoreTable, qual, isError, keys[0], keys[1], eventIndex);
|
||||
RecalUtils.incrementDatumOrPutIfNecessary(qualityScoreTable, qual, isError, keys[0], keys[1], eventIndex);
|
||||
|
||||
for (int i = 2; i < covariates.length; i++) {
|
||||
if (keys[i] < 0)
|
||||
continue;
|
||||
|
||||
incrementDatumOrPutIfNecessary(tables.getTable(i), qual, isError, keys[0], keys[1], keys[i], eventIndex);
|
||||
RecalUtils.incrementDatumOrPutIfNecessary(tables.getTable(i), qual, isError, keys[0], keys[1], keys[i], eventIndex);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* creates a datum object with one observation and one or zero error
|
||||
*
|
||||
* @param reportedQual the quality score reported by the instrument for this base
|
||||
* @param isError whether or not the observation is an error
|
||||
* @return a new RecalDatum object with the observation and the error
|
||||
*/
|
||||
protected RecalDatum createDatumObject(final byte reportedQual, final double isError) {
|
||||
return new RecalDatum(1, isError, reportedQual);
|
||||
}
|
||||
|
||||
/**
|
||||
* Finalize, if appropriate, all derived data in recalibrationTables.
|
||||
|
|
@ -226,36 +213,4 @@ public class RecalibrationEngine {
|
|||
if ( ! finalized ) throw new IllegalStateException("Cannot get final recalibration tables until finalizeData() has been called");
|
||||
return finalRecalibrationTables;
|
||||
}
|
||||
|
||||
/**
|
||||
* Increments the RecalDatum at the specified position in the specified table, or put a new item there
|
||||
* if there isn't already one.
|
||||
*
|
||||
* Does this in a thread-safe way WITHOUT being synchronized: relies on the behavior of NestedIntegerArray.put()
|
||||
* to return false if another thread inserts a new item at our position in the middle of our put operation.
|
||||
*
|
||||
* @param table the table that holds/will hold our item
|
||||
* @param qual qual for this event
|
||||
* @param isError error value for this event
|
||||
* @param keys location in table of our item
|
||||
*/
|
||||
protected void incrementDatumOrPutIfNecessary( final NestedIntegerArray<RecalDatum> table,
|
||||
final byte qual,
|
||||
final double isError,
|
||||
final int... keys ) {
|
||||
final RecalDatum existingDatum = table.get(keys);
|
||||
|
||||
if ( existingDatum == null ) {
|
||||
// No existing item, try to put a new one
|
||||
if ( ! table.put(createDatumObject(qual, isError), keys) ) {
|
||||
// Failed to put a new item because another thread came along and put an item here first.
|
||||
// Get the newly-put item and increment it (item is guaranteed to exist at this point)
|
||||
table.get(keys).increment(1L, isError);
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Easy case: already an item here, so increment it
|
||||
existingDatum.increment(1L, isError);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,11 +26,6 @@ public class ActiveRegion implements HasGenomeLocation {
|
|||
private final GenomeLocParser genomeLocParser;
|
||||
public final boolean isActive;
|
||||
|
||||
// maximum stop position of all reads with start position in this active region
|
||||
// Used only by ExperimentalReadShardTraverseActiveRegions
|
||||
// NB: these reads may not be associated with this active region!
|
||||
private int maxReadStop;
|
||||
|
||||
public ActiveRegion( final GenomeLoc activeRegionLoc, final boolean isActive, final GenomeLocParser genomeLocParser, final int extension ) {
|
||||
this.activeRegionLoc = activeRegionLoc;
|
||||
this.isActive = isActive;
|
||||
|
|
@ -38,7 +33,6 @@ public class ActiveRegion implements HasGenomeLocation {
|
|||
this.extension = extension;
|
||||
extendedLoc = genomeLocParser.createGenomeLocOnContig(activeRegionLoc.getContig(), activeRegionLoc.getStart() - extension, activeRegionLoc.getStop() + extension);
|
||||
fullExtentReferenceLoc = extendedLoc;
|
||||
maxReadStop = activeRegionLoc.getStart();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
@ -99,18 +93,6 @@ public class ActiveRegion implements HasGenomeLocation {
|
|||
public void remove( final GATKSAMRecord read ) { reads.remove( read ); }
|
||||
public void removeAll( final ArrayList<GATKSAMRecord> readsToRemove ) { reads.removeAll( readsToRemove ); }
|
||||
|
||||
public void setMaxReadStop(int maxReadStop) {
|
||||
this.maxReadStop = maxReadStop;
|
||||
}
|
||||
|
||||
public int getMaxReadStop() {
|
||||
return maxReadStop;
|
||||
}
|
||||
|
||||
public int getExtendedMaxReadStop() {
|
||||
return maxReadStop + extension;
|
||||
}
|
||||
|
||||
public boolean equalExceptReads(final ActiveRegion other) {
|
||||
if ( activeRegionLoc.compareTo(other.activeRegionLoc) != 0 ) return false;
|
||||
if ( isActive != other.isActive ) return false;
|
||||
|
|
|
|||
|
|
@ -1,14 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.activeregion;
|
||||
|
||||
/**
|
||||
* Created with IntelliJ IDEA.
|
||||
* User: thibault
|
||||
* Date: 1/2/13
|
||||
* Time: 4:59 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public enum ExperimentalActiveRegionShardType {
|
||||
LOCUSSHARD, // default/legacy type
|
||||
READSHARD,
|
||||
ACTIVEREGIONSHARD
|
||||
}
|
||||
|
|
@ -269,9 +269,9 @@ public class RecalUtils {
|
|||
|
||||
final ArrayList<Pair<String, String>> columnNames = new ArrayList<Pair<String, String>>(); // initialize the array to hold the column names
|
||||
columnNames.add(new Pair<String, String>(covariateNameMap.get(requestedCovariates[0]), "%s")); // save the required covariate name so we can reference it in the future
|
||||
if (tableIndex != RecalibrationTables.TableType.READ_GROUP_TABLE.index) {
|
||||
if (tableIndex != RecalibrationTables.TableType.READ_GROUP_TABLE.ordinal()) {
|
||||
columnNames.add(new Pair<String, String>(covariateNameMap.get(requestedCovariates[1]), "%s")); // save the required covariate name so we can reference it in the future
|
||||
if (tableIndex >= RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index) {
|
||||
if (tableIndex >= RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal()) {
|
||||
columnNames.add(covariateValue);
|
||||
columnNames.add(covariateName);
|
||||
}
|
||||
|
|
@ -279,13 +279,13 @@ public class RecalUtils {
|
|||
|
||||
columnNames.add(eventType); // the order of these column names is important here
|
||||
columnNames.add(empiricalQuality);
|
||||
if (tableIndex == RecalibrationTables.TableType.READ_GROUP_TABLE.index)
|
||||
if (tableIndex == RecalibrationTables.TableType.READ_GROUP_TABLE.ordinal())
|
||||
columnNames.add(estimatedQReported); // only the read group table needs the estimated Q reported
|
||||
columnNames.add(nObservations);
|
||||
columnNames.add(nErrors);
|
||||
|
||||
final GATKReportTable reportTable;
|
||||
if (tableIndex <= RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index) {
|
||||
if (tableIndex <= RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal()) {
|
||||
if(sortByCols) {
|
||||
reportTable = new GATKReportTable("RecalTable" + reportTableIndex++, "", columnNames.size(), GATKReportTable.TableSortingWay.SORT_BY_COLUMN);
|
||||
} else {
|
||||
|
|
@ -295,7 +295,7 @@ public class RecalUtils {
|
|||
reportTable.addColumn(columnName.getFirst(), columnName.getSecond());
|
||||
rowIndex = 0; // reset the row index since we're starting with a new table
|
||||
} else {
|
||||
reportTable = result.get(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index);
|
||||
reportTable = result.get(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal());
|
||||
}
|
||||
|
||||
final NestedIntegerArray<RecalDatum> table = recalibrationTables.getTable(tableIndex);
|
||||
|
|
@ -306,9 +306,9 @@ public class RecalUtils {
|
|||
int columnIndex = 0;
|
||||
int keyIndex = 0;
|
||||
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), requestedCovariates[0].formatKey(keys[keyIndex++]));
|
||||
if (tableIndex != RecalibrationTables.TableType.READ_GROUP_TABLE.index) {
|
||||
if (tableIndex != RecalibrationTables.TableType.READ_GROUP_TABLE.ordinal()) {
|
||||
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), requestedCovariates[1].formatKey(keys[keyIndex++]));
|
||||
if (tableIndex >= RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index) {
|
||||
if (tableIndex >= RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal()) {
|
||||
final Covariate covariate = requestedCovariates[tableIndex];
|
||||
|
||||
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), covariate.formatKey(keys[keyIndex++]));
|
||||
|
|
@ -320,7 +320,7 @@ public class RecalUtils {
|
|||
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), event.toString());
|
||||
|
||||
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEmpiricalQuality());
|
||||
if (tableIndex == RecalibrationTables.TableType.READ_GROUP_TABLE.index)
|
||||
if (tableIndex == RecalibrationTables.TableType.READ_GROUP_TABLE.ordinal())
|
||||
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEstimatedQReported()); // we only add the estimated Q reported in the RG table
|
||||
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getNumObservations());
|
||||
reportTable.set(rowIndex, columnNames.get(columnIndex).getFirst(), datum.getNumMismatches());
|
||||
|
|
@ -414,7 +414,7 @@ public class RecalUtils {
|
|||
}
|
||||
|
||||
// add the optional covariates to the delta table
|
||||
for (int i = RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index; i < requestedCovariates.length; i++) {
|
||||
for (int i = RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal(); i < requestedCovariates.length; i++) {
|
||||
final NestedIntegerArray<RecalDatum> covTable = recalibrationTables.getTable(i);
|
||||
for (final NestedIntegerArray.Leaf leaf : covTable.getAllLeaves()) {
|
||||
final int[] covs = new int[4];
|
||||
|
|
@ -458,9 +458,9 @@ public class RecalUtils {
|
|||
|
||||
private static List<Object> generateValuesFromKeys(final List<Object> keys, final Covariate[] covariates, final Map<Covariate, String> covariateNameMap) {
|
||||
final List<Object> values = new ArrayList<Object>(4);
|
||||
values.add(covariates[RecalibrationTables.TableType.READ_GROUP_TABLE.index].formatKey((Integer)keys.get(0)));
|
||||
values.add(covariates[RecalibrationTables.TableType.READ_GROUP_TABLE.ordinal()].formatKey((Integer)keys.get(0)));
|
||||
final int covariateIndex = (Integer)keys.get(1);
|
||||
final Covariate covariate = covariateIndex == covariates.length ? covariates[RecalibrationTables.TableType.QUALITY_SCORE_TABLE.index] : covariates[covariateIndex];
|
||||
final Covariate covariate = covariateIndex == covariates.length ? covariates[RecalibrationTables.TableType.QUALITY_SCORE_TABLE.ordinal()] : covariates[covariateIndex];
|
||||
final int covariateKey = (Integer)keys.get(2);
|
||||
values.add(covariate.formatKey(covariateKey));
|
||||
values.add(covariateNameMap.get(covariate));
|
||||
|
|
@ -793,4 +793,48 @@ public class RecalUtils {
|
|||
myDatum.combine(row.value);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Increments the RecalDatum at the specified position in the specified table, or put a new item there
|
||||
* if there isn't already one.
|
||||
*
|
||||
* Does this in a thread-safe way WITHOUT being synchronized: relies on the behavior of NestedIntegerArray.put()
|
||||
* to return false if another thread inserts a new item at our position in the middle of our put operation.
|
||||
*
|
||||
* @param table the table that holds/will hold our item
|
||||
* @param qual qual for this event
|
||||
* @param isError error value for this event
|
||||
* @param keys location in table of our item
|
||||
*/
|
||||
public static void incrementDatumOrPutIfNecessary( final NestedIntegerArray<RecalDatum> table,
|
||||
final byte qual,
|
||||
final double isError,
|
||||
final int... keys ) {
|
||||
final RecalDatum existingDatum = table.get(keys);
|
||||
|
||||
if ( existingDatum == null ) {
|
||||
// No existing item, try to put a new one
|
||||
if ( ! table.put(createDatumObject(qual, isError), keys) ) {
|
||||
// Failed to put a new item because another thread came along and put an item here first.
|
||||
// Get the newly-put item and increment it (item is guaranteed to exist at this point)
|
||||
table.get(keys).increment(1.0, isError);
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Easy case: already an item here, so increment it
|
||||
existingDatum.increment(1.0, isError);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* creates a datum object with one observation and one or zero error
|
||||
*
|
||||
* @param reportedQual the quality score reported by the instrument for this base
|
||||
* @param isError whether or not the observation is an error
|
||||
* @return a new RecalDatum object with the observation and the error
|
||||
*/
|
||||
private static RecalDatum createDatumObject(final byte reportedQual, final double isError) {
|
||||
return new RecalDatum(1, isError, reportedQual);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -130,12 +130,12 @@ public class RecalibrationReport {
|
|||
final String covName = (String)reportTable.get(i, RecalUtils.COVARIATE_NAME_COLUMN_NAME);
|
||||
final int covIndex = optionalCovariateIndexes.get(covName);
|
||||
final Object covValue = reportTable.get(i, RecalUtils.COVARIATE_VALUE_COLUMN_NAME);
|
||||
tempCOVarray[2] = requestedCovariates[RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + covIndex].keyFromValue(covValue);
|
||||
tempCOVarray[2] = requestedCovariates[RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal() + covIndex].keyFromValue(covValue);
|
||||
|
||||
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalUtils.EVENT_TYPE_COLUMN_NAME));
|
||||
tempCOVarray[3] = event.ordinal();
|
||||
|
||||
recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + covIndex).put(getRecalDatum(reportTable, i, false), tempCOVarray);
|
||||
recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal() + covIndex).put(getRecalDatum(reportTable, i, false), tempCOVarray);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -42,15 +42,9 @@ import java.util.ArrayList;
|
|||
|
||||
public final class RecalibrationTables {
|
||||
public enum TableType {
|
||||
READ_GROUP_TABLE(0),
|
||||
QUALITY_SCORE_TABLE(1),
|
||||
OPTIONAL_COVARIATE_TABLES_START(2);
|
||||
|
||||
public final int index;
|
||||
|
||||
private TableType(final int index) {
|
||||
this.index = index;
|
||||
}
|
||||
READ_GROUP_TABLE,
|
||||
QUALITY_SCORE_TABLE,
|
||||
OPTIONAL_COVARIATE_TABLES_START;
|
||||
}
|
||||
|
||||
private final ArrayList<NestedIntegerArray<RecalDatum>> tables;
|
||||
|
|
@ -60,7 +54,7 @@ public final class RecalibrationTables {
|
|||
private final PrintStream log;
|
||||
|
||||
public RecalibrationTables(final Covariate[] covariates) {
|
||||
this(covariates, covariates[TableType.READ_GROUP_TABLE.index].maximumKeyValue() + 1, null);
|
||||
this(covariates, covariates[TableType.READ_GROUP_TABLE.ordinal()].maximumKeyValue() + 1, null);
|
||||
}
|
||||
|
||||
public RecalibrationTables(final Covariate[] covariates, final int numReadGroups) {
|
||||
|
|
@ -72,31 +66,31 @@ public final class RecalibrationTables {
|
|||
for ( int i = 0; i < covariates.length; i++ )
|
||||
tables.add(i, null); // initialize so we can set below
|
||||
|
||||
qualDimension = covariates[TableType.QUALITY_SCORE_TABLE.index].maximumKeyValue() + 1;
|
||||
qualDimension = covariates[TableType.QUALITY_SCORE_TABLE.ordinal()].maximumKeyValue() + 1;
|
||||
this.numReadGroups = numReadGroups;
|
||||
this.log = log;
|
||||
|
||||
tables.set(TableType.READ_GROUP_TABLE.index,
|
||||
tables.set(TableType.READ_GROUP_TABLE.ordinal(),
|
||||
log == null ? new NestedIntegerArray<RecalDatum>(numReadGroups, eventDimension) :
|
||||
new LoggingNestedIntegerArray<RecalDatum>(log, "READ_GROUP_TABLE", numReadGroups, eventDimension));
|
||||
|
||||
tables.set(TableType.QUALITY_SCORE_TABLE.index, makeQualityScoreTable());
|
||||
tables.set(TableType.QUALITY_SCORE_TABLE.ordinal(), makeQualityScoreTable());
|
||||
|
||||
for (int i = TableType.OPTIONAL_COVARIATE_TABLES_START.index; i < covariates.length; i++)
|
||||
for (int i = TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal(); i < covariates.length; i++)
|
||||
tables.set(i,
|
||||
log == null ? new NestedIntegerArray<RecalDatum>(numReadGroups, qualDimension, covariates[i].maximumKeyValue()+1, eventDimension) :
|
||||
new LoggingNestedIntegerArray<RecalDatum>(log, String.format("OPTIONAL_COVARIATE_TABLE_%d", i - TableType.OPTIONAL_COVARIATE_TABLES_START.index + 1),
|
||||
new LoggingNestedIntegerArray<RecalDatum>(log, String.format("OPTIONAL_COVARIATE_TABLE_%d", i - TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal() + 1),
|
||||
numReadGroups, qualDimension, covariates[i].maximumKeyValue()+1, eventDimension));
|
||||
}
|
||||
|
||||
@Ensures("result != null")
|
||||
public NestedIntegerArray<RecalDatum> getReadGroupTable() {
|
||||
return getTable(TableType.READ_GROUP_TABLE.index);
|
||||
return getTable(TableType.READ_GROUP_TABLE.ordinal());
|
||||
}
|
||||
|
||||
@Ensures("result != null")
|
||||
public NestedIntegerArray<RecalDatum> getQualityScoreTable() {
|
||||
return getTable(TableType.QUALITY_SCORE_TABLE.index);
|
||||
return getTable(TableType.QUALITY_SCORE_TABLE.ordinal());
|
||||
}
|
||||
|
||||
@Ensures("result != null")
|
||||
|
|
|
|||
|
|
@ -3,16 +3,10 @@ package org.broadinstitute.sting.gatk.traversals;
|
|||
import com.google.java.contract.PreconditionError;
|
||||
import net.sf.samtools.*;
|
||||
import org.broadinstitute.sting.commandline.Tags;
|
||||
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ActiveRegionShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.*;
|
||||
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState;
|
||||
import org.broadinstitute.sting.utils.activeregion.ExperimentalActiveRegionShardType;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
|
@ -21,6 +15,7 @@ import org.broadinstitute.sting.BaseTest;
|
|||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.executive.WindowMaker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -33,7 +28,6 @@ import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
|||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
import org.testng.Assert;
|
||||
import org.testng.TestException;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
|
|
@ -101,9 +95,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
private final TraverseActiveRegions<Integer, Integer> traverse = new TraverseActiveRegions<Integer, Integer>();
|
||||
private final ExperimentalReadShardTraverseActiveRegions<Integer, Integer> readShardTraverse = new ExperimentalReadShardTraverseActiveRegions<Integer, Integer>();
|
||||
private final ExperimentalActiveRegionShardTraverseActiveRegions<Integer, Integer> activeRegionShardTraverse = new ExperimentalActiveRegionShardTraverseActiveRegions<Integer, Integer>();
|
||||
private final TraverseActiveRegions<Integer, Integer> t = new TraverseActiveRegions<Integer, Integer>();
|
||||
|
||||
private IndexedFastaSequenceFile reference;
|
||||
private SAMSequenceDictionary dictionary;
|
||||
|
|
@ -114,8 +106,6 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
private static final String testBAM = "TraverseActiveRegionsUnitTest.bam";
|
||||
private static final String testBAI = "TraverseActiveRegionsUnitTest.bai";
|
||||
|
||||
private static final ExperimentalActiveRegionShardType shardType = ExperimentalActiveRegionShardType.LOCUSSHARD;
|
||||
|
||||
@BeforeClass
|
||||
private void init() throws FileNotFoundException {
|
||||
reference = new CachingIndexedFastaSequenceFile(new File(hg19Reference));
|
||||
|
|
@ -183,8 +173,8 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
|
||||
private List<GenomeLoc> getIsActiveIntervals(DummyActiveRegionWalker walker, List<GenomeLoc> intervals) {
|
||||
List<GenomeLoc> activeIntervals = new ArrayList<GenomeLoc>();
|
||||
for (ShardDataProvider dataProvider : createDataProviders(intervals, testBAM)) {
|
||||
traverse(walker, dataProvider, 0);
|
||||
for (LocusShardDataProvider dataProvider : createDataProviders(intervals, testBAM)) {
|
||||
t.traverse(walker, dataProvider, 0);
|
||||
activeIntervals.addAll(walker.isActiveCalls);
|
||||
}
|
||||
|
||||
|
|
@ -421,10 +411,10 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
private Map<GenomeLoc, ActiveRegion> getActiveRegions(DummyActiveRegionWalker walker, List<GenomeLoc> intervals) {
|
||||
for (ShardDataProvider dataProvider : createDataProviders(intervals, testBAM))
|
||||
traverse(walker, dataProvider, 0);
|
||||
for (LocusShardDataProvider dataProvider : createDataProviders(intervals, testBAM))
|
||||
t.traverse(walker, dataProvider, 0);
|
||||
|
||||
endTraversal(walker, 0);
|
||||
t.endTraversal(walker, 0);
|
||||
|
||||
return walker.mappedActiveRegions;
|
||||
}
|
||||
|
|
@ -485,12 +475,10 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
return record;
|
||||
}
|
||||
|
||||
private List<ShardDataProvider> createDataProviders(List<GenomeLoc> intervals, String bamFile) {
|
||||
private List<LocusShardDataProvider> createDataProviders(List<GenomeLoc> intervals, String bamFile) {
|
||||
GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
|
||||
engine.setGenomeLocParser(genomeLocParser);
|
||||
GATKArgumentCollection arguments = new GATKArgumentCollection();
|
||||
arguments.activeRegionShardType = shardType; // make explicit
|
||||
engine.setArguments(arguments);
|
||||
t.initialize(engine);
|
||||
|
||||
Collection<SAMReaderID> samFiles = new ArrayList<SAMReaderID>();
|
||||
SAMReaderID readerID = new SAMReaderID(new File(bamFile), new Tags());
|
||||
|
|
@ -498,65 +486,13 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
|
||||
SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser);
|
||||
|
||||
List<ShardDataProvider> providers = new ArrayList<ShardDataProvider>();
|
||||
|
||||
switch (shardType) {
|
||||
case LOCUSSHARD:
|
||||
traverse.initialize(engine);
|
||||
for (Shard shard : dataSource.createShardIteratorOverIntervals(new GenomeLocSortedSet(genomeLocParser, intervals), new LocusShardBalancer())) {
|
||||
for (WindowMaker.WindowMakerIterator window : new WindowMaker(shard, genomeLocParser, dataSource.seek(shard), shard.getGenomeLocs())) {
|
||||
providers.add(new LocusShardDataProvider(shard, shard.getReadProperties(), genomeLocParser, window.getLocus(), window, reference, new ArrayList<ReferenceOrderedDataSource>()));
|
||||
}
|
||||
}
|
||||
break;
|
||||
case READSHARD:
|
||||
readShardTraverse.initialize(engine);
|
||||
for (Shard shard : dataSource.createShardIteratorOverIntervals(new GenomeLocSortedSet(genomeLocParser, intervals), new ReadShardBalancer())) {
|
||||
providers.add(new ReadShardDataProvider(shard, genomeLocParser, shard.iterator(), reference, new ArrayList<ReferenceOrderedDataSource>()));
|
||||
}
|
||||
break;
|
||||
case ACTIVEREGIONSHARD:
|
||||
activeRegionShardTraverse.initialize(engine);
|
||||
for (Shard shard : dataSource.createShardIteratorOverIntervals(new GenomeLocSortedSet(genomeLocParser, intervals), new ActiveRegionShardBalancer())) {
|
||||
for (WindowMaker.WindowMakerIterator window : new WindowMaker(shard, genomeLocParser, dataSource.seek(shard), shard.getGenomeLocs())) {
|
||||
providers.add(new ActiveRegionShardDataProvider(shard, shard.getReadProperties(), genomeLocParser, shard.iterator(), window.getLocus(), window, reference, new ArrayList<ReferenceOrderedDataSource>()));
|
||||
}
|
||||
}
|
||||
break;
|
||||
default: throw new TestException("Invalid shard type");
|
||||
List<LocusShardDataProvider> providers = new ArrayList<LocusShardDataProvider>();
|
||||
for (Shard shard : dataSource.createShardIteratorOverIntervals(new GenomeLocSortedSet(genomeLocParser, intervals), new LocusShardBalancer())) {
|
||||
for (WindowMaker.WindowMakerIterator window : new WindowMaker(shard, genomeLocParser, dataSource.seek(shard), shard.getGenomeLocs())) {
|
||||
providers.add(new LocusShardDataProvider(shard, shard.getReadProperties(), genomeLocParser, window.getLocus(), window, reference, new ArrayList<ReferenceOrderedDataSource>()));
|
||||
}
|
||||
}
|
||||
|
||||
return providers;
|
||||
}
|
||||
|
||||
private void traverse(DummyActiveRegionWalker walker, ShardDataProvider dataProvider, int i) {
|
||||
switch (shardType) {
|
||||
case LOCUSSHARD:
|
||||
traverse.traverse(walker, (LocusShardDataProvider) dataProvider, i);
|
||||
break;
|
||||
case READSHARD:
|
||||
readShardTraverse.traverse(walker, (ReadShardDataProvider) dataProvider, i);
|
||||
break;
|
||||
case ACTIVEREGIONSHARD:
|
||||
activeRegionShardTraverse.traverse(walker, (ActiveRegionShardDataProvider) dataProvider, i);
|
||||
break;
|
||||
default: throw new TestException("Invalid shard type");
|
||||
}
|
||||
}
|
||||
|
||||
private void endTraversal(DummyActiveRegionWalker walker, int i) {
|
||||
switch (shardType) {
|
||||
case LOCUSSHARD:
|
||||
traverse.endTraversal(walker, i);
|
||||
break;
|
||||
case READSHARD:
|
||||
readShardTraverse.endTraversal(walker, i);
|
||||
break;
|
||||
case ACTIVEREGIONSHARD:
|
||||
activeRegionShardTraverse.endTraversal(walker, i);
|
||||
break;
|
||||
default: throw new TestException("Invalid shard type");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testHasAnnotsAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("a127623a26bac4c17c9df491e170ed88"));
|
||||
Arrays.asList("fbfbd4d13b7ba3d76e8e186902e81378"));
|
||||
executeTest("test file has annotations, asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -40,7 +40,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testHasAnnotsAsking2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("13e24e6b9dfa241df5baa2c3f53415b9"));
|
||||
Arrays.asList("19aef8914efc497192f89a9038310ca5"));
|
||||
executeTest("test file has annotations, asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -66,7 +66,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoAnnotsAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("07cb4d427235878aeec0066d7d298e54"));
|
||||
Arrays.asList("4f0b8033da18e6cf6e9b8d5d36c21ba2"));
|
||||
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -74,7 +74,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoAnnotsAsking2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("e579097677d5e56a5776151251947961"));
|
||||
Arrays.asList("64ca176d587dfa2b3b9dec9f7999305c"));
|
||||
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -82,7 +82,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testExcludeAnnotations() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard -XA FisherStrand -XA ReadPosRankSumTest --variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("348314945436ace71ce6b1a52559d9ee"));
|
||||
Arrays.asList("f33f417fad98c05d9cd08ffa22943b0f"));
|
||||
executeTest("test exclude annotations", spec);
|
||||
}
|
||||
|
||||
|
|
@ -90,7 +90,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testOverwritingHeader() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1,
|
||||
Arrays.asList("ae7930e37a66c0aa4cfe0232736864fe"));
|
||||
Arrays.asList("0c810f6c4abef9d9dc5513ca872d3d22"));
|
||||
executeTest("test overwriting header", spec);
|
||||
}
|
||||
|
||||
|
|
@ -98,7 +98,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoReads() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("a0ba056c2625033e5e859fd6bcec1256"));
|
||||
Arrays.asList("1c423b7730b9805e7b885ece924286e0"));
|
||||
executeTest("not passing it any reads", spec);
|
||||
}
|
||||
|
||||
|
|
@ -106,7 +106,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testDBTagWithDbsnp() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --dbsnp " + b36dbSNP129 + " -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("0be7da17340111a94e8581ee3808c88a"));
|
||||
Arrays.asList("54d7d5bb9404652857adf5e50d995f30"));
|
||||
executeTest("getting DB tag with dbSNP", spec);
|
||||
}
|
||||
|
||||
|
|
@ -114,7 +114,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testMultipleIdsWithDbsnp() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --alwaysAppendDbsnpId --dbsnp " + b36dbSNP129 + " -G Standard --variant " + privateTestDir + "vcfexample3withIDs.vcf -L " + privateTestDir + "vcfexample3withIDs.vcf", 1,
|
||||
Arrays.asList("e40e625302a496ede42eed61c2ce524b"));
|
||||
Arrays.asList("5fe63e511061ed4f91d938e72e7e3c39"));
|
||||
executeTest("adding multiple IDs with dbSNP", spec);
|
||||
}
|
||||
|
||||
|
|
@ -122,7 +122,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testDBTagWithHapMap() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --comp:H3 " + privateTestDir + "fakeHM3.vcf -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("cb50876477d3e035b6eda5d720d7ba8d"));
|
||||
Arrays.asList("cc7184263975595a6e2473d153227146"));
|
||||
executeTest("getting DB tag with HM3", spec);
|
||||
}
|
||||
|
||||
|
|
@ -130,7 +130,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoQuals() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --variant " + privateTestDir + "noQual.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L " + privateTestDir + "noQual.vcf -A QualByDepth", 1,
|
||||
Arrays.asList("458412261d61797d39f802c1e03d63f6"));
|
||||
Arrays.asList("aea983adc01cd059193538cc30adc17d"));
|
||||
executeTest("test file doesn't have QUALs", spec);
|
||||
}
|
||||
|
||||
|
|
@ -138,7 +138,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testUsingExpression() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --resource:foo " + privateTestDir + "targetAnnotations.vcf -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -E foo.AF -L " + privateTestDir + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("39defa8108dca9fa3e54b22a7da43f77"));
|
||||
Arrays.asList("2b0e8cdfd691779befc5ac123d1a1887"));
|
||||
executeTest("using expression", spec);
|
||||
}
|
||||
|
||||
|
|
@ -146,7 +146,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testUsingExpressionWithID() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --resource:foo " + privateTestDir + "targetAnnotations.vcf -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -E foo.ID -L " + privateTestDir + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("a917edd58a0c235e9395bfc2d2020a8c"));
|
||||
Arrays.asList("3de1d1998203518098ffae233f3e2352"));
|
||||
executeTest("using expression with ID", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -96,7 +96,11 @@ public class ProgressMeterDaemonUnitTest extends BaseTest {
|
|||
daemon.done();
|
||||
Assert.assertTrue(daemon.isDone());
|
||||
|
||||
Assert.assertEquals(meter.progressCalls.size(), ticks,
|
||||
"Expected " + ticks + " progress calls from daemon thread, but only got " + meter.progressCalls.size() + " with exact calls " + meter.progressCalls);
|
||||
Assert.assertTrue(meter.progressCalls.size() >= 1,
|
||||
"Expected at least one progress update call from daemon thread, but only got " + meter.progressCalls.size() + " with exact calls " + meter.progressCalls);
|
||||
|
||||
final int tolerance = (int)Math.ceil(0.8 * meter.progressCalls.size());
|
||||
Assert.assertTrue(Math.abs(meter.progressCalls.size() - ticks) <= tolerance,
|
||||
"Expected " + ticks + " progress calls from daemon thread, but got " + meter.progressCalls.size() + " and a tolerance of only " + tolerance);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -94,8 +94,8 @@ public class RecalibrationReportUnitTest {
|
|||
qualTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], errorMode.ordinal());
|
||||
nKeys += 2;
|
||||
for (int j = 0; j < optionalCovariates.size(); j++) {
|
||||
final NestedIntegerArray<RecalDatum> covTable = recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + j);
|
||||
final int covValue = covariates[RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + j];
|
||||
final NestedIntegerArray<RecalDatum> covTable = recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal() + j);
|
||||
final int covValue = covariates[RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal() + j];
|
||||
if ( covValue >= 0 ) {
|
||||
covTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], covValue, errorMode.ordinal());
|
||||
nKeys++;
|
||||
|
|
|
|||
|
|
@ -29,15 +29,46 @@ import org.broadinstitute.sting.BaseTest;
|
|||
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
|
||||
import org.broadinstitute.sting.utils.recalibration.covariates.*;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeMethod;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
public final class RecalibrationTablesUnitTest extends BaseTest {
|
||||
private RecalibrationTables tables;
|
||||
private Covariate[] covariates;
|
||||
private int numReadGroups = 6;
|
||||
final byte qualByte = 1;
|
||||
final List<Integer> combineStates = Arrays.asList(0, 1, 2);
|
||||
|
||||
@BeforeMethod
|
||||
private void makeTables() {
|
||||
covariates = RecalibrationTestUtils.makeInitializedStandardCovariates();
|
||||
tables = new RecalibrationTables(covariates, numReadGroups);
|
||||
fillTable(tables);
|
||||
}
|
||||
|
||||
private void fillTable(final RecalibrationTables tables) {
|
||||
for ( int iterations = 0; iterations < 10; iterations++ ) {
|
||||
for ( final EventType et : EventType.values() ) {
|
||||
for ( final int rg : combineStates) {
|
||||
final double error = rg % 2 == 0 ? 1 : 0;
|
||||
RecalUtils.incrementDatumOrPutIfNecessary(tables.getReadGroupTable(), qualByte, error, rg, et.ordinal());
|
||||
for ( final int qual : combineStates) {
|
||||
RecalUtils.incrementDatumOrPutIfNecessary(tables.getQualityScoreTable(), qualByte, error, rg, qual, et.ordinal());
|
||||
for ( final int cycle : combineStates)
|
||||
RecalUtils.incrementDatumOrPutIfNecessary(tables.getTable(2), qualByte, error, rg, qual, cycle, et.ordinal());
|
||||
for ( final int context : combineStates)
|
||||
RecalUtils.incrementDatumOrPutIfNecessary(tables.getTable(3), qualByte, error, rg, qual, context, et.ordinal());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void basicTest() {
|
||||
final Covariate[] covariates = RecalibrationTestUtils.makeInitializedStandardCovariates();
|
||||
final int numReadGroups = 6;
|
||||
final RecalibrationTables tables = new RecalibrationTables(covariates, numReadGroups);
|
||||
|
||||
final Covariate qualCov = covariates[1];
|
||||
final Covariate cycleCov = covariates[2];
|
||||
final Covariate contextCov = covariates[3];
|
||||
|
|
@ -45,11 +76,11 @@ public final class RecalibrationTablesUnitTest extends BaseTest {
|
|||
Assert.assertEquals(tables.numTables(), covariates.length);
|
||||
|
||||
Assert.assertNotNull(tables.getReadGroupTable());
|
||||
Assert.assertEquals(tables.getReadGroupTable(), tables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE.index));
|
||||
Assert.assertEquals(tables.getReadGroupTable(), tables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE.ordinal()));
|
||||
testDimensions(tables.getReadGroupTable(), numReadGroups);
|
||||
|
||||
Assert.assertNotNull(tables.getQualityScoreTable());
|
||||
Assert.assertEquals(tables.getQualityScoreTable(), tables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE.index));
|
||||
Assert.assertEquals(tables.getQualityScoreTable(), tables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE.ordinal()));
|
||||
testDimensions(tables.getQualityScoreTable(), numReadGroups, qualCov.maximumKeyValue() + 1);
|
||||
|
||||
Assert.assertNotNull(tables.getTable(2));
|
||||
|
|
@ -72,13 +103,74 @@ public final class RecalibrationTablesUnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void basicMakeQualityScoreTable() {
|
||||
final Covariate[] covariates = RecalibrationTestUtils.makeInitializedStandardCovariates();
|
||||
final int numReadGroups = 6;
|
||||
final RecalibrationTables tables = new RecalibrationTables(covariates, numReadGroups);
|
||||
|
||||
final Covariate qualCov = covariates[1];
|
||||
final NestedIntegerArray<RecalDatum> copy = tables.makeQualityScoreTable();
|
||||
testDimensions(copy, numReadGroups, qualCov.maximumKeyValue()+1);
|
||||
Assert.assertEquals(copy.getAllValues().size(), 0);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCombine1() {
|
||||
final RecalibrationTables merged = new RecalibrationTables(covariates, numReadGroups);
|
||||
fillTable(merged);
|
||||
|
||||
merged.combine(tables);
|
||||
|
||||
for ( int i = 0; i < tables.numTables(); i++ ) {
|
||||
NestedIntegerArray<RecalDatum> table = tables.getTable(i);
|
||||
NestedIntegerArray<RecalDatum> mergedTable = merged.getTable(i);
|
||||
|
||||
Assert.assertEquals(table.getAllLeaves().size(), mergedTable.getAllLeaves().size());
|
||||
for ( final NestedIntegerArray.Leaf<RecalDatum> leaf : table.getAllLeaves() ) {
|
||||
final RecalDatum mergedValue = mergedTable.get(leaf.keys);
|
||||
Assert.assertNotNull(mergedValue);
|
||||
Assert.assertEquals(mergedValue.getNumObservations(), leaf.value.getNumObservations() * 2);
|
||||
Assert.assertEquals(mergedValue.getNumMismatches(), leaf.value.getNumMismatches() * 2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCombineEmptyOther() {
|
||||
final RecalibrationTables merged = new RecalibrationTables(covariates, numReadGroups);
|
||||
|
||||
merged.combine(tables);
|
||||
|
||||
for ( int i = 0; i < tables.numTables(); i++ ) {
|
||||
NestedIntegerArray<RecalDatum> table = tables.getTable(i);
|
||||
NestedIntegerArray<RecalDatum> mergedTable = merged.getTable(i);
|
||||
|
||||
Assert.assertEquals(table.getAllLeaves().size(), mergedTable.getAllLeaves().size());
|
||||
for ( final NestedIntegerArray.Leaf<RecalDatum> leaf : table.getAllLeaves() ) {
|
||||
final RecalDatum mergedValue = mergedTable.get(leaf.keys);
|
||||
Assert.assertNotNull(mergedValue);
|
||||
Assert.assertEquals(mergedValue.getNumObservations(), leaf.value.getNumObservations());
|
||||
Assert.assertEquals(mergedValue.getNumMismatches(), leaf.value.getNumMismatches());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCombinePartial() {
|
||||
final RecalibrationTables merged = new RecalibrationTables(covariates, numReadGroups);
|
||||
for ( final int rg : combineStates) {
|
||||
RecalUtils.incrementDatumOrPutIfNecessary(merged.getTable(3), qualByte, 1, rg, 0, 0, 0);
|
||||
}
|
||||
|
||||
merged.combine(tables);
|
||||
for ( int i = 0; i < tables.numTables(); i++ ) {
|
||||
NestedIntegerArray<RecalDatum> table = tables.getTable(i);
|
||||
NestedIntegerArray<RecalDatum> mergedTable = merged.getTable(i);
|
||||
|
||||
Assert.assertEquals(table.getAllLeaves().size(), mergedTable.getAllLeaves().size());
|
||||
for ( final NestedIntegerArray.Leaf<RecalDatum> leaf : table.getAllLeaves() ) {
|
||||
final RecalDatum mergedValue = mergedTable.get(leaf.keys);
|
||||
Assert.assertNotNull(mergedValue);
|
||||
|
||||
final int delta = i == 3 && leaf.keys[1] == 0 && leaf.keys[2] == 0 && leaf.keys[3] == 0 ? 1 : 0;
|
||||
Assert.assertEquals(mergedValue.getNumObservations(), leaf.value.getNumObservations() + delta);
|
||||
Assert.assertEquals(mergedValue.getNumMismatches(), leaf.value.getNumMismatches() + delta);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue