Merge branch 'master' of github.com:broadinstitute/gsa-unstable

This commit is contained in:
Ami Levy-Moonshine 2013-01-06 23:35:47 -05:00
commit d3c2c97fb2
53 changed files with 1543 additions and 760 deletions

View File

@ -1,99 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.classloader.ProtectedPackageSource;
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
import org.broadinstitute.sting.utils.recalibration.EventType;
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
import org.broadinstitute.sting.utils.recalibration.RecalDatum;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.LinkedList;
import java.util.List;
public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine implements ProtectedPackageSource {
private final static Logger logger = Logger.getLogger(AdvancedRecalibrationEngine.class);
final List<NestedIntegerArray<RecalDatum>> allThreadLocalQualityScoreTables = new LinkedList<NestedIntegerArray<RecalDatum>>();
private ThreadLocal<NestedIntegerArray<RecalDatum>> threadLocalQualityScoreTables = new ThreadLocal<NestedIntegerArray<RecalDatum>>() {
@Override
protected synchronized NestedIntegerArray<RecalDatum> initialValue() {
final NestedIntegerArray<RecalDatum> table = recalibrationTables.makeQualityScoreTable();
allThreadLocalQualityScoreTables.add(table);
return table;
}
};
@Override
public void updateDataForRead( final ReadRecalibrationInfo recalInfo ) {
final GATKSAMRecord read = recalInfo.getRead();
final ReadCovariates readCovariates = recalInfo.getCovariatesValues();
final NestedIntegerArray<RecalDatum> qualityScoreTable = getThreadLocalQualityScoreTable();
for( int offset = 0; offset < read.getReadBases().length; offset++ ) {
if( ! recalInfo.skip(offset) ) {
for (final EventType eventType : EventType.values()) {
final int[] keys = readCovariates.getKeySet(offset, eventType);
final int eventIndex = eventType.index;
final byte qual = recalInfo.getQual(eventType, offset);
final double isError = recalInfo.getErrorFraction(eventType, offset);
incrementDatumOrPutIfNecessary(qualityScoreTable, qual, isError, keys[0], keys[1], eventIndex);
for (int i = 2; i < covariates.length; i++) {
if (keys[i] < 0)
continue;
incrementDatumOrPutIfNecessary(recalibrationTables.getTable(i), qual, isError, keys[0], keys[1], keys[i], eventIndex);
}
}
}
}
}
/**
* Get a NestedIntegerArray for a QualityScore table specific to this thread
* @return a non-null NestedIntegerArray ready to be used to collect calibration info for the quality score covariate
*/
private NestedIntegerArray<RecalDatum> getThreadLocalQualityScoreTable() {
return threadLocalQualityScoreTables.get();
}
@Override
public void finalizeData() {
// merge in all of the thread local tables
logger.info("Merging " + allThreadLocalQualityScoreTables.size() + " thread-local quality score tables");
for ( final NestedIntegerArray<RecalDatum> localTable : allThreadLocalQualityScoreTables ) {
recalibrationTables.combineQualityScoreTable(localTable);
}
allThreadLocalQualityScoreTables.clear(); // cleanup after ourselves
super.finalizeData();
}
}

View File

@ -53,21 +53,21 @@ public class BQSRIntegrationTest extends WalkerTest {
String HiSeqBam = privateTestDir + "HiSeq.1mb.1RG.bam";
String HiSeqInterval = "chr1:10,000,000-10,100,000";
return new Object[][]{
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "2f250fecb930e0dfe0f63fe0fed3960b")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "26c8d7226139a040557b1d3b1c8792f0")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "9b43a1839cb6ea03aec1d96f15ca8efb")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "3159a9d136c45e4a65d46a23dc8fd3b5")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "bb7262829effbbdbc8d88dd36f480368")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "fbb002fa2b9197c4b555852dccc11562")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "7392acb71131a60a527ca32715fc59be")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "49d4383896a90795d94138db1410a7df")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "427448eff98cf194cc7217c0b1401e79")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "50cd1a10b6ecb3d09f90f1e4a66da95d")},
{new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "1dc71561c9d0fb56f9876cb5043c5376")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "13e8f032e76340b114847c90af0a1f8a")},
{new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "03f58ae4f9d203034e895a3636fc108f")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "49d4383896a90795d94138db1410a7df")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "2db2ef8c2d63e167663d70340182f49a")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "6b3f252718f59cf9fd3f7612f73a35bf")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "863576ac9ff0b0e02f2e84aef15923a7")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "03e28f48201a35c70d1cf48e9f45364f")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "6e3c5635d387a1c428a7c9c88ad26488")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "6507adcb94bacde4cdee9caa9f14f24b")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "399bbb4bf80764dfc644b2f95d824615")},
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "34d70899253c2b3343ca9ae944291c30")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "e61fa47bfc08433f0cd55558e2081548")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "5c2622c63225b8b04990baf0ae4de07c")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "ee7191d83d7d5bb957dc4595883c32f1")},
{new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "da92f4730356f479c2c2b71497cfac6d")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "8075595113b48c0c7ead08ce41bef9fe")},
{new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "be05834841c5690c66910270521d5c32")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "e61fa47bfc08433f0cd55558e2081548")},
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "8ee0b498dbbc95ce76393a0f089fec92")},
};
}
@ -104,7 +104,7 @@ public class BQSRIntegrationTest extends WalkerTest {
" -sortAllCols" +
" --plot_pdf_file /dev/null" +
" --intermediate_csv_file %s",
Arrays.asList("d1c38a3418979400630e2bca1140689c"));
Arrays.asList("dd6e0e1e3f53f8ae0c8f5de21ded6ee9"));
executeTest("testBQSR-CSVfile", spec);
}

View File

@ -55,36 +55,36 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest {
@Test(enabled = true)
public void testSNP_ACS_Pools() {
PC_LSV_Test_short(" -maxAltAlleles 1 -ploidy 6 -out_mode EMIT_ALL_CONFIDENT_SITES","LSV_SNP_ACS","SNP","651469eeacdb3ab9e2690cfb71f6a634");
PC_LSV_Test_short(" -maxAltAlleles 1 -ploidy 6 -out_mode EMIT_ALL_CONFIDENT_SITES","LSV_SNP_ACS","SNP","df0e67c975ef74d593f1c704daab1705");
}
@Test(enabled = true)
public void testBOTH_GGA_Pools() {
PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","be7dc20bdb5f200d189706bcf1aeb7ee");
PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","d1c113a17e36762d27eb27fd12528e52");
}
@Test(enabled = true)
public void testINDEL_GGA_Pools() {
PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","25e5ea86d87b7d7ddaad834a6ed7481d");
PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","ab043eed87fadbe5761a55a4912b19ac");
}
@Test(enabled = true)
public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() {
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","cdbf268d282e57189a88fb83f0e1fd72");
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","95d48e0680019d5406ff9adb8f2ff3ca");
}
@Test(enabled = true)
public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() {
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","2ed40925cd112c1a45470d215b7ec4b3");
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","8a4ddd64c4e9c42b4a8622582fcfa9c9");
}
@Test(enabled = true)
public void testMT_SNP_DISCOVERY_sp4() {
PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","33695a998bcc906cabcc758727004387");
PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","3fc6f4d458313616727c60e49c0e852b");
}
@Test(enabled = true)
public void testMT_SNP_GGA_sp10() {
PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "b2725242114bf9cc9bca14679705ba40");
PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "1bebbc0f28bff6fd64736ccca8839df8");
}
}

View File

@ -30,7 +30,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
Arrays.asList("2ba9af34d2a4d55caf152265a30ead46"));
Arrays.asList("847605f4efafef89529fe0e496315edd"));
executeTest("test MultiSample Pilot1", spec);
}
@ -38,7 +38,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testWithAllelesPassedIn1() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
Arrays.asList("0630c35c070d7a7e0cf22b3cce797f22"));
Arrays.asList("5b31b811072a4df04524e13604015f9b"));
executeTest("test MultiSample Pilot2 with alleles passed in", spec1);
}
@ -46,7 +46,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testWithAllelesPassedIn2() {
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
Arrays.asList("5857dcb4e6a8422ae0813e42d433b122"));
Arrays.asList("d9992e55381afb43742cc9b30fcd7538"));
executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
}
@ -54,7 +54,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testSingleSamplePilot2() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
Arrays.asList("489deda5d3276545364a06b7385f8bd9"));
Arrays.asList("dff4412a074940d26994f9552476b209"));
executeTest("test SingleSample Pilot2", spec);
}
@ -62,7 +62,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultipleSNPAlleles() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
Arrays.asList("595ba44c75d08dab98df222b8e61ab70"));
Arrays.asList("b41b95aaa2c453c9b75b3b29a9c2718e"));
executeTest("test Multiple SNP alleles", spec);
}
@ -70,7 +70,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testBadRead() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH -I " + privateTestDir + "badRead.test.bam -o %s -L 1:22753424-22753464", 1,
Arrays.asList("360f9795facdaa14c0cb4b05207142e4"));
Arrays.asList("d915535c1458733f09f82670092fcab6"));
executeTest("test bad read", spec);
}
@ -78,7 +78,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testReverseTrim() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1,
Arrays.asList("4b4a62429f8eac1e2f27ba5e2edea9e5"));
Arrays.asList("44e9f6cf11b4efecb454cd3de8de9877"));
executeTest("test reverse trim", spec);
}
@ -86,7 +86,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMismatchedPLs() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1,
Arrays.asList("cc892c91a93dbd8dbdf645803f35a0ee"));
Arrays.asList("935ee705ffe8cc6bf1d9efcceea271c8"));
executeTest("test mismatched PLs", spec);
}
@ -96,7 +96,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
//
// --------------------------------------------------------------------------------------------------------------
private final static String COMPRESSED_OUTPUT_MD5 = "3fc7d2681ff753e2d68605d7cf8b63e3";
private final static String COMPRESSED_OUTPUT_MD5 = "e6e33f0ebabab027eabed51fe9a08da9";
@Test
public void testCompressedOutput() {
@ -149,7 +149,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMinBaseQualityScore() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --min_base_quality_score 26", 1,
Arrays.asList("04dc83d7dfb42b8cada91647bd9f32f1"));
Arrays.asList("6ee6537e9ebc1bfc7c6cf8f04b1582ff"));
executeTest("test min_base_quality_score 26", spec);
}
@ -157,7 +157,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testSLOD() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b36KGReference + " --computeSLOD --no_cmdline_in_header -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
Arrays.asList("4429a665a1048f958db3c204297cdb9f"));
Arrays.asList("55760482335497086458b09e415ecf54"));
executeTest("test SLOD", spec);
}
@ -165,7 +165,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testNDA() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " --annotateNDA -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
Arrays.asList("f063e3573c513eaa9ce7d7df22143362"));
Arrays.asList("938e888a40182878be4c3cc4859adb69"));
executeTest("test NDA", spec);
}
@ -173,7 +173,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testCompTrack() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -comp:FOO " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
Arrays.asList("d76e93e2676354dde832f08a508c6f88"));
Arrays.asList("7dc186d420487e4e156a24ec8dea0951"));
executeTest("test using comp track", spec);
}
@ -187,17 +187,17 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testOutputParameterSitesOnly() {
testOutputParameters("-sites_only", "1a65172b9bd7a2023d48bc758747b34a");
testOutputParameters("-sites_only", "f99c7471127a6fb6f72e136bc873b2c9");
}
@Test
public void testOutputParameterAllConfident() {
testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "3f1fa34d8440f6f21654ce60c0ba8f28");
testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "9dbc9389db39cf9697e93e0bf529314f");
}
@Test
public void testOutputParameterAllSites() {
testOutputParameters("--output_mode EMIT_ALL_SITES", "f240434b4d3c234f6f9e349e9ec05f4e");
testOutputParameters("--output_mode EMIT_ALL_SITES", "8b26088a035e579c4afd3b46737291e4");
}
private void testOutputParameters(final String args, final String md5) {
@ -211,7 +211,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testConfidence() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
Arrays.asList("aec378bed312b3557c6dd7ec740c8091"));
Arrays.asList("4af83a883ecc03a23b0aa6dd4b8f1ceb"));
executeTest("test confidence 1", spec1);
}
@ -222,12 +222,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
// --------------------------------------------------------------------------------------------------------------
@Test
public void testHeterozyosity1() {
testHeterozosity( 0.01, "5da6b24033a6b02f466836443d49560e" );
testHeterozosity( 0.01, "bdc8760d7ae1e01c0510b12c1e6fcfa3" );
}
@Test
public void testHeterozyosity2() {
testHeterozosity( 1.0 / 1850, "1f284c4af967a3c26687164f9441fb16" );
testHeterozosity( 1.0 / 1850, "f508f06a47305e11e62776615cb14fe3" );
}
private void testHeterozosity(final double arg, final String md5) {
@ -251,7 +251,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,100,000",
1,
Arrays.asList("cff553c53de970f64051ed5711407038"));
Arrays.asList("13d91059f58fb50a07a6a34b9438a45b"));
executeTest(String.format("test multiple technologies"), spec);
}
@ -270,7 +270,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -L 1:10,000,000-10,100,000" +
" -baq CALCULATE_AS_NECESSARY",
1,
Arrays.asList("f960a91963e614a6c8d8cda57836df24"));
Arrays.asList("07d8b77a5f6697f3a47a4f1efb0dcf50"));
executeTest(String.format("test calling with BAQ"), spec);
}
@ -289,7 +289,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("46a6d24c82ebb99d305462960fa09b7c"));
Arrays.asList("0f026d2e568172cf32813cc54ea7ba23"));
executeTest(String.format("test indel caller in SLX"), spec);
}
@ -304,7 +304,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -minIndelCnt 1" +
" -L 1:10,000,000-10,100,000",
1,
Arrays.asList("2be25321bbc6a963dba7ecba5dd76802"));
Arrays.asList("e7ad858e9d6617534761918561f3ed4c"));
executeTest(String.format("test indel caller in SLX with low min allele count"), spec);
}
@ -317,7 +317,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("d6b2657cd5a4a949968cdab50efce515"));
Arrays.asList("39c7a813fd6ee82d3604f2a868b35b2a"));
executeTest(String.format("test indel calling, multiple technologies"), spec);
}
@ -327,7 +327,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("9cff66a321284c362f393bc4db21f756"));
Arrays.asList("9430fe36789a791fcff6162f768ae563"));
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec);
}
@ -337,7 +337,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
+ privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("90c8cfcf65152534c16ed81104fc3bcd"));
Arrays.asList("8d8dbf483526b0b309f5728619a74a86"));
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec);
}
@ -345,13 +345,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSampleIndels1() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
Arrays.asList("457b8f899cf1665de61e75084dbb79d0"));
Arrays.asList("5667a699a3a13474f2d1cd2d6b01cd5b"));
List<File> result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst();
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
Arrays.asList("a13fe7aa3b9e8e091b3cf3442a056ec1"));
Arrays.asList("b6c1d5cd28ff584c5f5037afef4e883a"));
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
}
@ -361,7 +361,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + privateTestDir + vcf + " -I " + validationDataLocation +
"NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam -o %s -L " + validationDataLocation + vcf, 1,
Arrays.asList("d075ad318739c8c56bdce857da1e48b9"));
Arrays.asList("d76eacc4021b78ccc0a9026162e814a7"));
executeTest("test GENOTYPE_GIVEN_ALLELES with no evidence in reads", spec);
}
@ -373,7 +373,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 20:10,000,000-10,100,000",
1,
Arrays.asList("91c632ab17a1dd89ed19ebb20324f905"));
Arrays.asList("1e0d2c15546c3b0959b00ffb75488b56"));
executeTest(String.format("test UG with base indel quality scores"), spec);
}
@ -407,7 +407,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMinIndelFraction0() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
assessMinIndelFraction + " -minIndelFrac 0.0", 1,
Arrays.asList("1d80e135d611fe19e1fb1882aa588a73"));
Arrays.asList("db3026c49a3de7a5cb9a3d77635d0706"));
executeTest("test minIndelFraction 0.0", spec);
}
@ -415,7 +415,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMinIndelFraction25() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
assessMinIndelFraction + " -minIndelFrac 0.25", 1,
Arrays.asList("752139616752902fca13c312d8fe5e22"));
Arrays.asList("7ab8e5ee15ab98d6756b0eea0f4d3798"));
executeTest("test minIndelFraction 0.25", spec);
}
@ -423,7 +423,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMinIndelFraction100() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
assessMinIndelFraction + " -minIndelFrac 1", 1,
Arrays.asList("d66b9decf26e1704abda1a919ac149cd"));
Arrays.asList("3f07efb768e08650a7ce333edd4f9a52"));
executeTest("test minIndelFraction 1.0", spec);
}
@ -437,7 +437,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testNsInCigar() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "testWithNs.bam -o %s -L 8:141813600-141813700 -out_mode EMIT_ALL_SITES", 1,
Arrays.asList("b62ba9777efc05af4c36e2d4ce3ee67c"));
Arrays.asList("4d36969d4f8f1094f1fb6e7e085c19f6"));
executeTest("test calling on reads with Ns in CIGAR", spec);
}
@ -451,18 +451,18 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testReducedBam() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
Arrays.asList("f72ecd00b2913f63788faa7dabb1d102"));
Arrays.asList("092e42a712afb660ec79ff11c55933e2"));
executeTest("test calling on a ReducedRead BAM", spec);
}
@Test
public void testReducedBamSNPs() {
testReducedCalling("SNP", "f059743858004ceee325f2a7761a2362");
testReducedCalling("SNP", "c0de74ab8f4f14eb3a2c5d55c200ac5f");
}
@Test
public void testReducedBamINDELs() {
testReducedCalling("INDEL", "04845ba1ec7d8d8b0eab2ca6bdb9c1a6");
testReducedCalling("INDEL", "1c9aaf65ffaa12bb766855265a1c3f8e");
}
@ -483,7 +483,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testContaminationDownsampling() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --contamination_fraction_to_filter 0.20", 1,
Arrays.asList("b500ad5959bce69f888a2fac024647e5"));
Arrays.asList("1f9071466fc40f4c6a0f58ac8e9135fb"));
executeTest("test contamination_percentage_to_filter 0.20", spec);
}

View File

@ -21,19 +21,19 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSample() {
HCTest(CEUTRIO_BAM, "", "7122d4f0ef94c5274aa3047cfebe08ed");
HCTest(CEUTRIO_BAM, "", "47fdbe5f01d3ce5e53056eea8c488e45");
}
@Test
public void testHaplotypeCallerSingleSample() {
HCTest(NA12878_BAM, "", "6cd6e6787521c07a7bae98766fd628ab");
HCTest(NA12878_BAM, "", "a2c63f6e6e51a01019bdbd23125bdb15");
}
// TODO -- add more tests for GGA mode, especially with input alleles that are complex variants and/or not trimmed
@Test
public void testHaplotypeCallerMultiSampleGGA() {
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
"44df2a9da4fbd2162ae44c3f2a6ef01f");
"54b7cc3da3d8349ff4302f99883ab188");
}
private void HCTestComplexVariants(String bam, String args, String md5) {
@ -44,7 +44,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSampleComplex() {
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "4a413eeb7a75cab0ab5370b4c08dcf8e");
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "6c0c441b71848c2eea38ab5e2afe1120");
}
private void HCTestSymbolicVariants(String bam, String args, String md5) {
@ -55,7 +55,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerSingleSampleSymbolic() {
HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "77cf5b5273828dd1605bb23a5aeafcaa");
HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "0761ff5cbf279be467833fa6708bf360");
}
private void HCTestIndelQualityScores(String bam, String args, String md5) {
@ -66,20 +66,20 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerSingleSampleIndelQualityScores() {
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "87ca97f90e74caee35c35616c065821c");
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "29f1125df5ab27cc937a144ae08ac735");
}
@Test
public void HCTestProblematicReadsModifiedInActiveRegions() {
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965";
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("3df42d0550b51eb9b55aac61e8b3c452"));
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("ece627de486aee69d02872891c6cb0ff"));
executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec);
}
@Test
public void HCTestStructuralIndels() {
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("4dbc72b72e3e2d9d812d5a398490e213"));
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("add0f4f51969b7caeea99005a7ba1aa4"));
executeTest("HCTestStructuralIndels: ", spec);
}
@ -93,7 +93,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void HCTestReducedBam() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
Arrays.asList("f8c2745bf71f2659a57494fcaa2c103b"));
Arrays.asList("8a400b0c46f41447fcc35a907e34f384"));
executeTest("HC calling on a ReducedRead BAM", spec);
}
}

View File

@ -32,8 +32,8 @@ import org.apache.log4j.PatternLayout;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.help.ApplicationDetails;
import org.broadinstitute.sting.utils.help.HelpConstants;
import org.broadinstitute.sting.utils.help.HelpFormatter;
import org.broadinstitute.sting.utils.help.HelpUtils;
import java.io.IOException;
import java.util.*;
@ -289,7 +289,7 @@ public abstract class CommandLineProgram {
*/
private static void printDocumentationReference() {
errorPrintf("Visit our website and forum for extensive documentation and answers to %n");
errorPrintf("commonly asked questions " + HelpUtils.BASE_GATK_URL + "%n");
errorPrintf("commonly asked questions " + HelpConstants.BASE_GATK_URL + "%n");
}

View File

@ -36,10 +36,7 @@ import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
import org.broadinstitute.sting.gatk.walkers.Attribution;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.ApplicationDetails;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.help.GATKDocUtils;
import org.broadinstitute.sting.utils.help.HelpUtils;
import org.broadinstitute.sting.utils.help.*;
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
import java.util.*;
@ -161,7 +158,7 @@ public class CommandLineGATK extends CommandLineExecutable {
List<String> header = new ArrayList<String>();
header.add(String.format("The Genome Analysis Toolkit (GATK) v%s, Compiled %s",getVersionNumber(), getBuildTime()));
header.add("Copyright (c) 2010 The Broad Institute");
header.add("For support and documentation go to " + HelpUtils.BASE_GATK_URL);
header.add("For support and documentation go to " + HelpConstants.BASE_GATK_URL);
return header;
}

View File

@ -22,11 +22,6 @@ public class LocusShardDataProvider extends ShardDataProvider {
*/
private final ReadProperties sourceInfo;
/**
* The parser, used to create and build new GenomeLocs.
*/
private final GenomeLocParser genomeLocParser;
/**
* The particular locus for which data is provided. Should be contained within shard.getGenomeLocs().
*/
@ -45,7 +40,6 @@ public class LocusShardDataProvider extends ShardDataProvider {
public LocusShardDataProvider(Shard shard, ReadProperties sourceInfo, GenomeLocParser genomeLocParser, GenomeLoc locus, LocusIterator locusIterator, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods) {
super(shard,genomeLocParser,reference,rods);
this.sourceInfo = sourceInfo;
this.genomeLocParser = genomeLocParser;
this.locus = locus;
this.locusIterator = locusIterator;
}

View File

@ -27,7 +27,7 @@ package org.broadinstitute.sting.gatk.filters;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.help.GATKDocUtils;
import org.broadinstitute.sting.utils.help.HelpUtils;
import org.broadinstitute.sting.utils.help.HelpConstants;
import java.util.Collection;
import java.util.List;
@ -71,7 +71,7 @@ public class FilterManager extends PluginManager<ReadFilter> {
return String.format("Read filter %s not found. Available read filters:%n%n%s%n%n%s",pluginName,
userFriendlyListofReadFilters(availableFilters),
"Please consult the GATK Documentation (" + HelpUtils.GATK_DOCS_URL + ") for more information.");
"Please consult the GATK Documentation (" + HelpConstants.GATK_DOCS_URL + ") for more information.");
}
private String userFriendlyListofReadFilters(List<Class<? extends ReadFilter>> filters) {

View File

@ -117,7 +117,7 @@ public class GATKReport {
* @param numColumns the number of columns in this table
*/
public void addTable(final String tableName, final String tableDescription, final int numColumns) {
addTable(tableName, tableDescription, numColumns, false, false);
addTable(tableName, tableDescription, numColumns, GATKReportTable.TableSortingWay.DO_NOT_SORT);
}
/**
@ -126,11 +126,10 @@ public class GATKReport {
* @param tableName the name of the table
* @param tableDescription the description of the table
* @param numColumns the number of columns in this table
* @param sortByRowID whether to sort the rows by the row ID
* @param sortByAllColumns whether to sort the rows by all columns starting from leftmost column
* @param sortingWay way to sort table
*/
public void addTable(final String tableName, final String tableDescription, final int numColumns, final boolean sortByRowID, final boolean sortByAllColumns) {
GATKReportTable table = new GATKReportTable(tableName, tableDescription, numColumns, sortByRowID, sortByAllColumns);
public void addTable(final String tableName, final String tableDescription, final int numColumns, final GATKReportTable.TableSortingWay sortingWay) {
GATKReportTable table = new GATKReportTable(tableName, tableDescription, numColumns, sortingWay);
tables.put(tableName, table);
}

View File

@ -46,8 +46,7 @@ public class GATKReportTable {
private final String tableName;
private final String tableDescription;
private final boolean sortByRowID;
private final boolean sortByAllColumns;
private final TableSortingWay sortingWay;
private List<Object[]> underlyingData;
private final List<GATKReportColumn> columnInfo;
@ -73,6 +72,12 @@ public class GATKReportTable {
public int index() { return index; }
}
public enum TableSortingWay {
SORT_BY_ROW,
SORT_BY_COLUMN,
DO_NOT_SORT
}
protected enum TableNameHeaderFields {
NAME(2),
DESCRIPTION(3);
@ -107,10 +112,7 @@ public class GATKReportTable {
tableDescription = (tableNameData.length <= TableNameHeaderFields.DESCRIPTION.index()) ? "" : tableNameData[TableNameHeaderFields.DESCRIPTION.index()]; // table may have no description! (and that's okay)
// when reading from a file, we do not re-sort the rows
sortByRowID = false;
// when reading from a file, we do not re-sort the rows
sortByAllColumns = false;
sortingWay = TableSortingWay.DO_NOT_SORT;
// initialize the data
final int nColumns = Integer.parseInt(tableData[TableDataHeaderFields.COLS.index()]);
@ -181,7 +183,7 @@ public class GATKReportTable {
* @param numColumns the number of columns in this table
*/
public GATKReportTable(final String tableName, final String tableDescription, final int numColumns) {
this(tableName, tableDescription, numColumns, true, false);
this(tableName, tableDescription, numColumns, TableSortingWay.SORT_BY_ROW);
}
/**
@ -190,10 +192,9 @@ public class GATKReportTable {
* @param tableName the name of the table
* @param tableDescription the description of the table
* @param numColumns the number of columns in this table
* @param sortByRowID whether to sort rows by the row ID (instead of the order in which they were added)
* @param sortByAllColumns whether to sort rows by all columns (instead of the order in which they were added)
* @param sortingWay in what way to sort rows (instead of the order in which they were added)
*/
public GATKReportTable(final String tableName, final String tableDescription, final int numColumns, final boolean sortByRowID, final boolean sortByAllColumns) {
public GATKReportTable(final String tableName, final String tableDescription, final int numColumns, final TableSortingWay sortingWay) {
if ( !isValidName(tableName) ) {
throw new ReviewedStingException("Attempted to set a GATKReportTable name of '" + tableName + "'. GATKReportTable names must be purely alphanumeric - no spaces or special characters are allowed.");
}
@ -204,8 +205,7 @@ public class GATKReportTable {
this.tableName = tableName;
this.tableDescription = tableDescription;
this.sortByRowID = sortByRowID;
this.sortByAllColumns = sortByAllColumns;
this.sortingWay = sortingWay;
underlyingData = new ArrayList<Object[]>(INITITAL_ARRAY_SIZE);
columnInfo = new ArrayList<GATKReportColumn>(numColumns);
@ -218,7 +218,7 @@ public class GATKReportTable {
* @param tableToCopy
*/
public GATKReportTable(final GATKReportTable tableToCopy, final boolean copyData) {
this(tableToCopy.getTableName(), tableToCopy.getTableDescription(), tableToCopy.getNumColumns(), tableToCopy.sortByRowID, tableToCopy.sortByAllColumns);
this(tableToCopy.getTableName(), tableToCopy.getTableDescription(), tableToCopy.getNumColumns(), tableToCopy.sortingWay);
for ( final GATKReportColumn column : tableToCopy.getColumnInfo() )
addColumn(column.getColumnName(), column.getFormat());
if ( copyData )
@ -569,56 +569,53 @@ public class GATKReportTable {
out.println();
// write the table body
if ( sortByAllColumns ) {
Collections.sort(underlyingData, new Comparator<Object[]>() {
//INVARIANT the two arrays are of the same length and corresponding elements are of the same type
@Override
public int compare(Object[] objectArr1, Object[] objectArr2) {
final int EQUAL = 0;
switch (sortingWay) {
case SORT_BY_COLUMN:
Collections.sort(underlyingData, new Comparator<Object[]>() {
//INVARIANT the two arrays are of the same length and corresponding elements are of the same type
@Override
public int compare(Object[] objectArr1, Object[] objectArr2) {
final int EQUAL = 0;
int result = EQUAL;
int result = EQUAL;
int l = objectArr1.length;
for (int x = 0; x < l; x++) {
if (objectArr1[x] instanceof Integer) {
result = ((Integer)objectArr1[x]).compareTo((Integer)objectArr2[x]);
int l = objectArr1.length;
for (int x = 0; x < l; x++) {
if (objectArr1[x] instanceof Integer) {
result = ((Integer)objectArr1[x]).compareTo((Integer)objectArr2[x]);
} else if (objectArr1[x] instanceof Double) {
result = ((Double)objectArr1[x]).compareTo((Double)objectArr2[x]);
} else { // default uses String comparison
result = objectArr1[x].toString().compareTo(objectArr2[x].toString());
}
if( result != EQUAL) {
return result;
}
} else if (objectArr1[x] instanceof Double) {
result = ((Double)objectArr1[x]).compareTo((Double)objectArr2[x]);
if( result != EQUAL) {
return result;
}
} else { // default uses String comparison
result = objectArr1[x].toString().compareTo(objectArr2[x].toString());
if( result != EQUAL) {
return result;
}
}
return result;
}
return result;
}
});
for ( final Object[] row : underlyingData )
writeRow(out, row);
} else if ( sortByRowID ) {
// make sure that there are exactly the correct number of ID mappings
if ( rowIdToIndex.size() != underlyingData.size() )
throw new ReviewedStingException("There isn't a 1-to-1 mapping from row ID to index; this can happen when rows are not created consistently");
});
for ( final Object[] row : underlyingData )
writeRow(out, row);
break;
case SORT_BY_ROW:
// make sure that there are exactly the correct number of ID mappings
if ( rowIdToIndex.size() != underlyingData.size() )
throw new ReviewedStingException("There isn't a 1-to-1 mapping from row ID to index; this can happen when rows are not created consistently");
final TreeMap<Object, Integer> sortedMap;
try {
sortedMap = new TreeMap<Object, Integer>(rowIdToIndex);
} catch (ClassCastException e) {
throw new ReviewedStingException("Unable to sort the rows based on the row IDs because the ID Objects are of different types");
}
for ( final Map.Entry<Object, Integer> rowKey : sortedMap.entrySet() )
writeRow(out, underlyingData.get(rowKey.getValue()));
} else {
for ( final Object[] row : underlyingData )
writeRow(out, row);
}
final TreeMap<Object, Integer> sortedMap;
try {
sortedMap = new TreeMap<Object, Integer>(rowIdToIndex);
} catch (ClassCastException e) {
throw new ReviewedStingException("Unable to sort the rows based on the row IDs because the ID Objects are of different types");
}
for ( final Map.Entry<Object, Integer> rowKey : sortedMap.entrySet() )
writeRow(out, underlyingData.get(rowKey.getValue()));
break;
case DO_NOT_SORT:
for ( final Object[] row : underlyingData )
writeRow(out, row);
}
out.println();
}
@ -735,53 +732,47 @@ public class GATKReportTable {
}
private List<Object[]> getOrderedRows() {
if ( sortByAllColumns ) {
Collections.sort(underlyingData, new Comparator<Object[]>() {
//INVARIANT the two arrays are of the same length and corresponding elements are of the same type
@Override
public int compare(Object[] objectArr1, Object[] objectArr2) {
final int EQUAL = 0;
int result = EQUAL;
int l = objectArr1.length;
for (int x = 0; x < l; x++) {
if (objectArr1[x] instanceof Integer) {
result = ((Integer)objectArr1[x]).compareTo((Integer)objectArr2[x]);
if( result != EQUAL) {
return result;
switch (sortingWay) {
case SORT_BY_COLUMN:
Collections.sort(underlyingData, new Comparator<Object[]>() {
//INVARIANT the two arrays are of the same length and corresponding elements are of the same type
@Override
public int compare(Object[] objectArr1, Object[] objectArr2) {
final int EQUAL = 0;
int result = EQUAL;
int l = objectArr1.length;
for (int x = 0; x < l; x++) {
if (objectArr1[x] instanceof Integer) {
result = ((Integer)objectArr1[x]).compareTo((Integer)objectArr2[x]);
} else if (objectArr1[x] instanceof Double) {
result = ((Double)objectArr1[x]).compareTo((Double)objectArr2[x]);
} else { // default uses String comparison
result = objectArr1[x].toString().compareTo(objectArr2[x].toString());
}
if( result != EQUAL) {
return result;
}
}
} else if (objectArr1[x] instanceof Double) {
result = ((Double)objectArr1[x]).compareTo((Double)objectArr2[x]);
if( result != EQUAL) {
return result;
}
} else { // default uses String comparison
result = objectArr1[x].toString().compareTo(objectArr2[x].toString());
if( result != EQUAL) {
return result;
}
}
return result;
}
return result;
});
return underlyingData;
case SORT_BY_ROW:
final TreeMap<Object, Integer> sortedMap;
try {
sortedMap = new TreeMap<Object, Integer>(rowIdToIndex);
} catch (ClassCastException e) {
return underlyingData;
}
});
return underlyingData;
} else if ( !sortByRowID ) {
return underlyingData;
final List<Object[]> orderedData = new ArrayList<Object[]>(underlyingData.size());
for ( final int rowKey : sortedMap.values() )
orderedData.add(underlyingData.get(rowKey));
return orderedData;
default:
return underlyingData;
}
final TreeMap<Object, Integer> sortedMap;
try {
sortedMap = new TreeMap<Object, Integer>(rowIdToIndex);
} catch (ClassCastException e) {
return underlyingData;
}
final List<Object[]> orderedData = new ArrayList<Object[]>(underlyingData.size());
for ( final int rowKey : sortedMap.values() )
orderedData.add(underlyingData.get(rowKey));
return orderedData;
}
}

View File

@ -43,7 +43,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
public T traverse( final ActiveRegionWalker<M,T> walker,
final LocusShardDataProvider dataProvider,
T sum) {
logger.debug(String.format("TraverseActiveRegion.traverse: Shard is %s", dataProvider));
logger.debug(String.format("TraverseActiveRegions.traverse: Shard is %s", dataProvider));
final LocusView locusView = new AllLocusView(dataProvider);
@ -227,7 +227,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
final GenomeLoc extendedLoc = workQueue.peek().getExtendedLoc();
if ( extendedLoc.getStop() < minStart || (currentContig != null && !workQueue.peek().getExtendedLoc().getContig().equals(currentContig))) {
final ActiveRegion activeRegion = workQueue.remove();
sum = processActiveRegion( activeRegion, myReads, workQueue, sum, walker );
sum = processActiveRegion( activeRegion, sum, walker );
} else {
break;
}
@ -236,9 +236,9 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
return sum;
}
private T processActiveRegion( final ActiveRegion activeRegion, final LinkedHashSet<GATKSAMRecord> reads, final Queue<ActiveRegion> workQueue, final T sum, final ActiveRegionWalker<M,T> walker ) {
private T processActiveRegion( final ActiveRegion activeRegion, final T sum, final ActiveRegionWalker<M,T> walker ) {
final ArrayList<GATKSAMRecord> placedReads = new ArrayList<GATKSAMRecord>();
for( final GATKSAMRecord read : reads ) {
for( final GATKSAMRecord read : myReads ) {
final GenomeLoc readLoc = this.engine.getGenomeLocParser().createGenomeLoc( read );
if( activeRegion.getLocation().overlapsP( readLoc ) ) {
// The region which the highest amount of overlap is chosen as the primary region for the read (tie breaking is done as right most region)
@ -278,7 +278,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
activeRegion.add( read );
}
}
reads.removeAll( placedReads ); // remove all the reads which have been placed into their active region
myReads.removeAll( placedReads ); // remove all the reads which have been placed into their active region
// WARNING: This hashset relies on reads being exactly equal when they are placed in the list as when they are removed. So the ActiveRegionWalker can't modify the reads in any way.
logger.debug(">> Map call with " + activeRegion.getReads().size() + " " + (activeRegion.isActive ? "active" : "inactive") + " reads @ " + activeRegion.getLocation() + " with full extent: " + activeRegion.getReferenceLoc());

View File

@ -197,7 +197,7 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
}
updateCumulativeMetrics(dataProvider.getShard());
printProgress(site);
printProgress(site.getStopLocation());
done = walker.isDone();
}

View File

@ -65,7 +65,8 @@ public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,
@Override
public void progress(MapData lastProcessedMap) {
if ( lastProcessedMap.refContext != null )
printProgress(lastProcessedMap.refContext.getLocus());
// note, need to use getStopLocation so we don't give an interval to ProgressMeterDaemon
printProgress(lastProcessedMap.refContext.getLocus().getStopLocation());
}
});
}

View File

@ -0,0 +1,92 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.variant.variantcontext.Allele;
import org.broadinstitute.variant.variantcontext.Genotype;
import org.broadinstitute.variant.variantcontext.GenotypesContext;
import org.broadinstitute.variant.variantcontext.VariantContext;
import org.broadinstitute.variant.vcf.VCFHeaderLineType;
import org.broadinstitute.variant.vcf.VCFInfoHeaderLine;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Created by IntelliJ IDEA.
* User: chartl
* Date: 1/3/13
* Time: 11:36 AM
* To change this template use File | Settings | File Templates.
*/
public class AverageAltAlleleLength extends InfoFieldAnnotation implements ActiveRegionBasedAnnotation, ExperimentalAnnotation {
public List<VCFInfoHeaderLine> getDescriptions() {
return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Average Allele Length"));
}
public List<String> getKeyNames() { return Arrays.asList("AAL"); }
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
final AnnotatorCompatible walker,
final ReferenceContext ref,
final Map<String, AlignmentContext> stratifiedContexts,
final VariantContext vc,
final Map<String, PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap ) {
if ( !vc.hasLog10PError() )
return null;
final GenotypesContext genotypes = vc.getGenotypes();
if ( genotypes == null || genotypes.size() == 0 )
return null;
Map<String, Object> map = new HashMap<String, Object>();
double length = getMeanAltAlleleLength(vc);
map.put(getKeyNames().get(0),String.format("%.2f",length));
return map;
}
public static double getMeanAltAlleleLength(VariantContext vc) {
double averageLength = 1.0;
if ( ! vc.isSNP() && ! vc.isSymbolic() ) {
// adjust for the event length
int averageLengthNum = 0;
int averageLengthDenom = 0;
int refLength = vc.getReference().length();
for ( Allele a : vc.getAlternateAlleles() ) {
int numAllele = vc.getCalledChrCount(a);
int alleleSize;
if ( a.length() == refLength ) {
// SNP or MNP
byte[] a_bases = a.getBases();
byte[] ref_bases = vc.getReference().getBases();
int n_mismatch = 0;
for ( int idx = 0; idx < a_bases.length; idx++ ) {
if ( a_bases[idx] != ref_bases[idx] )
n_mismatch++;
}
alleleSize = n_mismatch;
}
else if ( a.isSymbolic() ) {
alleleSize = 1;
} else {
alleleSize = Math.abs(refLength-a.length());
}
averageLengthNum += alleleSize*numAllele;
averageLengthDenom += numAllele;
}
averageLength = ( (double) averageLengthNum )/averageLengthDenom;
}
return averageLength;
}
}

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import com.sun.org.apache.bcel.internal.generic.AALOAD;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -68,50 +69,17 @@ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotati
if ( depth == 0 )
return null;
double QD = -10.0 * vc.getLog10PError() / (double)depth;
double altAlleleLength = AverageAltAlleleLength.getMeanAltAlleleLength(vc);
double QD = -10.0 * vc.getLog10PError() / ((double)depth * altAlleleLength);
Map<String, Object> map = new HashMap<String, Object>();
if ( ! vc.isSNP() && ! vc.isSymbolic() ) {
// adjust for the event length
int averageLengthNum = 0;
int averageLengthDenom = 0;
int refLength = vc.getReference().length();
for ( Allele a : vc.getAlternateAlleles() ) {
int numAllele = vc.getCalledChrCount(a);
int alleleSize;
if ( a.length() == refLength ) {
// SNP or MNP
byte[] a_bases = a.getBases();
byte[] ref_bases = vc.getReference().getBases();
int n_mismatch = 0;
for ( int idx = 0; idx < a_bases.length; idx++ ) {
if ( a_bases[idx] != ref_bases[idx] )
n_mismatch++;
}
alleleSize = n_mismatch;
}
else if ( a.isSymbolic() ) {
alleleSize = 1;
} else {
alleleSize = Math.abs(refLength-a.length());
}
averageLengthNum += alleleSize*numAllele;
averageLengthDenom += numAllele;
}
double averageLength = ( (double) averageLengthNum )/averageLengthDenom;
QD /= averageLength;
map.put(getKeyNames().get(1),String.format("%.2f",averageLength));
}
map.put(getKeyNames().get(0), String.format("%.2f", QD));
return map;
}
public List<String> getKeyNames() { return Arrays.asList("QD","AAL"); }
public List<String> getKeyNames() { return Arrays.asList("QD"); }
public List<VCFInfoHeaderLine> getDescriptions() {
return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Variant Confidence/Quality by Depth"),
new VCFInfoHeaderLine(getKeyNames().get(1), 1, VCFHeaderLineType.Float, "Average Allele Length"));
return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Variant Confidence/Quality by Depth"));
}

View File

@ -113,25 +113,39 @@ import java.util.List;
@ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class, UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class})
@PartitionBy(PartitionType.READ)
public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSchedulable {
/**
* all the command line arguments for BQSR and it's covariates
*/
@ArgumentCollection
private final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); // all the command line arguments for BQSR and it's covariates
private final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
/**
* When you have nct > 1, BQSR uses nct times more memory to compute its recalibration tables, for efficiency
* purposes. If you have many covariates, and therefore are using a lot of memory, you can use this flag
* to safely access only one table. There may be some CPU cost, but as long as the table is really big
* there should be relatively little CPU costs.
*/
@Argument(fullName = "lowMemoryMode", shortName="lowMemoryMode", doc="Reduce memory usage in multi-threaded code at the expense of threading efficiency", required = false)
public boolean lowMemoryMode = false;
@Advanced
@Argument(fullName = "bqsrBAQGapOpenPenalty", shortName="bqsrBAQGOP", doc="BQSR BAQ gap open penalty (Phred Scaled). Default value is 40. 30 is perhaps better for whole genome call sets", required = false)
public double BAQGOP = BAQ.DEFAULT_GOP;
private QuantizationInfo quantizationInfo; // an object that keeps track of the information necessary for quality score quantization
/**
* an object that keeps track of the information necessary for quality score quantization
*/
private QuantizationInfo quantizationInfo;
private RecalibrationTables recalibrationTables;
private Covariate[] requestedCovariates; // list to hold the all the covariate objects that were requested (required + standard + experimental)
/**
* list to hold the all the covariate objects that were requested (required + standard + experimental)
*/
private Covariate[] requestedCovariates;
private RecalibrationEngine recalibrationEngine;
private int minimumQToUse;
protected static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\
private static final String NO_DBSNP_EXCEPTION = "This calculation is critically dependent on being able to skip over known variant sites. Please provide a VCF file containing known sites of genetic variation.";
private BAQ baq; // BAQ the reads on the fly to generate the alignment uncertainty vector
@ -143,7 +157,6 @@ public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSche
* Based on the covariates' estimates for initial capacity allocate the data hashmap
*/
public void initialize() {
baq = new BAQ(BAQGOP); // setup the BAQ object with the provided gap open penalty
// check for unsupported access
@ -185,29 +198,20 @@ public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSche
throw new UserException.CouldNotCreateOutputFile(RAC.RECAL_TABLE_FILE, e);
}
int numReadGroups = 0;
for ( final SAMFileHeader header : getToolkit().getSAMFileHeaders() )
numReadGroups += header.getReadGroups().size();
recalibrationTables = new RecalibrationTables(requestedCovariates, numReadGroups, RAC.RECAL_TABLE_UPDATE_LOG);
recalibrationEngine = initializeRecalibrationEngine();
recalibrationEngine.initialize(requestedCovariates, recalibrationTables);
initializeRecalibrationEngine();
minimumQToUse = getToolkit().getArguments().PRESERVE_QSCORES_LESS_THAN;
referenceReader = getToolkit().getReferenceDataSource().getReference();
}
private RecalibrationEngine initializeRecalibrationEngine() {
/**
* Initialize the recalibration engine
*/
private void initializeRecalibrationEngine() {
int numReadGroups = 0;
for ( final SAMFileHeader header : getToolkit().getSAMFileHeaders() )
numReadGroups += header.getReadGroups().size();
final Class recalibrationEngineClass = GATKLiteUtils.getProtectedClassIfAvailable(RecalibrationEngine.class);
try {
final Constructor constructor = recalibrationEngineClass.getDeclaredConstructor((Class[])null);
constructor.setAccessible(true);
return (RecalibrationEngine)constructor.newInstance();
}
catch (Exception e) {
throw new ReviewedStingException("Unable to create RecalibrationEngine class instance " + recalibrationEngineClass.getSimpleName());
}
recalibrationEngine = new RecalibrationEngine(requestedCovariates, numReadGroups, RAC.RECAL_TABLE_UPDATE_LOG, lowMemoryMode);
}
private boolean isLowQualityBase( final GATKSAMRecord read, final int offset ) {
@ -501,14 +505,18 @@ public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSche
logger.info("Processed: " + result + " reads");
}
private RecalibrationTables getRecalibrationTable() {
return recalibrationEngine.getFinalRecalibrationTables();
}
private void generatePlots() {
File recalFile = getToolkit().getArguments().BQSR_RECAL_FILE;
if (recalFile != null) {
RecalibrationReport report = new RecalibrationReport(recalFile);
RecalUtils.generateRecalibrationPlot(RAC, report.getRecalibrationTables(), recalibrationTables, requestedCovariates);
RecalUtils.generateRecalibrationPlot(RAC, report.getRecalibrationTables(), getRecalibrationTable(), requestedCovariates);
}
else
RecalUtils.generateRecalibrationPlot(RAC, recalibrationTables, requestedCovariates);
RecalUtils.generateRecalibrationPlot(RAC, getRecalibrationTable(), requestedCovariates);
}
/**
@ -517,10 +525,10 @@ public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSche
* generate a quantization map (recalibrated_qual -> quantized_qual)
*/
private void quantizeQualityScores() {
quantizationInfo = new QuantizationInfo(recalibrationTables, RAC.QUANTIZING_LEVELS);
quantizationInfo = new QuantizationInfo(getRecalibrationTable(), RAC.QUANTIZING_LEVELS);
}
private void generateReport() {
RecalUtils.outputRecalibrationReport(RAC, quantizationInfo, recalibrationTables, requestedCovariates, RAC.SORT_BY_ALL_COLUMNS);
RecalUtils.outputRecalibrationReport(RAC, quantizationInfo, getRecalibrationTable(), requestedCovariates, RAC.SORT_BY_ALL_COLUMNS);
}
}

View File

@ -58,7 +58,8 @@ public final class ReadRecalibrationInfo {
if ( covariates == null ) throw new IllegalArgumentException("covariates cannot be null");
if ( skips == null ) throw new IllegalArgumentException("skips cannot be null");
if ( snpErrors == null ) throw new IllegalArgumentException("snpErrors cannot be null");
// future: may allow insertionErrors && deletionErrors to be null, so don't enforce
if ( insertionErrors == null ) throw new IllegalArgumentException("insertionErrors cannot be null");
if ( deletionErrors == null ) throw new IllegalArgumentException("deletionErrors cannot be null");
this.read = read;
this.baseQuals = read.getBaseQualities();
@ -73,8 +74,8 @@ public final class ReadRecalibrationInfo {
if ( skips.length != length ) throw new IllegalArgumentException("skips.length " + snpErrors.length + " != length " + length);
if ( snpErrors.length != length ) throw new IllegalArgumentException("snpErrors.length " + snpErrors.length + " != length " + length);
if ( insertionErrors != null && insertionErrors.length != length ) throw new IllegalArgumentException("insertionErrors.length " + snpErrors.length + " != length " + length);
if ( deletionErrors != null && deletionErrors.length != length ) throw new IllegalArgumentException("deletionErrors.length " + snpErrors.length + " != length " + length);
if ( insertionErrors.length != length ) throw new IllegalArgumentException("insertionErrors.length " + snpErrors.length + " != length " + length);
if ( deletionErrors.length != length ) throw new IllegalArgumentException("deletionErrors.length " + snpErrors.length + " != length " + length);
}
/**

View File

@ -207,7 +207,7 @@ public class RecalibrationArgumentCollection {
public GATKReportTable generateReportTable(final String covariateNames) {
GATKReportTable argumentsTable;
if(SORT_BY_ALL_COLUMNS) {
argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2, false, true);
argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2, GATKReportTable.TableSortingWay.SORT_BY_COLUMN);
} else {
argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2);
}

View File

@ -1,35 +1,87 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.bqsr;
import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
import org.broadinstitute.sting.utils.recalibration.*;
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
public interface RecalibrationEngine {
import java.io.PrintStream;
import java.util.LinkedList;
import java.util.List;
public class RecalibrationEngine {
final protected Covariate[] covariates;
final private int numReadGroups;
final private PrintStream maybeLogStream;
final private boolean lowMemoryMode;
/**
* Has finalizeData() been called?
*/
private boolean finalized = false;
/**
* The final (merged, etc) recalibration tables, suitable for downstream analysis.
*/
private RecalibrationTables finalRecalibrationTables = null;
private final List<RecalibrationTables> recalibrationTablesList = new LinkedList<RecalibrationTables>();
private final ThreadLocal<RecalibrationTables> threadLocalTables = new ThreadLocal<RecalibrationTables>() {
private synchronized RecalibrationTables makeAndCaptureTable() {
final RecalibrationTables newTable = new RecalibrationTables(covariates, numReadGroups, maybeLogStream);
recalibrationTablesList.add(newTable);
return newTable;
}
@Override
protected synchronized RecalibrationTables initialValue() {
if ( lowMemoryMode ) {
return recalibrationTablesList.isEmpty() ? makeAndCaptureTable() : recalibrationTablesList.get(0);
} else {
return makeAndCaptureTable();
}
}
};
/**
* Get a recalibration table suitable for updating the underlying RecalDatums
*
* May return a thread-local version, or a single version, depending on the initialization
* arguments of this instance.
*
* @return updated tables
*/
protected RecalibrationTables getUpdatableRecalibrationTables() {
return threadLocalTables.get();
}
/**
* Initialize the recalibration engine
*
@ -40,21 +92,125 @@ public interface RecalibrationEngine {
* The engine should collect match and mismatch data into the recalibrationTables data.
*
* @param covariates an array of the covariates we'll be using in this engine, order matters
* @param recalibrationTables the destination recalibrationTables where stats should be collected
* @param numReadGroups the number of read groups we should use for the recalibration tables
* @param maybeLogStream an optional print stream for logging calls to the nestedhashmap in the recalibration tables
*/
public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables);
public RecalibrationEngine(final Covariate[] covariates, final int numReadGroups, final PrintStream maybeLogStream, final boolean enableLowMemoryMode) {
if ( covariates == null ) throw new IllegalArgumentException("Covariates cannot be null");
if ( numReadGroups < 1 ) throw new IllegalArgumentException("numReadGroups must be >= 1 but got " + numReadGroups);
this.covariates = covariates.clone();
this.numReadGroups = numReadGroups;
this.maybeLogStream = maybeLogStream;
this.lowMemoryMode = enableLowMemoryMode;
}
/**
* Update the recalibration statistics using the information in recalInfo
* @param recalInfo data structure holding information about the recalibration values for a single read
*/
@Requires("recalInfo != null")
public void updateDataForRead(final ReadRecalibrationInfo recalInfo);
public void updateDataForRead( final ReadRecalibrationInfo recalInfo ) {
final GATKSAMRecord read = recalInfo.getRead();
final ReadCovariates readCovariates = recalInfo.getCovariatesValues();
final RecalibrationTables tables = getUpdatableRecalibrationTables();
final NestedIntegerArray<RecalDatum> qualityScoreTable = tables.getQualityScoreTable();
for( int offset = 0; offset < read.getReadBases().length; offset++ ) {
if( ! recalInfo.skip(offset) ) {
for (final EventType eventType : EventType.values()) {
final int[] keys = readCovariates.getKeySet(offset, eventType);
final int eventIndex = eventType.ordinal();
final byte qual = recalInfo.getQual(eventType, offset);
final double isError = recalInfo.getErrorFraction(eventType, offset);
RecalUtils.incrementDatumOrPutIfNecessary(qualityScoreTable, qual, isError, keys[0], keys[1], eventIndex);
for (int i = 2; i < covariates.length; i++) {
if (keys[i] < 0)
continue;
RecalUtils.incrementDatumOrPutIfNecessary(tables.getTable(i), qual, isError, keys[0], keys[1], keys[i], eventIndex);
}
}
}
}
}
/**
* Finalize, if appropriate, all derived data in recalibrationTables.
*
* Called once after all calls to updateDataForRead have been issued.
*
* Assumes that all of the principal tables (by quality score) have been completely updated,
* and walks over this data to create summary data tables like by read group table.
*/
public void finalizeData();
public void finalizeData() {
if ( finalized ) throw new IllegalStateException("FinalizeData() has already been called");
// merge all of the thread-local tables
finalRecalibrationTables = mergeThreadLocalRecalibrationTables();
final NestedIntegerArray<RecalDatum> byReadGroupTable = finalRecalibrationTables.getReadGroupTable();
final NestedIntegerArray<RecalDatum> byQualTable = finalRecalibrationTables.getQualityScoreTable();
// iterate over all values in the qual table
for ( NestedIntegerArray.Leaf<RecalDatum> leaf : byQualTable.getAllLeaves() ) {
final int rgKey = leaf.keys[0];
final int eventIndex = leaf.keys[2];
final RecalDatum rgDatum = byReadGroupTable.get(rgKey, eventIndex);
final RecalDatum qualDatum = leaf.value;
if ( rgDatum == null ) {
// create a copy of qualDatum, and initialize byReadGroup table with it
byReadGroupTable.put(new RecalDatum(qualDatum), rgKey, eventIndex);
} else {
// combine the qual datum with the existing datum in the byReadGroup table
rgDatum.combine(qualDatum);
}
}
finalized = true;
}
/**
* Merge all of the thread local recalibration tables into a single one.
*
* Reuses one of the recalibration tables to hold the merged table, so this function can only be
* called once in the engine.
*
* @return the merged recalibration table
*/
@Requires("! finalized")
private RecalibrationTables mergeThreadLocalRecalibrationTables() {
if ( recalibrationTablesList.isEmpty() ) throw new IllegalStateException("recalibration tables list is empty");
RecalibrationTables merged = null;
for ( final RecalibrationTables table : recalibrationTablesList ) {
if ( merged == null )
// fast path -- if there's only only one table, so just make it the merged one
merged = table;
else {
merged.combine(table);
}
}
return merged;
}
/**
* Get the final recalibration tables, after finalizeData() has been called
*
* This returns the finalized recalibration table collected by this engine.
*
* It is an error to call this function before finalizeData has been called
*
* @return the finalized recalibration table collected by this engine
*/
public RecalibrationTables getFinalRecalibrationTables() {
if ( ! finalized ) throw new IllegalStateException("Cannot get final recalibration tables until finalizeData() has been called");
return finalRecalibrationTables;
}
}

View File

@ -1,144 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
import org.broadinstitute.sting.utils.classloader.PublicPackageSource;
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
import org.broadinstitute.sting.utils.recalibration.EventType;
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
import org.broadinstitute.sting.utils.recalibration.RecalDatum;
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
public class StandardRecalibrationEngine implements RecalibrationEngine, PublicPackageSource {
protected Covariate[] covariates;
protected RecalibrationTables recalibrationTables;
@Override
public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables) {
if ( covariates == null ) throw new IllegalArgumentException("Covariates cannot be null");
if ( recalibrationTables == null ) throw new IllegalArgumentException("recalibrationTables cannot be null");
this.covariates = covariates.clone();
this.recalibrationTables = recalibrationTables;
}
@Override
public void updateDataForRead( final ReadRecalibrationInfo recalInfo ) {
final GATKSAMRecord read = recalInfo.getRead();
final EventType eventType = EventType.BASE_SUBSTITUTION;
final ReadCovariates readCovariates = recalInfo.getCovariatesValues();
for( int offset = 0; offset < read.getReadBases().length; offset++ ) {
if( ! recalInfo.skip(offset) ) {
final byte qual = recalInfo.getQual(eventType, offset);
final double isError = recalInfo.getErrorFraction(eventType, offset);
final int[] keys = readCovariates.getKeySet(offset, eventType);
incrementDatumOrPutIfNecessary(recalibrationTables.getQualityScoreTable(), qual, isError, keys[0], keys[1], eventType.index);
for (int i = 2; i < covariates.length; i++) {
if (keys[i] < 0)
continue;
incrementDatumOrPutIfNecessary(recalibrationTables.getTable(i), qual, isError, keys[0], keys[1], keys[i], eventType.index);
}
}
}
}
/**
* creates a datum object with one observation and one or zero error
*
* @param reportedQual the quality score reported by the instrument for this base
* @param isError whether or not the observation is an error
* @return a new RecalDatum object with the observation and the error
*/
protected RecalDatum createDatumObject(final byte reportedQual, final double isError) {
return new RecalDatum(1, isError, reportedQual);
}
/**
* Create derived recalibration data tables
*
* Assumes that all of the principal tables (by quality score) have been completely updated,
* and walks over this data to create summary data tables like by read group table.
*/
@Override
public void finalizeData() {
final NestedIntegerArray<RecalDatum> byReadGroupTable = recalibrationTables.getReadGroupTable();
final NestedIntegerArray<RecalDatum> byQualTable = recalibrationTables.getQualityScoreTable();
// iterate over all values in the qual table
for ( NestedIntegerArray.Leaf<RecalDatum> leaf : byQualTable.getAllLeaves() ) {
final int rgKey = leaf.keys[0];
final int eventIndex = leaf.keys[2];
final RecalDatum rgDatum = byReadGroupTable.get(rgKey, eventIndex);
final RecalDatum qualDatum = leaf.value;
if ( rgDatum == null ) {
// create a copy of qualDatum, and initialize byReadGroup table with it
byReadGroupTable.put(new RecalDatum(qualDatum), rgKey, eventIndex);
} else {
// combine the qual datum with the existing datum in the byReadGroup table
rgDatum.combine(qualDatum);
}
}
}
/**
* Increments the RecalDatum at the specified position in the specified table, or put a new item there
* if there isn't already one.
*
* Does this in a thread-safe way WITHOUT being synchronized: relies on the behavior of NestedIntegerArray.put()
* to return false if another thread inserts a new item at our position in the middle of our put operation.
*
* @param table the table that holds/will hold our item
* @param qual qual for this event
* @param isError error value for this event
* @param keys location in table of our item
*/
protected void incrementDatumOrPutIfNecessary( final NestedIntegerArray<RecalDatum> table,
final byte qual,
final double isError,
final int... keys ) {
final RecalDatum existingDatum = table.get(keys);
if ( existingDatum == null ) {
// No existing item, try to put a new one
if ( ! table.put(createDatumObject(qual, isError), keys) ) {
// Failed to put a new item because another thread came along and put an item here first.
// Get the newly-put item and increment it (item is guaranteed to exist at this point)
table.get(keys).increment(1.0, isError);
}
}
else {
// Easy case: already an item here, so increment it
existingDatum.increment(1.0, isError);
}
}
}

View File

@ -124,7 +124,7 @@ public class ErrorRatePerCycle extends LocusWalker<Integer, Integer> {
public void initialize() {
report = new GATKReport();
report.addTable(reportName, reportDescription, 6, true, false);
report.addTable(reportName, reportDescription, 6);
table = report.getTable(reportName);
table.addColumn("readgroup");
table.addColumn("cycle");

View File

@ -162,7 +162,7 @@ public class VariantEvalReportWriter {
// create the table
final String tableName = ve.getSimpleName();
final String tableDesc = ve.getClass().getAnnotation(Analysis.class).description();
report.addTable(tableName, tableDesc, 1 + stratifiers.size() + (scanner.hasMoltenField() ? 2 : datamap.size()), true, false);
report.addTable(tableName, tableDesc, 1 + stratifiers.size() + (scanner.hasMoltenField() ? 2 : datamap.size()));
// grab the table, and add the columns we need to it
final GATKReportTable table = report.getTable(tableName);

View File

@ -32,8 +32,8 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.variant.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.help.HelpUtils;
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.sting.utils.help.HelpConstants;
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.variant.variantcontext.Allele;
@ -81,7 +81,7 @@ public class VariantDataManager {
final double theSTD = standardDeviation(theMean, iii);
logger.info( annotationKeys.get(iii) + String.format(": \t mean = %.2f\t standard deviation = %.2f", theMean, theSTD) );
if( Double.isNaN(theMean) ) {
throw new UserException.BadInput("Values for " + annotationKeys.get(iii) + " annotation not detected for ANY training variant in the input callset. VariantAnnotator may be used to add these annotations. See " + HelpUtils.forumPost("discussion/49/using-variant-annotator"));
throw new UserException.BadInput("Values for " + annotationKeys.get(iii) + " annotation not detected for ANY training variant in the input callset. VariantAnnotator may be used to add these annotations. See " + HelpConstants.forumPost("discussion/49/using-variant-annotator"));
}
foundZeroVarianceAnnotation = foundZeroVarianceAnnotation || (theSTD < 1E-6);

View File

@ -54,9 +54,10 @@ import java.util.*;
* Left-aligns indels from a variants file.
*
* <p>
* LeftAlignVariants is a tool that takes a VCF file and left-aligns any indels inside it. The same indel can often be
* LeftAlignVariants is a tool that takes a VCF file and left-aligns the indels inside it. The same indel can often be
* placed at multiple positions and still represent the same haplotype. While the standard convention with VCF is to
* place an indel at the left-most position this doesn't always happen, so this tool can be used to left-align them.
* Note that this tool cannot handle anything other than bi-allelic, simple indels. Complex events are written out unchanged.
*
* <h2>Input</h2>
* <p>

View File

@ -30,7 +30,7 @@ import net.sf.samtools.SAMSequenceDictionary;
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.help.HelpUtils;
import org.broadinstitute.sting.utils.help.HelpConstants;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.variant.variantcontext.VariantContext;
@ -278,7 +278,7 @@ public class UserException extends ReviewedStingException {
public static class ReadMissingReadGroup extends MalformedBAM {
public ReadMissingReadGroup(SAMRecord read) {
super(read, String.format("Read %s is either missing the read group or its read group is not defined in the BAM header, both of which are required by the GATK. Please use " + HelpUtils.forumPost("discussion/59/companion-utilities-replacereadgroups to fix this problem"), read.getReadName()));
super(read, String.format("Read %s is either missing the read group or its read group is not defined in the BAM header, both of which are required by the GATK. Please use " + HelpConstants.forumPost("discussion/59/companion-utilities-replacereadgroups to fix this problem"), read.getReadName()));
}
}
@ -354,7 +354,7 @@ public class UserException extends ReviewedStingException {
super(String.format("Lexicographically sorted human genome sequence detected in %s."
+ "\nFor safety's sake the GATK requires human contigs in karyotypic order: 1, 2, ..., 10, 11, ..., 20, 21, 22, X, Y with M either leading or trailing these contigs."
+ "\nThis is because all distributed GATK resources are sorted in karyotypic order, and your processing will fail when you need to use these files."
+ "\nYou can use the ReorderSam utility to fix this problem: " + HelpUtils.forumPost("discussion/58/companion-utilities-reordersam")
+ "\nYou can use the ReorderSam utility to fix this problem: " + HelpConstants.forumPost("discussion/58/companion-utilities-reordersam")
+ "\n %s contigs = %s",
name, name, ReadUtils.prettyPrintSequenceRecords(dict)));
}

View File

@ -50,7 +50,7 @@ public class ForumAPIUtils {
Gson gson = new Gson();
List<String> output = new ArrayList<String>();
String text = httpGet(HelpUtils.GATK_FORUM_API_URL + "categories.json?CategoryIdentifier=tool-bulletin&page=1-100000&" + ACCESS_TOKEN + forumKey);
String text = httpGet(HelpConstants.GATK_FORUM_API_URL + "categories.json?CategoryIdentifier=tool-bulletin&page=1-100000&" + ACCESS_TOKEN + forumKey);
APIQuery details = gson.fromJson(text, APIQuery.class);
ForumDiscussion[] discussions = details.Discussions;
@ -158,7 +158,7 @@ public class ForumAPIUtils {
Gson gson = new Gson();
String data = gson.toJson(post.getPostData());
httpPost(data, HelpUtils.GATK_FORUM_API_URL + "post/discussion.json?" + ACCESS_TOKEN + forumKey);
httpPost(data, HelpConstants.GATK_FORUM_API_URL + "post/discussion.json?" + ACCESS_TOKEN + forumKey);
}

View File

@ -28,7 +28,7 @@ public class GATKDocUtils {
/**
* The URL root for RELEASED GATKDOC units
*/
public final static String URL_ROOT_FOR_RELEASE_GATKDOCS = HelpUtils.GATK_DOCS_URL;
public final static String URL_ROOT_FOR_RELEASE_GATKDOCS = HelpConstants.GATK_DOCS_URL;
/**
* The URL root for STABLE GATKDOC units
*/

View File

@ -0,0 +1,81 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.help;
import com.sun.javadoc.FieldDoc;
import com.sun.javadoc.PackageDoc;
import com.sun.javadoc.ProgramElementDoc;
import org.broadinstitute.sting.utils.classloader.JVMUtils;
import java.lang.reflect.Field;
public class HelpConstants {
public final static String BASE_GATK_URL = "http://www.broadinstitute.org/gatk";
public final static String GATK_DOCS_URL = BASE_GATK_URL + "/gatkdocs/";
public final static String GATK_FORUM_URL = "http://gatkforums.broadinstitute.org/";
public final static String GATK_FORUM_API_URL = "https://gatkforums.broadinstitute.org/api/v1/";
public static String forumPost(String post) {
return GATK_FORUM_URL + post;
}
protected static boolean assignableToClass(ProgramElementDoc classDoc, Class lhsClass, boolean requireConcrete) {
try {
Class type = getClassForDoc(classDoc);
return lhsClass.isAssignableFrom(type) && (!requireConcrete || JVMUtils.isConcrete(type));
} catch (Throwable t) {
// Ignore errors.
return false;
}
}
protected static Class getClassForDoc(ProgramElementDoc doc) throws ClassNotFoundException {
return Class.forName(getClassName(doc));
}
protected static Field getFieldForFieldDoc(FieldDoc fieldDoc) {
try {
Class clazz = getClassForDoc(fieldDoc.containingClass());
return JVMUtils.findField(clazz, fieldDoc.name());
} catch (ClassNotFoundException e) {
throw new RuntimeException(e);
}
}
/**
* Reconstitute the class name from the given class JavaDoc object.
*
* @param doc the Javadoc model for the given class.
* @return The (string) class name of the given class.
*/
protected static String getClassName(ProgramElementDoc doc) {
PackageDoc containingPackage = doc.containingPackage();
return containingPackage.name().length() > 0 ?
String.format("%s.%s", containingPackage.name(), doc.name()) :
String.format("%s", doc.name());
}
}

View File

@ -33,15 +33,6 @@ import java.lang.reflect.Field;
public class HelpUtils {
public final static String BASE_GATK_URL = "http://www.broadinstitute.org/gatk";
public final static String GATK_DOCS_URL = BASE_GATK_URL + "/gatkdocs/";
public final static String GATK_FORUM_URL = "http://gatkforums.broadinstitute.org/";
public final static String GATK_FORUM_API_URL = "https://gatkforums.broadinstitute.org/api/v1/";
public static String forumPost(String post) {
return GATK_FORUM_URL + post;
}
protected static boolean assignableToClass(ProgramElementDoc classDoc, Class lhsClass, boolean requireConcrete) {
try {
Class type = getClassForDoc(classDoc);

View File

@ -160,6 +160,13 @@ public class ProgressMeter {
public ProgressMeter(final File performanceLogFile,
final String processingUnitName,
final GenomeLocSortedSet processingIntervals) {
this(performanceLogFile, processingUnitName, processingIntervals, ProgressMeterDaemon.DEFAULT_POLL_FREQUENCY_MILLISECONDS);
}
protected ProgressMeter(final File performanceLogFile,
final String processingUnitName,
final GenomeLocSortedSet processingIntervals,
final long pollingFrequency) {
if ( processingUnitName == null ) throw new IllegalArgumentException("processingUnitName cannot be null");
if ( processingIntervals == null ) throw new IllegalArgumentException("Target intervals cannot be null");
@ -184,10 +191,14 @@ public class ProgressMeter {
targetSizeInBP = processingIntervals.coveredSize();
// start up the timer
progressMeterDaemon = new ProgressMeterDaemon(this);
progressMeterDaemon = new ProgressMeterDaemon(this, pollingFrequency);
start();
}
public ProgressMeterDaemon getProgressMeterDaemon() {
return progressMeterDaemon;
}
/**
* Start up the progress meter, printing initialization message and starting up the
* daemon thread for periodic printing.
@ -223,11 +234,13 @@ public class ProgressMeter {
* the progress itself. A separate printing daemon periodically polls the meter to print out
* progress
*
* @param loc Current location, can be null if you are at the end of the processing unit
* @param loc Current location, can be null if you are at the end of the processing unit. Must
* have size == 1 (cannot be multiple bases in size).
* @param nTotalRecordsProcessed the total number of records we've processed
*/
public synchronized void notifyOfProgress(final GenomeLoc loc, final long nTotalRecordsProcessed) {
if ( nTotalRecordsProcessed < 0 ) throw new IllegalArgumentException("nTotalRecordsProcessed must be >= 0");
if ( loc.size() != 1 ) throw new IllegalArgumentException("GenomeLoc must have size == 1 but got " + loc);
// weird comparison to ensure that loc == null (in unmapped reads) is keep before maxGenomeLoc == null (on startup)
this.maxGenomeLoc = loc == null ? loc : (maxGenomeLoc == null ? loc : loc.max(maxGenomeLoc));

View File

@ -8,10 +8,20 @@ package org.broadinstitute.sting.utils.progressmeter;
* Time: 9:16 PM
*/
public final class ProgressMeterDaemon extends Thread {
public final static long DEFAULT_POLL_FREQUENCY_MILLISECONDS = 10 * 1000;
/**
* How frequently should we poll and print progress?
*/
private final static long POLL_FREQUENCY_MILLISECONDS = 10 * 1000;
private final long pollFrequencyMilliseconds;
/**
* How long are we waiting between print progress calls are issued?
* @return the time in milliseconds between progress meter calls
*/
private long getPollFrequencyMilliseconds() {
return pollFrequencyMilliseconds;
}
/**
* Are we to continue periodically printing status, or should we shut down?
@ -27,13 +37,20 @@ public final class ProgressMeterDaemon extends Thread {
* Create a new ProgressMeterDaemon printing progress for meter
* @param meter the progress meter to print progress of
*/
public ProgressMeterDaemon(final ProgressMeter meter) {
public ProgressMeterDaemon(final ProgressMeter meter, final long pollFrequencyMilliseconds) {
if ( meter == null ) throw new IllegalArgumentException("meter cannot be null");
if ( pollFrequencyMilliseconds <= 0 ) throw new IllegalArgumentException("pollFrequencyMilliseconds must be greater than 0 but got " + pollFrequencyMilliseconds);
this.meter = meter;
this.pollFrequencyMilliseconds = pollFrequencyMilliseconds;
setDaemon(true);
setName("ProgressMeterDaemon");
}
public ProgressMeterDaemon(final ProgressMeter meter) {
this(meter, DEFAULT_POLL_FREQUENCY_MILLISECONDS);
}
/**
* Tells this daemon thread to shutdown at the next opportunity, as the progress
* metering is complete.
@ -42,6 +59,14 @@ public final class ProgressMeterDaemon extends Thread {
this.done = true;
}
/**
* Is this daemon thread done?
* @return true if done, false otherwise
*/
public boolean isDone() {
return done;
}
/**
* Start up the ProgressMeterDaemon, polling every tens of seconds to print, if
* necessary, the provided progress meter. Never exits until the JVM is complete,
@ -51,7 +76,7 @@ public final class ProgressMeterDaemon extends Thread {
while (! done) {
meter.printProgress(false);
try {
Thread.sleep(POLL_FREQUENCY_MILLISECONDS);
Thread.sleep(getPollFrequencyMilliseconds());
} catch (InterruptedException e) {
throw new RuntimeException(e);
}

View File

@ -198,7 +198,7 @@ public class BaseRecalibration {
}
private double getGlobalDeltaQ(final int rgKey, final EventType errorModel) {
final Double cached = globalDeltaQs.get(rgKey, errorModel.index);
final Double cached = globalDeltaQs.get(rgKey, errorModel.ordinal());
if ( TEST_CACHING ) {
final double calcd = calculateGlobalDeltaQ(rgKey, errorModel);
@ -210,7 +210,7 @@ public class BaseRecalibration {
}
private double getDeltaQReported(final int rgKey, final int qualKey, final EventType errorModel, final double globalDeltaQ) {
final Double cached = deltaQReporteds.get(rgKey, qualKey, errorModel.index);
final Double cached = deltaQReporteds.get(rgKey, qualKey, errorModel.ordinal());
if ( TEST_CACHING ) {
final double calcd = calculateDeltaQReported(rgKey, qualKey, errorModel, globalDeltaQ, (byte)qualKey);
@ -240,7 +240,7 @@ public class BaseRecalibration {
private double calculateGlobalDeltaQ(final int rgKey, final EventType errorModel) {
double result = 0.0;
final RecalDatum empiricalQualRG = recalibrationTables.getReadGroupTable().get(rgKey, errorModel.index);
final RecalDatum empiricalQualRG = recalibrationTables.getReadGroupTable().get(rgKey, errorModel.ordinal());
if (empiricalQualRG != null) {
final double globalDeltaQEmpirical = empiricalQualRG.getEmpiricalQuality();
@ -254,7 +254,7 @@ public class BaseRecalibration {
private double calculateDeltaQReported(final int rgKey, final int qualKey, final EventType errorModel, final double globalDeltaQ, final byte qualFromRead) {
double result = 0.0;
final RecalDatum empiricalQualQS = recalibrationTables.getQualityScoreTable().get(rgKey, qualKey, errorModel.index);
final RecalDatum empiricalQualQS = recalibrationTables.getQualityScoreTable().get(rgKey, qualKey, errorModel.ordinal());
if (empiricalQualQS != null) {
final double deltaQReportedEmpirical = empiricalQualQS.getEmpiricalQuality();
result = deltaQReportedEmpirical - qualFromRead - globalDeltaQ;
@ -287,7 +287,7 @@ public class BaseRecalibration {
final double globalDeltaQ,
final double deltaQReported,
final byte qualFromRead) {
final RecalDatum empiricalQualCO = table.get(rgKey, qualKey, tableKey, errorModel.index);
final RecalDatum empiricalQualCO = table.get(rgKey, qualKey, tableKey, errorModel.ordinal());
if (empiricalQualCO != null) {
final double deltaQCovariateEmpirical = empiricalQualCO.getEmpiricalQuality();
return deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported);

View File

@ -1,41 +1,39 @@
package org.broadinstitute.sting.utils.recalibration;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
public enum EventType {
BASE_SUBSTITUTION(0, "M", "Base Substitution"),
BASE_INSERTION(1, "I", "Base Insertion"),
BASE_DELETION(2, "D", "Base Deletion");
BASE_SUBSTITUTION("M", "Base Substitution"),
BASE_INSERTION("I", "Base Insertion"),
BASE_DELETION("D", "Base Deletion");
public final int index;
private final String representation;
private final String longRepresentation;
private EventType(int index, String representation, String longRepresentation) {
this.index = index;
private EventType(String representation, String longRepresentation) {
this.representation = representation;
this.longRepresentation = longRepresentation;
}
/**
* Get the EventType corresponding to its ordinal index
* @param index an ordinal index
* @return the event type corresponding to ordinal index
*/
public static EventType eventFrom(int index) {
switch (index) {
case 0:
return BASE_SUBSTITUTION;
case 1:
return BASE_INSERTION;
case 2:
return BASE_DELETION;
default:
throw new ReviewedStingException(String.format("Event %d does not exist.", index));
}
return EventType.values()[index];
}
public static EventType eventFrom(String event) {
/**
* Get the EventType with short string representation
* @throws IllegalArgumentException if representation doesn't correspond to one of EventType
* @param representation short string representation of the event
* @return an EventType
*/
public static EventType eventFrom(String representation) {
for (EventType eventType : EventType.values())
if (eventType.representation.equals(event))
if (eventType.representation.equals(representation))
return eventType;
throw new ReviewedStingException(String.format("Event %s does not exist.", event));
throw new IllegalArgumentException(String.format("Event %s does not exist.", representation));
}
@Override

View File

@ -67,10 +67,10 @@ public class QuantizationInfo {
return quantizationLevels;
}
public GATKReportTable generateReportTable(boolean sortBycols) {
public GATKReportTable generateReportTable(boolean sortByCols) {
GATKReportTable quantizedTable;
if(sortBycols) {
quantizedTable = new GATKReportTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3, false, true);
if(sortByCols) {
quantizedTable = new GATKReportTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3, GATKReportTable.TableSortingWay.SORT_BY_COLUMN);
} else {
quantizedTable = new GATKReportTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3);
}

View File

@ -68,9 +68,9 @@ public class ReadCovariates {
* @param readOffset the read offset, must be >= 0 and <= the read length used to create this ReadCovariates
*/
public void addCovariate(final int mismatch, final int insertion, final int deletion, final int readOffset) {
keys[EventType.BASE_SUBSTITUTION.index][readOffset][currentCovariateIndex] = mismatch;
keys[EventType.BASE_INSERTION.index][readOffset][currentCovariateIndex] = insertion;
keys[EventType.BASE_DELETION.index][readOffset][currentCovariateIndex] = deletion;
keys[EventType.BASE_SUBSTITUTION.ordinal()][readOffset][currentCovariateIndex] = mismatch;
keys[EventType.BASE_INSERTION.ordinal()][readOffset][currentCovariateIndex] = insertion;
keys[EventType.BASE_DELETION.ordinal()][readOffset][currentCovariateIndex] = deletion;
}
/**
@ -81,11 +81,11 @@ public class ReadCovariates {
* @return
*/
public int[] getKeySet(final int readPosition, final EventType errorModel) {
return keys[errorModel.index][readPosition];
return keys[errorModel.ordinal()][readPosition];
}
public int[][] getKeySet(final EventType errorModel) {
return keys[errorModel.index];
return keys[errorModel.ordinal()];
}
// ----------------------------------------------------------------------
@ -94,17 +94,9 @@ public class ReadCovariates {
//
// ----------------------------------------------------------------------
protected int[][] getMismatchesKeySet() {
return keys[EventType.BASE_SUBSTITUTION.index];
}
protected int[][] getInsertionsKeySet() {
return keys[EventType.BASE_INSERTION.index];
}
protected int[][] getDeletionsKeySet() {
return keys[EventType.BASE_DELETION.index];
}
protected int[][] getMismatchesKeySet() { return getKeySet(EventType.BASE_SUBSTITUTION); }
protected int[][] getInsertionsKeySet() { return getKeySet(EventType.BASE_INSERTION); }
protected int[][] getDeletionsKeySet() { return getKeySet(EventType.BASE_DELETION); }
protected int[] getMismatchesKeySet(final int readPosition) {
return getKeySet(readPosition, EventType.BASE_SUBSTITUTION);

View File

@ -68,7 +68,7 @@ public class RecalDatum {
/**
* number of bases seen in total
*/
private double numObservations;
private long numObservations;
/**
* number of bases seen that didn't match the reference
@ -89,13 +89,13 @@ public class RecalDatum {
/**
* Create a new RecalDatum with given observation and mismatch counts, and an reported quality
*
* @param _numObservations
* @param _numMismatches
* @param reportedQuality
* @param _numObservations observations
* @param _numMismatches mismatches
* @param reportedQuality Qreported
*/
public RecalDatum(final double _numObservations, final double _numMismatches, final byte reportedQuality) {
public RecalDatum(final long _numObservations, final double _numMismatches, final byte reportedQuality) {
if ( _numObservations < 0 ) throw new IllegalArgumentException("numObservations < 0");
if ( _numMismatches < 0 ) throw new IllegalArgumentException("numMismatches < 0");
if ( _numMismatches < 0.0 ) throw new IllegalArgumentException("numMismatches < 0");
if ( reportedQuality < 0 ) throw new IllegalArgumentException("reportedQuality < 0");
numObservations = _numObservations;
@ -106,7 +106,7 @@ public class RecalDatum {
/**
* Copy copy into this recal datum, overwriting all of this objects data
* @param copy
* @param copy RecalDatum to copy
*/
public RecalDatum(final RecalDatum copy) {
this.numObservations = copy.getNumObservations();
@ -119,7 +119,7 @@ public class RecalDatum {
* Add in all of the data from other into this object, updating the reported quality from the expected
* error rate implied by the two reported qualities
*
* @param other
* @param other RecalDatum to combine
*/
public synchronized void combine(final RecalDatum other) {
final double sumErrors = this.calcExpectedErrors() + other.calcExpectedErrors();
@ -192,7 +192,7 @@ public class RecalDatum {
@Override
public String toString() {
return String.format("%.2f,%.2f,%.2f", getNumObservations(), getNumMismatches(), getEmpiricalQuality());
return String.format("%d,%.2f,%.2f", getNumObservations(), getNumMismatches(), getEmpiricalQuality());
}
public String stringForCSV() {
@ -205,11 +205,11 @@ public class RecalDatum {
//
//---------------------------------------------------------------------------------------------------------------
public final double getNumObservations() {
public final long getNumObservations() {
return numObservations;
}
public final synchronized void setNumObservations(final double numObservations) {
public final synchronized void setNumObservations(final long numObservations) {
if ( numObservations < 0 ) throw new IllegalArgumentException("numObservations < 0");
this.numObservations = numObservations;
empiricalQuality = UNINITIALIZED;
@ -227,7 +227,7 @@ public class RecalDatum {
}
@Requires({"by >= 0"})
public final synchronized void incrementNumObservations(final double by) {
public final synchronized void incrementNumObservations(final long by) {
numObservations += by;
empiricalQuality = UNINITIALIZED;
}
@ -240,7 +240,7 @@ public class RecalDatum {
@Requires({"incObservations >= 0", "incMismatches >= 0"})
@Ensures({"numObservations == old(numObservations) + incObservations", "numMismatches == old(numMismatches) + incMismatches"})
public final synchronized void increment(final double incObservations, final double incMismatches) {
public final synchronized void increment(final long incObservations, final double incMismatches) {
numObservations += incObservations;
numMismatches += incMismatches;
empiricalQuality = UNINITIALIZED;
@ -248,7 +248,7 @@ public class RecalDatum {
@Ensures({"numObservations == old(numObservations) + 1", "numMismatches >= old(numMismatches)"})
public final synchronized void increment(final boolean isError) {
increment(1, isError ? 1 : 0.0);
increment(1, isError ? 1.0 : 0.0);
}
// -------------------------------------------------------------------------------------
@ -257,19 +257,6 @@ public class RecalDatum {
//
// -------------------------------------------------------------------------------------
/**
* Calculate and cache the empirical quality score from mismatches and observations (expensive operation)
*/
@Requires("empiricalQuality == UNINITIALIZED")
@Ensures("empiricalQuality != UNINITIALIZED")
private synchronized void calcEmpiricalQuality() {
// TODO -- add code for Bayesian estimate of Qemp here
final double empiricalQual = -10 * Math.log10(getEmpiricalErrorRate());
empiricalQuality = Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE);
}
/**
* calculate the expected number of errors given the estimated Q reported and the number of observations
* in this datum.
@ -281,10 +268,29 @@ public class RecalDatum {
return getNumObservations() * QualityUtils.qualToErrorProb(estimatedQReported);
}
static final boolean DEBUG = false;
static final double RESOLUTION_BINS_PER_QUAL = 1.0;
/**
* Calculate and cache the empirical quality score from mismatches and observations (expensive operation)
*/
@Requires("empiricalQuality == UNINITIALIZED")
@Ensures("empiricalQuality != UNINITIALIZED")
private synchronized void calcEmpiricalQuality() {
static public double bayesianEstimateOfEmpiricalQuality(final double nObservations, final double nErrors, final double QReported) {
// smoothing is one error and one non-error observation
final long mismatches = (long)(getNumMismatches() + 0.5) + SMOOTHING_CONSTANT;
final long observations = getNumObservations() + SMOOTHING_CONSTANT + SMOOTHING_CONSTANT;
final double empiricalQual = RecalDatum.bayesianEstimateOfEmpiricalQuality(observations, mismatches, getEstimatedQReported());
// This is the old and busted point estimate approach:
//final double empiricalQual = -10 * Math.log10(getEmpiricalErrorRate());
empiricalQuality = Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE);
}
//static final boolean DEBUG = false;
static private final double RESOLUTION_BINS_PER_QUAL = 1.0;
static public double bayesianEstimateOfEmpiricalQuality(final long nObservations, final long nErrors, final double QReported) {
final int numBins = (QualityUtils.MAX_REASONABLE_Q_SCORE + 1) * (int)RESOLUTION_BINS_PER_QUAL;
@ -294,14 +300,14 @@ public class RecalDatum {
final double QEmpOfBin = bin / RESOLUTION_BINS_PER_QUAL;
log10Posteriors[bin] = log10QempPrior(QEmpOfBin, QReported) + log10Likelihood(QEmpOfBin, nObservations, nErrors);
log10Posteriors[bin] = log10QempPrior(QEmpOfBin, QReported) + log10QempLikelihood(QEmpOfBin, nObservations, nErrors);
if ( DEBUG )
System.out.println(String.format("bin = %d, Qreported = %f, nObservations = %f, nErrors = %f, posteriors = %f", bin, QReported, nObservations, nErrors, log10Posteriors[bin]));
//if ( DEBUG )
// System.out.println(String.format("bin = %d, Qreported = %f, nObservations = %f, nErrors = %f, posteriors = %f", bin, QReported, nObservations, nErrors, log10Posteriors[bin]));
}
if ( DEBUG )
System.out.println(String.format("Qreported = %f, nObservations = %f, nErrors = %f", QReported, nObservations, nErrors));
//if ( DEBUG )
// System.out.println(String.format("Qreported = %f, nObservations = %f, nErrors = %f", QReported, nObservations, nErrors));
final double[] normalizedPosteriors = MathUtils.normalizeFromLog10(log10Posteriors);
final int MLEbin = MathUtils.maxElementIndex(normalizedPosteriors);
@ -310,35 +316,53 @@ public class RecalDatum {
return Qemp;
}
static final double[] log10QempPriorCache = new double[QualityUtils.MAX_GATK_USABLE_Q_SCORE + 1];
static private final double[] log10QempPriorCache = new double[QualityUtils.MAX_GATK_USABLE_Q_SCORE + 1];
static {
// f(x) = a + b*exp(-((x - c)^2 / (2*d^2)))
// Note that b is the height of the curve's peak, c is the position of the center of the peak, and d controls the width of the "bell".
final double GF_a = 0.0;
final double GF_b = 0.9;
final double GF_c = 0.0;
final double GF_d = 0.5;
final double GF_d = 0.5; // with these parameters, deltas can shift at most ~20 Q points
final GaussianFunction gaussian = new GaussianFunction(GF_a, GF_b, GF_c, GF_d);
for ( int i = 0; i <= QualityUtils.MAX_GATK_USABLE_Q_SCORE; i++ )
log10QempPriorCache[i] = Math.log10(gaussian.value((double) i));
for ( int i = 0; i <= QualityUtils.MAX_GATK_USABLE_Q_SCORE; i++ ) {
double log10Prior = Math.log10(gaussian.value((double) i));
if ( Double.isInfinite(log10Prior) )
log10Prior = -Double.MAX_VALUE;
log10QempPriorCache[i] = log10Prior;
}
}
static public double log10QempPrior(final double Qempirical, final double Qreported) {
static protected double log10QempPrior(final double Qempirical, final double Qreported) {
final int difference = Math.min(Math.abs((int) (Qempirical - Qreported)), QualityUtils.MAX_GATK_USABLE_Q_SCORE);
if ( DEBUG )
System.out.println(String.format("Qemp = %f, log10Priors = %f", Qempirical, log10QempPriorCache[difference]));
//if ( DEBUG )
// System.out.println(String.format("Qemp = %f, log10Priors = %f", Qempirical, log10QempPriorCache[difference]));
return log10QempPriorCache[difference];
}
static public double log10Likelihood(final double Qempirical, final double nObservations, final double nErrors) {
static protected double log10QempLikelihood(final double Qempirical, long nObservations, long nErrors) {
if ( nObservations == 0 )
return 0.0;
// the binomial code requires ints as input (because it does caching). This should theoretically be fine because
// there is plenty of precision in 2^31 observations, but we need to make sure that we don't have overflow
// before casting down to an int.
if ( nObservations > Integer.MAX_VALUE ) {
// we need to decrease nErrors by the same fraction that we are decreasing nObservations
final double fraction = (double)Integer.MAX_VALUE / (double)nObservations;
nErrors = Math.round((double)nErrors * fraction);
nObservations = Integer.MAX_VALUE;
}
// this is just a straight binomial PDF
double log10Prob = MathUtils.log10BinomialProbability((int)nObservations, (int)nErrors, QualityUtils.qualToErrorProbLog10((byte)(int)Qempirical));
if ( log10Prob == Double.NEGATIVE_INFINITY )
if ( Double.isInfinite(log10Prob) || Double.isNaN(log10Prob) )
log10Prob = -Double.MAX_VALUE;
if ( DEBUG )
System.out.println(String.format("Qemp = %f, log10Likelihood = %f", Qempirical, log10Prob));
//if ( DEBUG )
// System.out.println(String.format("Qemp = %f, log10Likelihood = %f", Qempirical, log10Prob));
return log10Prob;
}
}

View File

@ -264,7 +264,7 @@ public class RecalDatumNode<T extends RecalDatum> {
for ( final RecalDatumNode<T> subnode : subnodes ) {
// use the yates correction to help avoid all zeros => NaN
counts[i][0] = Math.round(subnode.getRecalDatum().getNumMismatches()) + 1L;
counts[i][1] = Math.round(subnode.getRecalDatum().getNumObservations()) + 2L;
counts[i][1] = subnode.getRecalDatum().getNumObservations() + 2L;
i++;
}
@ -320,7 +320,7 @@ public class RecalDatumNode<T extends RecalDatum> {
if ( isLeaf() ) {
// this is leave node
return (Math.abs(Math.log10(recalDatum.getEmpiricalErrorRate()) - Math.log10(globalErrorRate))) * recalDatum.getNumObservations();
return (Math.abs(Math.log10(recalDatum.getEmpiricalErrorRate()) - Math.log10(globalErrorRate))) * (double)recalDatum.getNumObservations();
// TODO -- how we can generalize this calculation?
// if ( this.qEnd <= minInterestingQual )
// // It's free to merge up quality scores below the smallest interesting one

View File

@ -93,7 +93,7 @@ public class RecalUtils {
private static final Pair<String, String> eventType = new Pair<String, String>(RecalUtils.EVENT_TYPE_COLUMN_NAME, "%s");
private static final Pair<String, String> empiricalQuality = new Pair<String, String>(RecalUtils.EMPIRICAL_QUALITY_COLUMN_NAME, "%.4f");
private static final Pair<String, String> estimatedQReported = new Pair<String, String>(RecalUtils.ESTIMATED_Q_REPORTED_COLUMN_NAME, "%.4f");
private static final Pair<String, String> nObservations = new Pair<String, String>(RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME, "%.2f");
private static final Pair<String, String> nObservations = new Pair<String, String>(RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME, "%d");
private static final Pair<String, String> nErrors = new Pair<String, String>(RecalUtils.NUMBER_ERRORS_COLUMN_NAME, "%.2f");
/**
@ -269,9 +269,9 @@ public class RecalUtils {
final ArrayList<Pair<String, String>> columnNames = new ArrayList<Pair<String, String>>(); // initialize the array to hold the column names
columnNames.add(new Pair<String, String>(covariateNameMap.get(requestedCovariates[0]), "%s")); // save the required covariate name so we can reference it in the future
if (tableIndex != RecalibrationTables.TableType.READ_GROUP_TABLE.index) {
if (tableIndex != RecalibrationTables.TableType.READ_GROUP_TABLE.ordinal()) {
columnNames.add(new Pair<String, String>(covariateNameMap.get(requestedCovariates[1]), "%s")); // save the required covariate name so we can reference it in the future
if (tableIndex >= RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index) {
if (tableIndex >= RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal()) {
columnNames.add(covariateValue);
columnNames.add(covariateName);
}
@ -279,15 +279,15 @@ public class RecalUtils {
columnNames.add(eventType); // the order of these column names is important here
columnNames.add(empiricalQuality);
if (tableIndex == RecalibrationTables.TableType.READ_GROUP_TABLE.index)
if (tableIndex == RecalibrationTables.TableType.READ_GROUP_TABLE.ordinal())
columnNames.add(estimatedQReported); // only the read group table needs the estimated Q reported
columnNames.add(nObservations);
columnNames.add(nErrors);
final GATKReportTable reportTable;
if (tableIndex <= RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index) {
if (tableIndex <= RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal()) {
if(sortByCols) {
reportTable = new GATKReportTable("RecalTable" + reportTableIndex++, "", columnNames.size(), false, true);
reportTable = new GATKReportTable("RecalTable" + reportTableIndex++, "", columnNames.size(), GATKReportTable.TableSortingWay.SORT_BY_COLUMN);
} else {
reportTable = new GATKReportTable("RecalTable" + reportTableIndex++, "", columnNames.size());
}
@ -295,7 +295,7 @@ public class RecalUtils {
reportTable.addColumn(columnName.getFirst(), columnName.getSecond());
rowIndex = 0; // reset the row index since we're starting with a new table
} else {
reportTable = result.get(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index);
reportTable = result.get(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal());
}
final NestedIntegerArray<RecalDatum> table = recalibrationTables.getTable(tableIndex);
@ -306,9 +306,9 @@ public class RecalUtils {
int columnIndex = 0;
int keyIndex = 0;
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), requestedCovariates[0].formatKey(keys[keyIndex++]));
if (tableIndex != RecalibrationTables.TableType.READ_GROUP_TABLE.index) {
if (tableIndex != RecalibrationTables.TableType.READ_GROUP_TABLE.ordinal()) {
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), requestedCovariates[1].formatKey(keys[keyIndex++]));
if (tableIndex >= RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index) {
if (tableIndex >= RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal()) {
final Covariate covariate = requestedCovariates[tableIndex];
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), covariate.formatKey(keys[keyIndex++]));
@ -320,7 +320,7 @@ public class RecalUtils {
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), event.toString());
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEmpiricalQuality());
if (tableIndex == RecalibrationTables.TableType.READ_GROUP_TABLE.index)
if (tableIndex == RecalibrationTables.TableType.READ_GROUP_TABLE.ordinal())
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEstimatedQReported()); // we only add the estimated Q reported in the RG table
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getNumObservations());
reportTable.set(rowIndex, columnNames.get(columnIndex).getFirst(), datum.getNumMismatches());
@ -414,7 +414,7 @@ public class RecalUtils {
}
// add the optional covariates to the delta table
for (int i = RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index; i < requestedCovariates.length; i++) {
for (int i = RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal(); i < requestedCovariates.length; i++) {
final NestedIntegerArray<RecalDatum> covTable = recalibrationTables.getTable(i);
for (final NestedIntegerArray.Leaf leaf : covTable.getAllLeaves()) {
final int[] covs = new int[4];
@ -458,9 +458,9 @@ public class RecalUtils {
private static List<Object> generateValuesFromKeys(final List<Object> keys, final Covariate[] covariates, final Map<Covariate, String> covariateNameMap) {
final List<Object> values = new ArrayList<Object>(4);
values.add(covariates[RecalibrationTables.TableType.READ_GROUP_TABLE.index].formatKey((Integer)keys.get(0)));
values.add(covariates[RecalibrationTables.TableType.READ_GROUP_TABLE.ordinal()].formatKey((Integer)keys.get(0)));
final int covariateIndex = (Integer)keys.get(1);
final Covariate covariate = covariateIndex == covariates.length ? covariates[RecalibrationTables.TableType.QUALITY_SCORE_TABLE.index] : covariates[covariateIndex];
final Covariate covariate = covariateIndex == covariates.length ? covariates[RecalibrationTables.TableType.QUALITY_SCORE_TABLE.ordinal()] : covariates[covariateIndex];
final int covariateKey = (Integer)keys.get(2);
values.add(covariate.formatKey(covariateKey));
values.add(covariateNameMap.get(covariate));
@ -793,4 +793,48 @@ public class RecalUtils {
myDatum.combine(row.value);
}
}
/**
* Increments the RecalDatum at the specified position in the specified table, or put a new item there
* if there isn't already one.
*
* Does this in a thread-safe way WITHOUT being synchronized: relies on the behavior of NestedIntegerArray.put()
* to return false if another thread inserts a new item at our position in the middle of our put operation.
*
* @param table the table that holds/will hold our item
* @param qual qual for this event
* @param isError error value for this event
* @param keys location in table of our item
*/
public static void incrementDatumOrPutIfNecessary( final NestedIntegerArray<RecalDatum> table,
final byte qual,
final double isError,
final int... keys ) {
final RecalDatum existingDatum = table.get(keys);
if ( existingDatum == null ) {
// No existing item, try to put a new one
if ( ! table.put(createDatumObject(qual, isError), keys) ) {
// Failed to put a new item because another thread came along and put an item here first.
// Get the newly-put item and increment it (item is guaranteed to exist at this point)
table.get(keys).increment(1L, isError);
}
}
else {
// Easy case: already an item here, so increment it
existingDatum.increment(1L, isError);
}
}
/**
* creates a datum object with one observation and one or zero error
*
* @param reportedQual the quality score reported by the instrument for this base
* @param isError whether or not the observation is an error
* @return a new RecalDatum object with the observation and the error
*/
private static RecalDatum createDatumObject(final byte reportedQual, final double isError) {
return new RecalDatum(1, isError, reportedQual);
}
}

View File

@ -68,15 +68,6 @@ public class RecalibrationReport {
}
protected RecalibrationReport(final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates, final GATKReportTable argumentTable, final RecalibrationArgumentCollection RAC) {
this.quantizationInfo = quantizationInfo;
this.recalibrationTables = recalibrationTables;
this.requestedCovariates = requestedCovariates;
this.argumentTable = argumentTable;
this.RAC = RAC;
this.optionalCovariateIndexes = null;
}
/**
* Counts the number of unique read groups in the table
*
@ -139,12 +130,12 @@ public class RecalibrationReport {
final String covName = (String)reportTable.get(i, RecalUtils.COVARIATE_NAME_COLUMN_NAME);
final int covIndex = optionalCovariateIndexes.get(covName);
final Object covValue = reportTable.get(i, RecalUtils.COVARIATE_VALUE_COLUMN_NAME);
tempCOVarray[2] = requestedCovariates[RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + covIndex].keyFromValue(covValue);
tempCOVarray[2] = requestedCovariates[RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal() + covIndex].keyFromValue(covValue);
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalUtils.EVENT_TYPE_COLUMN_NAME));
tempCOVarray[3] = event.index;
tempCOVarray[3] = event.ordinal();
recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + covIndex).put(getRecalDatum(reportTable, i, false), tempCOVarray);
recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal() + covIndex).put(getRecalDatum(reportTable, i, false), tempCOVarray);
}
}
@ -161,7 +152,7 @@ public class RecalibrationReport {
final Object qual = reportTable.get(i, RecalUtils.QUALITY_SCORE_COLUMN_NAME);
tempQUALarray[1] = requestedCovariates[1].keyFromValue(qual);
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalUtils.EVENT_TYPE_COLUMN_NAME));
tempQUALarray[2] = event.index;
tempQUALarray[2] = event.ordinal();
qualTable.put(getRecalDatum(reportTable, i, false), tempQUALarray);
}
@ -178,7 +169,7 @@ public class RecalibrationReport {
final Object rg = reportTable.get(i, RecalUtils.READGROUP_COLUMN_NAME);
tempRGarray[0] = requestedCovariates[0].keyFromValue(rg);
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalUtils.EVENT_TYPE_COLUMN_NAME));
tempRGarray[1] = event.index;
tempRGarray[1] = event.ordinal();
rgTable.put(getRecalDatum(reportTable, i, true), tempRGarray);
}
@ -192,11 +183,22 @@ public class RecalibrationReport {
else if ( o instanceof Long )
return (Long)o;
else
throw new ReviewedStingException("Object " + o + " is expected to be either a double, long or integer but its not either: " + o.getClass());
throw new ReviewedStingException("Object " + o + " is expected to be either a double, long or integer but it's not either: " + o.getClass());
}
private long asLong(final Object o) {
if ( o instanceof Long )
return (Long)o;
else if ( o instanceof Integer )
return ((Integer)o).longValue();
else if ( o instanceof Double )
return ((Double)o).longValue();
else
throw new ReviewedStingException("Object " + o + " is expected to be a long but it's not: " + o.getClass());
}
private RecalDatum getRecalDatum(final GATKReportTable reportTable, final int row, final boolean hasEstimatedQReportedColumn) {
final double nObservations = asDouble(reportTable.get(row, RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME));
final long nObservations = asLong(reportTable.get(row, RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME));
final double nErrors = asDouble(reportTable.get(row, RecalUtils.NUMBER_ERRORS_COLUMN_NAME));
final double empiricalQuality = asDouble(reportTable.get(row, RecalUtils.EMPIRICAL_QUALITY_COLUMN_NAME));

View File

@ -42,15 +42,9 @@ import java.util.ArrayList;
public final class RecalibrationTables {
public enum TableType {
READ_GROUP_TABLE(0),
QUALITY_SCORE_TABLE(1),
OPTIONAL_COVARIATE_TABLES_START(2);
public final int index;
private TableType(final int index) {
this.index = index;
}
READ_GROUP_TABLE,
QUALITY_SCORE_TABLE,
OPTIONAL_COVARIATE_TABLES_START;
}
private final ArrayList<NestedIntegerArray<RecalDatum>> tables;
@ -60,7 +54,7 @@ public final class RecalibrationTables {
private final PrintStream log;
public RecalibrationTables(final Covariate[] covariates) {
this(covariates, covariates[TableType.READ_GROUP_TABLE.index].maximumKeyValue() + 1, null);
this(covariates, covariates[TableType.READ_GROUP_TABLE.ordinal()].maximumKeyValue() + 1, null);
}
public RecalibrationTables(final Covariate[] covariates, final int numReadGroups) {
@ -72,31 +66,31 @@ public final class RecalibrationTables {
for ( int i = 0; i < covariates.length; i++ )
tables.add(i, null); // initialize so we can set below
qualDimension = covariates[TableType.QUALITY_SCORE_TABLE.index].maximumKeyValue() + 1;
qualDimension = covariates[TableType.QUALITY_SCORE_TABLE.ordinal()].maximumKeyValue() + 1;
this.numReadGroups = numReadGroups;
this.log = log;
tables.set(TableType.READ_GROUP_TABLE.index,
tables.set(TableType.READ_GROUP_TABLE.ordinal(),
log == null ? new NestedIntegerArray<RecalDatum>(numReadGroups, eventDimension) :
new LoggingNestedIntegerArray<RecalDatum>(log, "READ_GROUP_TABLE", numReadGroups, eventDimension));
tables.set(TableType.QUALITY_SCORE_TABLE.index, makeQualityScoreTable());
tables.set(TableType.QUALITY_SCORE_TABLE.ordinal(), makeQualityScoreTable());
for (int i = TableType.OPTIONAL_COVARIATE_TABLES_START.index; i < covariates.length; i++)
for (int i = TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal(); i < covariates.length; i++)
tables.set(i,
log == null ? new NestedIntegerArray<RecalDatum>(numReadGroups, qualDimension, covariates[i].maximumKeyValue()+1, eventDimension) :
new LoggingNestedIntegerArray<RecalDatum>(log, String.format("OPTIONAL_COVARIATE_TABLE_%d", i - TableType.OPTIONAL_COVARIATE_TABLES_START.index + 1),
new LoggingNestedIntegerArray<RecalDatum>(log, String.format("OPTIONAL_COVARIATE_TABLE_%d", i - TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal() + 1),
numReadGroups, qualDimension, covariates[i].maximumKeyValue()+1, eventDimension));
}
@Ensures("result != null")
public NestedIntegerArray<RecalDatum> getReadGroupTable() {
return getTable(TableType.READ_GROUP_TABLE.index);
return getTable(TableType.READ_GROUP_TABLE.ordinal());
}
@Ensures("result != null")
public NestedIntegerArray<RecalDatum> getQualityScoreTable() {
return getTable(TableType.QUALITY_SCORE_TABLE.index);
return getTable(TableType.QUALITY_SCORE_TABLE.ordinal());
}
@Ensures("result != null")
@ -123,12 +117,16 @@ public final class RecalibrationTables {
}
/**
* Merge in the quality score table information from qualityScoreTable into this
* recalibration table's quality score table.
*
* @param qualityScoreTable the quality score table we want to merge in
* Merge all of the tables from toMerge into into this set of tables
*/
public void combineQualityScoreTable(final NestedIntegerArray<RecalDatum> qualityScoreTable) {
RecalUtils.combineTables(getQualityScoreTable(), qualityScoreTable);
public void combine(final RecalibrationTables toMerge) {
if ( numTables() != toMerge.numTables() )
throw new IllegalArgumentException("Attempting to merge RecalibrationTables with different sizes");
for ( int i = 0; i < numTables(); i++ ) {
final NestedIntegerArray<RecalDatum> myTable = this.getTable(i);
final NestedIntegerArray<RecalDatum> otherTable = toMerge.getTable(i);
RecalUtils.combineTables(myTable, otherTable);
}
}
}

View File

@ -32,6 +32,13 @@ import org.testng.annotations.Test;
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.util.Random;
import java.io.FileInputStream;
import java.io.DataInputStream;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.ArrayList;
public class GATKReportUnitTest extends BaseTest {
@Test
@ -77,6 +84,85 @@ public class GATKReportUnitTest extends BaseTest {
Assert.assertEquals(GATKReportColumn.isRightAlign(value), expected, "right align of '" + value + "'");
}
private GATKReportTable getTableWithRandomValues() {
Random number = new Random(123L);
final int VALUESRANGE = 10;
GATKReport report = GATKReport.newSimpleReport("TableName", "col1", "col2", "col3");
GATKReportTable table = new GATKReportTable("testSortingTable", "table with random values sorted by columns", 3, GATKReportTable.TableSortingWay.SORT_BY_COLUMN );
final int NUMROWS = 100;
for (int x = 0; x < NUMROWS; x++) {
report.addRow(number.nextInt(VALUESRANGE), number.nextInt(VALUESRANGE), number.nextInt(VALUESRANGE));
}
return table;
}
@Test(enabled = true)
public void testSortingByColumn() {
Assert.assertEquals(isSorted(getTableWithRandomValues()), true);
}
private boolean isSorted(GATKReportTable table) {
boolean result = true;
File testingSortingTableFile = new File("testSortingFile.txt");
try {
// Connect print stream to the output stream
PrintStream ps = new PrintStream(testingSortingTableFile);
table.write(ps);
ps.close();
}
catch (Exception e){
System.err.println ("Error: " + e.getMessage());
}
ArrayList<int[]> rows = new ArrayList<int[]>();
try {
// Open the file
FileInputStream fStream = new FileInputStream(testingSortingTableFile);
// Get the object of DataInputStream
DataInputStream in = new DataInputStream(fStream);
BufferedReader br = new BufferedReader(new InputStreamReader(in));
String strLine;
//Read File Line By Line
while ((strLine = br.readLine()) != null) {
String[] parts = strLine.split(" ");
int l = parts.length;
int[] row = new int[l];
for(int n = 0; n < l; n++) {
row[n] = Integer.parseInt(parts[n]);
}
rows.add(row);
}
//Close the input stream
in.close();
} catch (Exception e){//Catch exception if any
System.err.println("Error: " + e.getMessage());
}
for (int x = 1; x < rows.size() && result; x++) {
result = checkRowOrder(rows.get(x - 1), rows.get(x));
}
return result;
}
private boolean checkRowOrder(int[] row1, int[] row2) {
int l = row1.length;
final int EQUAL = 0;
int result = EQUAL;
for(int x = 0; x < l && ( result <= EQUAL); x++) {
result = ((Integer)row1[x]).compareTo(row2[x]);
}
if (result <= EQUAL) {
return true;
} else {
return false;
}
}
private GATKReportTable makeBasicTable() {
GATKReport report = GATKReport.newSimpleReport("TableName", "sample", "value");
GATKReportTable table = report.getTable("TableName");
@ -168,7 +254,7 @@ public class GATKReportUnitTest extends BaseTest {
table.set("RZ", "SomeFloat", 535646345.657453464576);
table.set("RZ", "TrueFalse", true);
report1.addTable("Table3", "blah", 1, true, false);
report1.addTable("Table3", "blah", 1, GATKReportTable.TableSortingWay.SORT_BY_ROW);
report1.getTable("Table3").addColumn("a");
report1.getTable("Table3").addRowIDMapping("q", 2);
report1.getTable("Table3").addRowIDMapping("5", 3);

View File

@ -112,15 +112,14 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
dictionary = reference.getSequenceDictionary();
genomeLocParser = new GenomeLocParser(dictionary);
// TODO: test shard boundaries
// TODO: reads with indels
// TODO: reads which span many regions
// TODO: reads which are partially between intervals (in/outside extension)
// TODO: duplicate reads
// TODO: read at the end of a contig
// TODO: reads which are completely outside intervals but within extension
// TODO: test the extension itself
// TODO: unmapped reads
intervals = new ArrayList<GenomeLoc>();
intervals.add(genomeLocParser.createGenomeLoc("1", 10, 20));
@ -142,6 +141,9 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
reads.add(buildSAMRecord("boundary_1_post", "1", 1999, 2050));
reads.add(buildSAMRecord("extended_and_np", "1", 990, 1990));
reads.add(buildSAMRecord("outside_intervals", "1", 5000, 6000));
reads.add(buildSAMRecord("shard_boundary_1_pre", "1", 16300, 16385));
reads.add(buildSAMRecord("shard_boundary_1_post", "1", 16384, 16400));
reads.add(buildSAMRecord("shard_boundary_equal", "1", 16355, 16414));
reads.add(buildSAMRecord("simple20", "20", 10025, 10075));
createBAM(reads);
@ -153,7 +155,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
File indexFile = new File(testBAI);
indexFile.deleteOnExit();
SAMFileWriter out = new SAMFileWriterFactory().makeBAMWriter(reads.get(0).getHeader(), true, outFile);
SAMFileWriter out = new SAMFileWriterFactory().setCreateIndex(true).makeBAMWriter(reads.get(0).getHeader(), true, outFile);
for (GATKSAMRecord read : ReadUtils.sortReadsByCoordinate(reads)) {
out.addAlignment(read);
}
@ -272,6 +274,9 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
// boundary_1_post: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
// outside_intervals: none
// shard_boundary_1_pre: Primary in 1:14908-16384, Non-Primary in 1:16385-16927
// shard_boundary_1_post: Non-Primary in 1:14908-16384, Primary in 1:16385-16927
// shard_boundary_equal: Non-Primary in 1:14908-16384, Primary in 1:16385-16927
// simple20: Primary in 20:10000-10100
Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
@ -286,6 +291,12 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
verifyReadMapping(region, "boundary_equal", "boundary_1_post");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 14908, 16384));
verifyReadMapping(region, "shard_boundary_1_pre");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 16385, 16927));
verifyReadMapping(region, "shard_boundary_1_post", "shard_boundary_equal");
region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
verifyReadMapping(region, "simple20");
}
@ -309,6 +320,9 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
// boundary_1_post: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
// outside_intervals: none
// shard_boundary_1_pre: Primary in 1:14908-16384, Non-Primary in 1:16385-16927
// shard_boundary_1_post: Non-Primary in 1:14908-16384, Primary in 1:16385-16927
// shard_boundary_equal: Non-Primary in 1:14908-16384, Primary in 1:16385-16927
// simple20: Primary in 20:10000-10100
Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
@ -323,6 +337,12 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
verifyReadMapping(region, "boundary_equal", "boundary_unequal", "boundary_1_pre", "boundary_1_post");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 14908, 16384));
verifyReadMapping(region, "shard_boundary_1_pre", "shard_boundary_1_post", "shard_boundary_equal");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 16385, 16927));
verifyReadMapping(region, "shard_boundary_1_pre", "shard_boundary_1_post", "shard_boundary_equal");
region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
verifyReadMapping(region, "simple20");
}
@ -347,6 +367,9 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
// boundary_1_post: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
// outside_intervals: none
// shard_boundary_1_pre: Primary in 1:14908-16384, Non-Primary in 1:16385-16927
// shard_boundary_1_post: Non-Primary in 1:14908-16384, Primary in 1:16385-16927
// shard_boundary_equal: Non-Primary in 1:14908-16384, Primary in 1:16385-16927
// simple20: Primary in 20:10000-10100
Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
@ -361,6 +384,12 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
verifyReadMapping(region, "boundary_equal", "boundary_unequal", "extended_and_np", "boundary_1_pre", "boundary_1_post");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 14908, 16384));
verifyReadMapping(region, "shard_boundary_1_pre", "shard_boundary_1_post", "shard_boundary_equal");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 16385, 16927));
verifyReadMapping(region, "shard_boundary_1_pre", "shard_boundary_1_post", "shard_boundary_equal");
region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
verifyReadMapping(region, "simple20");
}
@ -429,6 +458,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
protected GATKSAMRecord buildSAMRecord(String readName, String contig, int alignmentStart, int alignmentEnd) {
SAMFileHeader header = ArtificialSAMUtils.createDefaultReadGroup(new SAMFileHeader(), "test", "test");
header.setSequenceDictionary(dictionary);
header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
GATKSAMRecord record = new GATKSAMRecord(header);
record.setReadName(readName);

View File

@ -32,7 +32,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("a127623a26bac4c17c9df491e170ed88"));
Arrays.asList("fbfbd4d13b7ba3d76e8e186902e81378"));
executeTest("test file has annotations, asking for annotations, #1", spec);
}
@ -40,7 +40,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("13e24e6b9dfa241df5baa2c3f53415b9"));
Arrays.asList("19aef8914efc497192f89a9038310ca5"));
executeTest("test file has annotations, asking for annotations, #2", spec);
}
@ -66,7 +66,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("07cb4d427235878aeec0066d7d298e54"));
Arrays.asList("4f0b8033da18e6cf6e9b8d5d36c21ba2"));
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
}
@ -74,7 +74,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("e579097677d5e56a5776151251947961"));
Arrays.asList("64ca176d587dfa2b3b9dec9f7999305c"));
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
}
@ -82,7 +82,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testExcludeAnnotations() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard -XA FisherStrand -XA ReadPosRankSumTest --variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("348314945436ace71ce6b1a52559d9ee"));
Arrays.asList("f33f417fad98c05d9cd08ffa22943b0f"));
executeTest("test exclude annotations", spec);
}
@ -90,7 +90,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testOverwritingHeader() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1,
Arrays.asList("ae7930e37a66c0aa4cfe0232736864fe"));
Arrays.asList("0c810f6c4abef9d9dc5513ca872d3d22"));
executeTest("test overwriting header", spec);
}
@ -98,7 +98,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoReads() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1,
Arrays.asList("a0ba056c2625033e5e859fd6bcec1256"));
Arrays.asList("1c423b7730b9805e7b885ece924286e0"));
executeTest("not passing it any reads", spec);
}
@ -106,7 +106,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testDBTagWithDbsnp() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --dbsnp " + b36dbSNP129 + " -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1,
Arrays.asList("0be7da17340111a94e8581ee3808c88a"));
Arrays.asList("54d7d5bb9404652857adf5e50d995f30"));
executeTest("getting DB tag with dbSNP", spec);
}
@ -114,7 +114,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testMultipleIdsWithDbsnp() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --alwaysAppendDbsnpId --dbsnp " + b36dbSNP129 + " -G Standard --variant " + privateTestDir + "vcfexample3withIDs.vcf -L " + privateTestDir + "vcfexample3withIDs.vcf", 1,
Arrays.asList("e40e625302a496ede42eed61c2ce524b"));
Arrays.asList("5fe63e511061ed4f91d938e72e7e3c39"));
executeTest("adding multiple IDs with dbSNP", spec);
}
@ -122,7 +122,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testDBTagWithHapMap() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --comp:H3 " + privateTestDir + "fakeHM3.vcf -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1,
Arrays.asList("cb50876477d3e035b6eda5d720d7ba8d"));
Arrays.asList("cc7184263975595a6e2473d153227146"));
executeTest("getting DB tag with HM3", spec);
}
@ -130,7 +130,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoQuals() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --variant " + privateTestDir + "noQual.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L " + privateTestDir + "noQual.vcf -A QualByDepth", 1,
Arrays.asList("458412261d61797d39f802c1e03d63f6"));
Arrays.asList("aea983adc01cd059193538cc30adc17d"));
executeTest("test file doesn't have QUALs", spec);
}
@ -138,7 +138,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testUsingExpression() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --resource:foo " + privateTestDir + "targetAnnotations.vcf -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -E foo.AF -L " + privateTestDir + "vcfexample3empty.vcf", 1,
Arrays.asList("39defa8108dca9fa3e54b22a7da43f77"));
Arrays.asList("2b0e8cdfd691779befc5ac123d1a1887"));
executeTest("using expression", spec);
}
@ -146,7 +146,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testUsingExpressionWithID() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --resource:foo " + privateTestDir + "targetAnnotations.vcf -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -E foo.ID -L " + privateTestDir + "vcfexample3empty.vcf", 1,
Arrays.asList("a917edd58a0c235e9395bfc2d2020a8c"));
Arrays.asList("3de1d1998203518098ffae233f3e2352"));
executeTest("using expression with ID", spec);
}

View File

@ -42,7 +42,6 @@ public class BQSRGathererUnitTest extends BaseTest {
GATKReport originalReport = new GATKReport(recal_original);
GATKReport calculatedReport = new GATKReport(output);
// test the Arguments table
List<String> columnsToTest = Arrays.asList(RecalUtils.ARGUMENT_COLUMN_NAME, RecalUtils.ARGUMENT_VALUE_COLUMN_NAME);
GATKReportTable originalTable = originalReport.getTable(RecalUtils.ARGUMENT_REPORT_TABLE_TITLE);
@ -86,7 +85,9 @@ public class BQSRGathererUnitTest extends BaseTest {
for (String column : columnsToTest) {
Object actual = calculated.get(new Integer(row), column);
Object expected = original.get(row, column);
Assert.assertEquals(actual, expected, "Row: " + row + " Original Table: " + original.getTableName() + " Calc Table: " + calculated.getTableName());
//if ( !actual.equals(expected) )
// System.out.println("Row=" + row + " Table=" + original.getTableName() + " Column=" + column + " Expected=" + expected + " Actual=" + actual);
Assert.assertEquals(actual, expected, "Row: " + row + " Original Table: " + original.getTableName() + " Column=" + column);
}
}

View File

@ -0,0 +1,110 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.bqsr;
import net.sf.samtools.SAMUtils;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.recalibration.EventType;
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.EnumMap;
import java.util.List;
public final class ReadRecalibrationInfoUnitTest extends BaseTest {
@DataProvider(name = "InfoProvider")
public Object[][] createCombineTablesProvider() {
List<Object[]> tests = new ArrayList<Object[]>();
for ( final int readLength: Arrays.asList(10, 100, 1000) ) {
for ( final boolean includeIndelErrors : Arrays.asList(true, false) ) {
tests.add(new Object[]{readLength, includeIndelErrors});
}
}
return tests.toArray(new Object[][]{});
}
@Test(dataProvider = "InfoProvider")
public void testReadInfo(final int readLength, final boolean includeIndelErrors) {
final ReadCovariates covariates = new ReadCovariates(readLength, 2);
final byte[] bases = new byte[readLength];
final byte[] baseQuals = new byte[readLength];
final byte[] insertionQuals = new byte[readLength];
final byte[] deletionQuals = new byte[readLength];
final boolean[] skips = new boolean[readLength];
final double[] snpErrors = new double[readLength];
final double[] insertionErrors = new double[readLength];
final double[] deletionsErrors = new double[readLength];
for ( int i = 0; i < readLength; i++ ) {
bases[i] = 'A';
baseQuals[i] = (byte)(i % SAMUtils.MAX_PHRED_SCORE);
insertionQuals[i] = (byte)((i+1) % SAMUtils.MAX_PHRED_SCORE);
deletionQuals[i] = (byte)((i+2) % SAMUtils.MAX_PHRED_SCORE);
skips[i] = i % 2 == 0;
snpErrors[i] = 1.0 / (i+1);
insertionErrors[i] = 0.5 / (i+1);
deletionsErrors[i] = 0.3 / (i+1);
}
final EnumMap<EventType, double[]> errors = new EnumMap<EventType, double[]>(EventType.class);
errors.put(EventType.BASE_SUBSTITUTION, snpErrors);
errors.put(EventType.BASE_INSERTION, insertionErrors);
errors.put(EventType.BASE_DELETION, deletionsErrors);
final EnumMap<EventType, byte[]> quals = new EnumMap<EventType, byte[]>(EventType.class);
quals.put(EventType.BASE_SUBSTITUTION, baseQuals);
quals.put(EventType.BASE_INSERTION, insertionQuals);
quals.put(EventType.BASE_DELETION, deletionQuals);
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, baseQuals, readLength + "M");
if ( includeIndelErrors ) {
read.setBaseQualities(insertionQuals, EventType.BASE_INSERTION);
read.setBaseQualities(deletionQuals, EventType.BASE_DELETION);
}
final ReadRecalibrationInfo info = new ReadRecalibrationInfo(read, covariates, skips, snpErrors, insertionErrors, deletionsErrors);
Assert.assertEquals(info.getCovariatesValues(), covariates);
Assert.assertEquals(info.getRead(), read);
for ( int i = 0; i < readLength; i++ ) {
Assert.assertEquals(info.skip(i), skips[i]);
for ( final EventType et : EventType.values() ) {
Assert.assertEquals(info.getErrorFraction(et, i), errors.get(et)[i]);
final byte expectedQual = et == EventType.BASE_SUBSTITUTION || includeIndelErrors ? quals.get(et)[i]: GATKSAMRecord.DEFAULT_INSERTION_DELETION_QUAL;
Assert.assertEquals(info.getQual(et, i), expectedQual);
}
}
}
}

View File

@ -0,0 +1,106 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.progressmeter;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
/**
* UnitTests for the ProgressMeterDaemon
*
* User: depristo
* Date: 8/24/12
* Time: 11:25 AM
* To change this template use File | Settings | File Templates.
*/
public class ProgressMeterDaemonUnitTest extends BaseTest {
private GenomeLocParser genomeLocParser;
@BeforeClass
public void init() throws FileNotFoundException {
genomeLocParser = new GenomeLocParser(new CachingIndexedFastaSequenceFile(new File(b37KGReference)));
}
// capture and count calls to progress
private class TestingProgressMeter extends ProgressMeter {
final List<Long> progressCalls = new LinkedList<Long>();
private TestingProgressMeter(final long poll) {
super(null, "test", new GenomeLocSortedSet(genomeLocParser), poll);
}
@Override
protected synchronized void printProgress(boolean mustPrint) {
progressCalls.add(System.currentTimeMillis());
}
}
@DataProvider(name = "PollingData")
public Object[][] makePollingData() {
List<Object[]> tests = new ArrayList<Object[]>();
for ( final int ticks : Arrays.asList(1, 5, 10) ) {
for ( final int poll : Arrays.asList(10, 100) ) {
tests.add(new Object[]{poll, ticks});
}
}
return tests.toArray(new Object[][]{});
}
@Test(dataProvider = "PollingData", invocationCount = 10, successPercentage = 90)
public void testMe(final long poll, final int ticks) throws InterruptedException {
final TestingProgressMeter meter = new TestingProgressMeter(poll);
final ProgressMeterDaemon daemon = meter.getProgressMeterDaemon();
Assert.assertTrue(daemon.isDaemon());
Assert.assertFalse(daemon.isDone());
Thread.sleep(ticks * poll);
Assert.assertFalse(daemon.isDone());
daemon.done();
Assert.assertTrue(daemon.isDone());
Assert.assertTrue(meter.progressCalls.size() >= 1,
"Expected at least one progress update call from daemon thread, but only got " + meter.progressCalls.size() + " with exact calls " + meter.progressCalls);
final int tolerance = (int)Math.ceil(0.8 * meter.progressCalls.size());
Assert.assertTrue(Math.abs(meter.progressCalls.size() - ticks) <= tolerance,
"Expected " + ticks + " progress calls from daemon thread, but got " + meter.progressCalls.size() + " and a tolerance of only " + tolerance);
}
}

View File

@ -0,0 +1,86 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.progressmeter;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.AutoFormattingTime;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* UnitTests for the ProgressMeterData
*
* User: depristo
* Date: 8/24/12
* Time: 11:25 AM
* To change this template use File | Settings | File Templates.
*/
public class ProgressMeterDataUnitTest extends BaseTest {
@Test
public void testBasic() {
Assert.assertEquals(new ProgressMeterData(1.0, 2, 3).getElapsedSeconds(), 1.0);
Assert.assertEquals(new ProgressMeterData(1.0, 2, 3).getUnitsProcessed(), 2);
Assert.assertEquals(new ProgressMeterData(1.0, 2, 3).getBpProcessed(), 3);
}
@Test
public void testFraction() {
final double TOL = 1e-3;
Assert.assertEquals(new ProgressMeterData(1.0, 1, 1).calculateFractionGenomeTargetCompleted(10), 0.1, TOL);
Assert.assertEquals(new ProgressMeterData(1.0, 1, 2).calculateFractionGenomeTargetCompleted(10), 0.2, TOL);
Assert.assertEquals(new ProgressMeterData(1.0, 1, 1).calculateFractionGenomeTargetCompleted(100), 0.01, TOL);
Assert.assertEquals(new ProgressMeterData(1.0, 1, 2).calculateFractionGenomeTargetCompleted(100), 0.02, TOL);
Assert.assertEquals(new ProgressMeterData(1.0, 1, 1).calculateFractionGenomeTargetCompleted(0), 1.0, TOL);
}
@Test
public void testSecondsPerBP() {
final double TOL = 1e-3;
final long M = 1000000;
Assert.assertEquals(new ProgressMeterData(1.0, 1, M).secondsPerMillionBP(), 1.0, TOL);
Assert.assertEquals(new ProgressMeterData(1.0, 1, M/10).secondsPerMillionBP(), 10.0, TOL);
Assert.assertEquals(new ProgressMeterData(2.0, 1, M).secondsPerMillionBP(), 2.0, TOL);
Assert.assertEquals(new ProgressMeterData(1.0, 1, 0).secondsPerMillionBP(), 1e6, TOL);
}
@Test
public void testSecondsPerElement() {
final double TOL = 1e-3;
final long M = 1000000;
Assert.assertEquals(new ProgressMeterData(1.0, M, 1).secondsPerMillionElements(), 1.0, TOL);
Assert.assertEquals(new ProgressMeterData(1.0, M/10, 1).secondsPerMillionElements(), 10.0, TOL);
Assert.assertEquals(new ProgressMeterData(2.00, M, 1).secondsPerMillionElements(), 2.0, TOL);
Assert.assertEquals(new ProgressMeterData(1.0, 0, 1).secondsPerMillionElements(), 1e6, TOL);
}
}

View File

@ -0,0 +1,61 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.recalibration;
import org.broadinstitute.sting.BaseTest;
import org.testng.Assert;
import org.testng.annotations.Test;
import java.util.*;
public final class EventTypeUnitTest extends BaseTest {
@Test
public void testEventTypes() {
for ( final EventType et : EventType.values() ) {
Assert.assertNotNull(et.toString());
Assert.assertNotNull(et.prettyPrint());
Assert.assertFalse("".equals(et.toString()));
Assert.assertFalse("".equals(et.prettyPrint()));
Assert.assertEquals(EventType.eventFrom(et.ordinal()), et);
Assert.assertEquals(EventType.eventFrom(et.toString()), et);
}
}
@Test
public void testEventTypesEnumItself() {
final Set<String> shortReps = new HashSet<String>();
for ( final EventType et : EventType.values() ) {
Assert.assertFalse(shortReps.contains(et.toString()), "Short representative for EventType has duplicates for " + et);
shortReps.add(et.toString());
}
Assert.assertEquals(shortReps.size(), EventType.values().length, "Short representatives for EventType aren't unique");
}
@Test(expectedExceptions = IllegalArgumentException.class)
public void testBadString() {
EventType.eventFrom("asdfhalsdjfalkjsdf");
}
}

View File

@ -30,16 +30,13 @@ package org.broadinstitute.sting.utils.recalibration;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.Utils;
import org.testng.Assert;
import org.testng.annotations.BeforeSuite;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
public class RecalDatumUnitTest extends BaseTest {
@ -74,7 +71,7 @@ public class RecalDatumUnitTest extends BaseTest {
}
public RecalDatum makeRecalDatum() {
return new RecalDatum(exTotal, exError, (byte)getReportedQual());
return new RecalDatum((long)exTotal, (double)exError, (byte)getReportedQual());
}
@Override
@ -83,13 +80,19 @@ public class RecalDatumUnitTest extends BaseTest {
}
}
private static boolean createdDatumTestProviders = false;
@DataProvider(name = "RecalDatumTestProvider")
public Object[][] makeRecalDatumTestProvider() {
for ( int E : Arrays.asList(1, 10, 100, 1000, 10000) )
for ( int N : Arrays.asList(10, 100, 1000, 10000, 100000, 1000000) )
for ( int reportedQual : Arrays.asList(10, 20) )
if ( E <= N )
new RecalDatumTestProvider(E, N, reportedQual);
if ( !createdDatumTestProviders ) {
for ( int E : Arrays.asList(1, 10, 100, 1000, 10000) )
for ( int N : Arrays.asList(10, 100, 1000, 10000, 100000, 1000000) )
for ( int reportedQual : Arrays.asList(10, 20) )
if ( E <= N )
new RecalDatumTestProvider(E, N, reportedQual);
createdDatumTestProviders = true;
}
return RecalDatumTestProvider.getTests(RecalDatumTestProvider.class);
}
@ -104,7 +107,6 @@ public class RecalDatumUnitTest extends BaseTest {
Assert.assertEquals(datum.getNumObservations(), cfg.exTotal, 1E-6);
if ( cfg.getReportedQual() != -1 )
Assert.assertEquals(datum.getEstimatedQReportedAsByte(), cfg.getReportedQual());
BaseTest.assertEqualsDoubleSmart(datum.getEmpiricalQuality(), cfg.getErrorRatePhredScaled());
BaseTest.assertEqualsDoubleSmart(datum.getEmpiricalErrorRate(), cfg.getErrorRate());
final double e = datum.getEmpiricalQuality();
@ -175,7 +177,83 @@ public class RecalDatumUnitTest extends BaseTest {
@Test
public void testNoObs() {
final RecalDatum rd = new RecalDatum(0, 0, (byte)10);
final RecalDatum rd = new RecalDatum(0L, 0.0, (byte)10);
Assert.assertEquals(rd.getEmpiricalErrorRate(), 0.0);
}
@Test
public void testlog10QempPrior() {
for ( int Qemp = 0; Qemp <= QualityUtils.MAX_QUAL_SCORE; Qemp++ ) {
for ( int Qrep = 0; Qrep <= QualityUtils.MAX_QUAL_SCORE; Qrep++ ) {
final double log10prior = RecalDatum.log10QempPrior(Qemp, Qrep);
Assert.assertTrue(log10prior < 0.0);
Assert.assertFalse(Double.isInfinite(log10prior));
Assert.assertFalse(Double.isNaN(log10prior));
}
}
final int Qrep = 20;
int maxQemp = -1;
double maxQempValue = -Double.MAX_VALUE;
for ( int Qemp = 0; Qemp <= QualityUtils.MAX_QUAL_SCORE; Qemp++ ) {
final double log10prior = RecalDatum.log10QempPrior(Qemp, Qrep);
if ( log10prior > maxQempValue ) {
maxQemp = Qemp;
maxQempValue = log10prior;
}
}
Assert.assertEquals(maxQemp, Qrep);
}
@Test
public void testBayesianEstimateOfEmpiricalQuality() {
final int Qrep = 20;
// test no shift
Assert.assertEquals(RecalDatum.bayesianEstimateOfEmpiricalQuality(0, 0, Qrep), (double)Qrep);
Assert.assertEquals(RecalDatum.bayesianEstimateOfEmpiricalQuality(10, 0, Qrep), (double)Qrep);
Assert.assertEquals(RecalDatum.bayesianEstimateOfEmpiricalQuality(1000, 10, Qrep), (double)Qrep);
// test small shift
Assert.assertEquals(RecalDatum.bayesianEstimateOfEmpiricalQuality(10, 10, Qrep), Qrep - 1.0);
Assert.assertEquals(RecalDatum.bayesianEstimateOfEmpiricalQuality(1000, 0, Qrep), Qrep + 1.0);
// test medium shift
Assert.assertEquals(RecalDatum.bayesianEstimateOfEmpiricalQuality(10000, 0, Qrep), Qrep + 3.0);
Assert.assertEquals(RecalDatum.bayesianEstimateOfEmpiricalQuality(10000, 10, Qrep), Qrep + 3.0);
// test large shift
Assert.assertEquals(RecalDatum.bayesianEstimateOfEmpiricalQuality(100000, 10, Qrep), Qrep + 8.0);
Assert.assertEquals(RecalDatum.bayesianEstimateOfEmpiricalQuality(1000000, 10, Qrep), Qrep + 16.0);
}
@Test
public void testlog10QempLikelihood() {
final double[] Qemps = new double[] { 0.0, 10.0, 20.0, 30.0 };
final int[] observations = new int[] {0, 10, 1000, 1000000};
final int[] errors = new int[] {0, 10, 1000, 1000000};
for ( double Qemp : Qemps ) {
for ( int observation : observations ) {
for ( int error : errors ) {
if ( error > observation )
continue;
final double log10likelihood = RecalDatum.log10QempLikelihood(Qemp, observation, error);
Assert.assertTrue(observation == 0 ? MathUtils.compareDoubles(log10likelihood, 0.0) == 0 : log10likelihood < 0.0);
Assert.assertFalse(Double.isInfinite(log10likelihood));
Assert.assertFalse(Double.isNaN(log10likelihood));
}
}
}
long bigNum = new Long((long)Integer.MAX_VALUE);
bigNum *= 2L;
final double log10likelihood = RecalDatum.log10QempLikelihood(30, bigNum, 100000);
Assert.assertTrue(log10likelihood < 0.0);
Assert.assertFalse(Double.isInfinite(log10likelihood));
Assert.assertFalse(Double.isNaN(log10likelihood));
}
}

View File

@ -146,7 +146,7 @@ public final class RecalUtilsUnitTest extends BaseTest {
public NestedIntegerArray<RecalDatum> makeTable(final List<Row> rows) {
final NestedIntegerArray<RecalDatum> x = new NestedIntegerArray<RecalDatum>(3, 3);
for ( final Row r : rows )
x.put(new RecalDatum(r.no, r.ne, (byte)10), r.rg, r.qual);
x.put(new RecalDatum((long)r.no, (double)r.ne, (byte)10), r.rg, r.qual);
return x;
}
}

View File

@ -20,9 +20,9 @@ public class RecalibrationReportUnitTest {
private static RecalDatum createRandomRecalDatum(int maxObservations, int maxErrors) {
final Random random = new Random();
final int nObservations = random.nextInt(maxObservations);
final int nErrors = random.nextInt(maxErrors);
final int nErrors = Math.min(random.nextInt(maxErrors), nObservations);
final int qual = random.nextInt(QualityUtils.MAX_QUAL_SCORE);
return new RecalDatum(nObservations, nErrors, (byte)qual);
return new RecalDatum((long)nObservations, (double)nErrors, (byte)qual);
}
@Test(enabled = true)
@ -90,14 +90,14 @@ public class RecalibrationReportUnitTest {
final int[] covariates = rc.getKeySet(offset, errorMode);
final int randomMax = errorMode == EventType.BASE_SUBSTITUTION ? 10000 : 100000;
rgTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], errorMode.index);
qualTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], errorMode.index);
rgTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], errorMode.ordinal());
qualTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], errorMode.ordinal());
nKeys += 2;
for (int j = 0; j < optionalCovariates.size(); j++) {
final NestedIntegerArray<RecalDatum> covTable = recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + j);
final int covValue = covariates[RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + j];
final NestedIntegerArray<RecalDatum> covTable = recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal() + j);
final int covValue = covariates[RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal() + j];
if ( covValue >= 0 ) {
covTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], covValue, errorMode.index);
covTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], covValue, errorMode.ordinal());
nKeys++;
}
}

View File

@ -29,15 +29,46 @@ import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
import org.broadinstitute.sting.utils.recalibration.covariates.*;
import org.testng.Assert;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
import java.util.Arrays;
import java.util.List;
public final class RecalibrationTablesUnitTest extends BaseTest {
private RecalibrationTables tables;
private Covariate[] covariates;
private int numReadGroups = 6;
final byte qualByte = 1;
final List<Integer> combineStates = Arrays.asList(0, 1, 2);
@BeforeMethod
private void makeTables() {
covariates = RecalibrationTestUtils.makeInitializedStandardCovariates();
tables = new RecalibrationTables(covariates, numReadGroups);
fillTable(tables);
}
private void fillTable(final RecalibrationTables tables) {
for ( int iterations = 0; iterations < 10; iterations++ ) {
for ( final EventType et : EventType.values() ) {
for ( final int rg : combineStates) {
final double error = rg % 2 == 0 ? 1 : 0;
RecalUtils.incrementDatumOrPutIfNecessary(tables.getReadGroupTable(), qualByte, error, rg, et.ordinal());
for ( final int qual : combineStates) {
RecalUtils.incrementDatumOrPutIfNecessary(tables.getQualityScoreTable(), qualByte, error, rg, qual, et.ordinal());
for ( final int cycle : combineStates)
RecalUtils.incrementDatumOrPutIfNecessary(tables.getTable(2), qualByte, error, rg, qual, cycle, et.ordinal());
for ( final int context : combineStates)
RecalUtils.incrementDatumOrPutIfNecessary(tables.getTable(3), qualByte, error, rg, qual, context, et.ordinal());
}
}
}
}
}
@Test
public void basicTest() {
final Covariate[] covariates = RecalibrationTestUtils.makeInitializedStandardCovariates();
final int numReadGroups = 6;
final RecalibrationTables tables = new RecalibrationTables(covariates, numReadGroups);
final Covariate qualCov = covariates[1];
final Covariate cycleCov = covariates[2];
final Covariate contextCov = covariates[3];
@ -45,11 +76,11 @@ public final class RecalibrationTablesUnitTest extends BaseTest {
Assert.assertEquals(tables.numTables(), covariates.length);
Assert.assertNotNull(tables.getReadGroupTable());
Assert.assertEquals(tables.getReadGroupTable(), tables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE.index));
Assert.assertEquals(tables.getReadGroupTable(), tables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE.ordinal()));
testDimensions(tables.getReadGroupTable(), numReadGroups);
Assert.assertNotNull(tables.getQualityScoreTable());
Assert.assertEquals(tables.getQualityScoreTable(), tables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE.index));
Assert.assertEquals(tables.getQualityScoreTable(), tables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE.ordinal()));
testDimensions(tables.getQualityScoreTable(), numReadGroups, qualCov.maximumKeyValue() + 1);
Assert.assertNotNull(tables.getTable(2));
@ -72,13 +103,74 @@ public final class RecalibrationTablesUnitTest extends BaseTest {
@Test
public void basicMakeQualityScoreTable() {
final Covariate[] covariates = RecalibrationTestUtils.makeInitializedStandardCovariates();
final int numReadGroups = 6;
final RecalibrationTables tables = new RecalibrationTables(covariates, numReadGroups);
final Covariate qualCov = covariates[1];
final NestedIntegerArray<RecalDatum> copy = tables.makeQualityScoreTable();
testDimensions(copy, numReadGroups, qualCov.maximumKeyValue()+1);
Assert.assertEquals(copy.getAllValues().size(), 0);
}
@Test
public void testCombine1() {
final RecalibrationTables merged = new RecalibrationTables(covariates, numReadGroups);
fillTable(merged);
merged.combine(tables);
for ( int i = 0; i < tables.numTables(); i++ ) {
NestedIntegerArray<RecalDatum> table = tables.getTable(i);
NestedIntegerArray<RecalDatum> mergedTable = merged.getTable(i);
Assert.assertEquals(table.getAllLeaves().size(), mergedTable.getAllLeaves().size());
for ( final NestedIntegerArray.Leaf<RecalDatum> leaf : table.getAllLeaves() ) {
final RecalDatum mergedValue = mergedTable.get(leaf.keys);
Assert.assertNotNull(mergedValue);
Assert.assertEquals(mergedValue.getNumObservations(), leaf.value.getNumObservations() * 2);
Assert.assertEquals(mergedValue.getNumMismatches(), leaf.value.getNumMismatches() * 2);
}
}
}
@Test
public void testCombineEmptyOther() {
final RecalibrationTables merged = new RecalibrationTables(covariates, numReadGroups);
merged.combine(tables);
for ( int i = 0; i < tables.numTables(); i++ ) {
NestedIntegerArray<RecalDatum> table = tables.getTable(i);
NestedIntegerArray<RecalDatum> mergedTable = merged.getTable(i);
Assert.assertEquals(table.getAllLeaves().size(), mergedTable.getAllLeaves().size());
for ( final NestedIntegerArray.Leaf<RecalDatum> leaf : table.getAllLeaves() ) {
final RecalDatum mergedValue = mergedTable.get(leaf.keys);
Assert.assertNotNull(mergedValue);
Assert.assertEquals(mergedValue.getNumObservations(), leaf.value.getNumObservations());
Assert.assertEquals(mergedValue.getNumMismatches(), leaf.value.getNumMismatches());
}
}
}
@Test
public void testCombinePartial() {
final RecalibrationTables merged = new RecalibrationTables(covariates, numReadGroups);
for ( final int rg : combineStates) {
RecalUtils.incrementDatumOrPutIfNecessary(merged.getTable(3), qualByte, 1, rg, 0, 0, 0);
}
merged.combine(tables);
for ( int i = 0; i < tables.numTables(); i++ ) {
NestedIntegerArray<RecalDatum> table = tables.getTable(i);
NestedIntegerArray<RecalDatum> mergedTable = merged.getTable(i);
Assert.assertEquals(table.getAllLeaves().size(), mergedTable.getAllLeaves().size());
for ( final NestedIntegerArray.Leaf<RecalDatum> leaf : table.getAllLeaves() ) {
final RecalDatum mergedValue = mergedTable.get(leaf.keys);
Assert.assertNotNull(mergedValue);
final int delta = i == 3 && leaf.keys[1] == 0 && leaf.keys[2] == 0 && leaf.keys[3] == 0 ? 1 : 0;
Assert.assertEquals(mergedValue.getNumObservations(), leaf.value.getNumObservations() + delta);
Assert.assertEquals(mergedValue.getNumMismatches(), leaf.value.getNumMismatches() + delta);
}
}
}
}