Merge branch 'master' of github.com:broadinstitute/gsa-unstable
This commit is contained in:
commit
d3c2c97fb2
|
|
@ -1,99 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.classloader.ProtectedPackageSource;
|
||||
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
|
||||
import org.broadinstitute.sting.utils.recalibration.EventType;
|
||||
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalDatum;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine implements ProtectedPackageSource {
|
||||
private final static Logger logger = Logger.getLogger(AdvancedRecalibrationEngine.class);
|
||||
|
||||
final List<NestedIntegerArray<RecalDatum>> allThreadLocalQualityScoreTables = new LinkedList<NestedIntegerArray<RecalDatum>>();
|
||||
private ThreadLocal<NestedIntegerArray<RecalDatum>> threadLocalQualityScoreTables = new ThreadLocal<NestedIntegerArray<RecalDatum>>() {
|
||||
@Override
|
||||
protected synchronized NestedIntegerArray<RecalDatum> initialValue() {
|
||||
final NestedIntegerArray<RecalDatum> table = recalibrationTables.makeQualityScoreTable();
|
||||
allThreadLocalQualityScoreTables.add(table);
|
||||
return table;
|
||||
}
|
||||
};
|
||||
|
||||
@Override
|
||||
public void updateDataForRead( final ReadRecalibrationInfo recalInfo ) {
|
||||
final GATKSAMRecord read = recalInfo.getRead();
|
||||
final ReadCovariates readCovariates = recalInfo.getCovariatesValues();
|
||||
final NestedIntegerArray<RecalDatum> qualityScoreTable = getThreadLocalQualityScoreTable();
|
||||
|
||||
for( int offset = 0; offset < read.getReadBases().length; offset++ ) {
|
||||
if( ! recalInfo.skip(offset) ) {
|
||||
|
||||
for (final EventType eventType : EventType.values()) {
|
||||
final int[] keys = readCovariates.getKeySet(offset, eventType);
|
||||
final int eventIndex = eventType.index;
|
||||
final byte qual = recalInfo.getQual(eventType, offset);
|
||||
final double isError = recalInfo.getErrorFraction(eventType, offset);
|
||||
|
||||
incrementDatumOrPutIfNecessary(qualityScoreTable, qual, isError, keys[0], keys[1], eventIndex);
|
||||
|
||||
for (int i = 2; i < covariates.length; i++) {
|
||||
if (keys[i] < 0)
|
||||
continue;
|
||||
|
||||
incrementDatumOrPutIfNecessary(recalibrationTables.getTable(i), qual, isError, keys[0], keys[1], keys[i], eventIndex);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a NestedIntegerArray for a QualityScore table specific to this thread
|
||||
* @return a non-null NestedIntegerArray ready to be used to collect calibration info for the quality score covariate
|
||||
*/
|
||||
private NestedIntegerArray<RecalDatum> getThreadLocalQualityScoreTable() {
|
||||
return threadLocalQualityScoreTables.get();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void finalizeData() {
|
||||
// merge in all of the thread local tables
|
||||
logger.info("Merging " + allThreadLocalQualityScoreTables.size() + " thread-local quality score tables");
|
||||
for ( final NestedIntegerArray<RecalDatum> localTable : allThreadLocalQualityScoreTables ) {
|
||||
recalibrationTables.combineQualityScoreTable(localTable);
|
||||
}
|
||||
allThreadLocalQualityScoreTables.clear(); // cleanup after ourselves
|
||||
|
||||
super.finalizeData();
|
||||
}
|
||||
}
|
||||
|
|
@ -53,21 +53,21 @@ public class BQSRIntegrationTest extends WalkerTest {
|
|||
String HiSeqBam = privateTestDir + "HiSeq.1mb.1RG.bam";
|
||||
String HiSeqInterval = "chr1:10,000,000-10,100,000";
|
||||
return new Object[][]{
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "2f250fecb930e0dfe0f63fe0fed3960b")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "26c8d7226139a040557b1d3b1c8792f0")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "9b43a1839cb6ea03aec1d96f15ca8efb")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "3159a9d136c45e4a65d46a23dc8fd3b5")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "bb7262829effbbdbc8d88dd36f480368")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "fbb002fa2b9197c4b555852dccc11562")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "7392acb71131a60a527ca32715fc59be")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "49d4383896a90795d94138db1410a7df")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "427448eff98cf194cc7217c0b1401e79")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "50cd1a10b6ecb3d09f90f1e4a66da95d")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "1dc71561c9d0fb56f9876cb5043c5376")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "13e8f032e76340b114847c90af0a1f8a")},
|
||||
{new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "03f58ae4f9d203034e895a3636fc108f")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "49d4383896a90795d94138db1410a7df")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "2db2ef8c2d63e167663d70340182f49a")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "6b3f252718f59cf9fd3f7612f73a35bf")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "863576ac9ff0b0e02f2e84aef15923a7")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "03e28f48201a35c70d1cf48e9f45364f")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "6e3c5635d387a1c428a7c9c88ad26488")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "6507adcb94bacde4cdee9caa9f14f24b")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "399bbb4bf80764dfc644b2f95d824615")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "34d70899253c2b3343ca9ae944291c30")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "e61fa47bfc08433f0cd55558e2081548")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "5c2622c63225b8b04990baf0ae4de07c")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "ee7191d83d7d5bb957dc4595883c32f1")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "da92f4730356f479c2c2b71497cfac6d")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "8075595113b48c0c7ead08ce41bef9fe")},
|
||||
{new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "be05834841c5690c66910270521d5c32")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "e61fa47bfc08433f0cd55558e2081548")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "8ee0b498dbbc95ce76393a0f089fec92")},
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -104,7 +104,7 @@ public class BQSRIntegrationTest extends WalkerTest {
|
|||
" -sortAllCols" +
|
||||
" --plot_pdf_file /dev/null" +
|
||||
" --intermediate_csv_file %s",
|
||||
Arrays.asList("d1c38a3418979400630e2bca1140689c"));
|
||||
Arrays.asList("dd6e0e1e3f53f8ae0c8f5de21ded6ee9"));
|
||||
executeTest("testBQSR-CSVfile", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -55,36 +55,36 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test(enabled = true)
|
||||
public void testSNP_ACS_Pools() {
|
||||
PC_LSV_Test_short(" -maxAltAlleles 1 -ploidy 6 -out_mode EMIT_ALL_CONFIDENT_SITES","LSV_SNP_ACS","SNP","651469eeacdb3ab9e2690cfb71f6a634");
|
||||
PC_LSV_Test_short(" -maxAltAlleles 1 -ploidy 6 -out_mode EMIT_ALL_CONFIDENT_SITES","LSV_SNP_ACS","SNP","df0e67c975ef74d593f1c704daab1705");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testBOTH_GGA_Pools() {
|
||||
PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","be7dc20bdb5f200d189706bcf1aeb7ee");
|
||||
PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","d1c113a17e36762d27eb27fd12528e52");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testINDEL_GGA_Pools() {
|
||||
PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","25e5ea86d87b7d7ddaad834a6ed7481d");
|
||||
PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","ab043eed87fadbe5761a55a4912b19ac");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() {
|
||||
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","cdbf268d282e57189a88fb83f0e1fd72");
|
||||
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","95d48e0680019d5406ff9adb8f2ff3ca");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() {
|
||||
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","2ed40925cd112c1a45470d215b7ec4b3");
|
||||
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","8a4ddd64c4e9c42b4a8622582fcfa9c9");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testMT_SNP_DISCOVERY_sp4() {
|
||||
PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","33695a998bcc906cabcc758727004387");
|
||||
PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","3fc6f4d458313616727c60e49c0e852b");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testMT_SNP_GGA_sp10() {
|
||||
PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "b2725242114bf9cc9bca14679705ba40");
|
||||
PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "1bebbc0f28bff6fd64736ccca8839df8");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot1() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
|
||||
Arrays.asList("2ba9af34d2a4d55caf152265a30ead46"));
|
||||
Arrays.asList("847605f4efafef89529fe0e496315edd"));
|
||||
executeTest("test MultiSample Pilot1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -38,7 +38,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testWithAllelesPassedIn1() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||
Arrays.asList("0630c35c070d7a7e0cf22b3cce797f22"));
|
||||
Arrays.asList("5b31b811072a4df04524e13604015f9b"));
|
||||
executeTest("test MultiSample Pilot2 with alleles passed in", spec1);
|
||||
}
|
||||
|
||||
|
|
@ -46,7 +46,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testWithAllelesPassedIn2() {
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||
Arrays.asList("5857dcb4e6a8422ae0813e42d433b122"));
|
||||
Arrays.asList("d9992e55381afb43742cc9b30fcd7538"));
|
||||
executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -54,7 +54,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testSingleSamplePilot2() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("489deda5d3276545364a06b7385f8bd9"));
|
||||
Arrays.asList("dff4412a074940d26994f9552476b209"));
|
||||
executeTest("test SingleSample Pilot2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -62,7 +62,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultipleSNPAlleles() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
|
||||
Arrays.asList("595ba44c75d08dab98df222b8e61ab70"));
|
||||
Arrays.asList("b41b95aaa2c453c9b75b3b29a9c2718e"));
|
||||
executeTest("test Multiple SNP alleles", spec);
|
||||
}
|
||||
|
||||
|
|
@ -70,7 +70,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testBadRead() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH -I " + privateTestDir + "badRead.test.bam -o %s -L 1:22753424-22753464", 1,
|
||||
Arrays.asList("360f9795facdaa14c0cb4b05207142e4"));
|
||||
Arrays.asList("d915535c1458733f09f82670092fcab6"));
|
||||
executeTest("test bad read", spec);
|
||||
}
|
||||
|
||||
|
|
@ -78,7 +78,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testReverseTrim() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1,
|
||||
Arrays.asList("4b4a62429f8eac1e2f27ba5e2edea9e5"));
|
||||
Arrays.asList("44e9f6cf11b4efecb454cd3de8de9877"));
|
||||
executeTest("test reverse trim", spec);
|
||||
}
|
||||
|
||||
|
|
@ -86,7 +86,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMismatchedPLs() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1,
|
||||
Arrays.asList("cc892c91a93dbd8dbdf645803f35a0ee"));
|
||||
Arrays.asList("935ee705ffe8cc6bf1d9efcceea271c8"));
|
||||
executeTest("test mismatched PLs", spec);
|
||||
}
|
||||
|
||||
|
|
@ -96,7 +96,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
private final static String COMPRESSED_OUTPUT_MD5 = "3fc7d2681ff753e2d68605d7cf8b63e3";
|
||||
private final static String COMPRESSED_OUTPUT_MD5 = "e6e33f0ebabab027eabed51fe9a08da9";
|
||||
|
||||
@Test
|
||||
public void testCompressedOutput() {
|
||||
|
|
@ -149,7 +149,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMinBaseQualityScore() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --min_base_quality_score 26", 1,
|
||||
Arrays.asList("04dc83d7dfb42b8cada91647bd9f32f1"));
|
||||
Arrays.asList("6ee6537e9ebc1bfc7c6cf8f04b1582ff"));
|
||||
executeTest("test min_base_quality_score 26", spec);
|
||||
}
|
||||
|
||||
|
|
@ -157,7 +157,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testSLOD() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b36KGReference + " --computeSLOD --no_cmdline_in_header -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
|
||||
Arrays.asList("4429a665a1048f958db3c204297cdb9f"));
|
||||
Arrays.asList("55760482335497086458b09e415ecf54"));
|
||||
executeTest("test SLOD", spec);
|
||||
}
|
||||
|
||||
|
|
@ -165,7 +165,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testNDA() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --annotateNDA -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
|
||||
Arrays.asList("f063e3573c513eaa9ce7d7df22143362"));
|
||||
Arrays.asList("938e888a40182878be4c3cc4859adb69"));
|
||||
executeTest("test NDA", spec);
|
||||
}
|
||||
|
||||
|
|
@ -173,7 +173,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testCompTrack() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -comp:FOO " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
|
||||
Arrays.asList("d76e93e2676354dde832f08a508c6f88"));
|
||||
Arrays.asList("7dc186d420487e4e156a24ec8dea0951"));
|
||||
executeTest("test using comp track", spec);
|
||||
}
|
||||
|
||||
|
|
@ -187,17 +187,17 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testOutputParameterSitesOnly() {
|
||||
testOutputParameters("-sites_only", "1a65172b9bd7a2023d48bc758747b34a");
|
||||
testOutputParameters("-sites_only", "f99c7471127a6fb6f72e136bc873b2c9");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOutputParameterAllConfident() {
|
||||
testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "3f1fa34d8440f6f21654ce60c0ba8f28");
|
||||
testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "9dbc9389db39cf9697e93e0bf529314f");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOutputParameterAllSites() {
|
||||
testOutputParameters("--output_mode EMIT_ALL_SITES", "f240434b4d3c234f6f9e349e9ec05f4e");
|
||||
testOutputParameters("--output_mode EMIT_ALL_SITES", "8b26088a035e579c4afd3b46737291e4");
|
||||
}
|
||||
|
||||
private void testOutputParameters(final String args, final String md5) {
|
||||
|
|
@ -211,7 +211,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testConfidence() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
|
||||
Arrays.asList("aec378bed312b3557c6dd7ec740c8091"));
|
||||
Arrays.asList("4af83a883ecc03a23b0aa6dd4b8f1ceb"));
|
||||
executeTest("test confidence 1", spec1);
|
||||
}
|
||||
|
||||
|
|
@ -222,12 +222,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
// --------------------------------------------------------------------------------------------------------------
|
||||
@Test
|
||||
public void testHeterozyosity1() {
|
||||
testHeterozosity( 0.01, "5da6b24033a6b02f466836443d49560e" );
|
||||
testHeterozosity( 0.01, "bdc8760d7ae1e01c0510b12c1e6fcfa3" );
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHeterozyosity2() {
|
||||
testHeterozosity( 1.0 / 1850, "1f284c4af967a3c26687164f9441fb16" );
|
||||
testHeterozosity( 1.0 / 1850, "f508f06a47305e11e62776615cb14fe3" );
|
||||
}
|
||||
|
||||
private void testHeterozosity(final double arg, final String md5) {
|
||||
|
|
@ -251,7 +251,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("cff553c53de970f64051ed5711407038"));
|
||||
Arrays.asList("13d91059f58fb50a07a6a34b9438a45b"));
|
||||
|
||||
executeTest(String.format("test multiple technologies"), spec);
|
||||
}
|
||||
|
|
@ -270,7 +270,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -L 1:10,000,000-10,100,000" +
|
||||
" -baq CALCULATE_AS_NECESSARY",
|
||||
1,
|
||||
Arrays.asList("f960a91963e614a6c8d8cda57836df24"));
|
||||
Arrays.asList("07d8b77a5f6697f3a47a4f1efb0dcf50"));
|
||||
|
||||
executeTest(String.format("test calling with BAQ"), spec);
|
||||
}
|
||||
|
|
@ -289,7 +289,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("46a6d24c82ebb99d305462960fa09b7c"));
|
||||
Arrays.asList("0f026d2e568172cf32813cc54ea7ba23"));
|
||||
|
||||
executeTest(String.format("test indel caller in SLX"), spec);
|
||||
}
|
||||
|
|
@ -304,7 +304,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -minIndelCnt 1" +
|
||||
" -L 1:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("2be25321bbc6a963dba7ecba5dd76802"));
|
||||
Arrays.asList("e7ad858e9d6617534761918561f3ed4c"));
|
||||
|
||||
executeTest(String.format("test indel caller in SLX with low min allele count"), spec);
|
||||
}
|
||||
|
|
@ -317,7 +317,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("d6b2657cd5a4a949968cdab50efce515"));
|
||||
Arrays.asList("39c7a813fd6ee82d3604f2a868b35b2a"));
|
||||
|
||||
executeTest(String.format("test indel calling, multiple technologies"), spec);
|
||||
}
|
||||
|
|
@ -327,7 +327,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("9cff66a321284c362f393bc4db21f756"));
|
||||
Arrays.asList("9430fe36789a791fcff6162f768ae563"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec);
|
||||
}
|
||||
|
||||
|
|
@ -337,7 +337,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
|
||||
+ privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("90c8cfcf65152534c16ed81104fc3bcd"));
|
||||
Arrays.asList("8d8dbf483526b0b309f5728619a74a86"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec);
|
||||
}
|
||||
|
||||
|
|
@ -345,13 +345,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSampleIndels1() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
|
||||
Arrays.asList("457b8f899cf1665de61e75084dbb79d0"));
|
||||
Arrays.asList("5667a699a3a13474f2d1cd2d6b01cd5b"));
|
||||
List<File> result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst();
|
||||
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
|
||||
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
|
||||
Arrays.asList("a13fe7aa3b9e8e091b3cf3442a056ec1"));
|
||||
Arrays.asList("b6c1d5cd28ff584c5f5037afef4e883a"));
|
||||
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -361,7 +361,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + privateTestDir + vcf + " -I " + validationDataLocation +
|
||||
"NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam -o %s -L " + validationDataLocation + vcf, 1,
|
||||
Arrays.asList("d075ad318739c8c56bdce857da1e48b9"));
|
||||
Arrays.asList("d76eacc4021b78ccc0a9026162e814a7"));
|
||||
executeTest("test GENOTYPE_GIVEN_ALLELES with no evidence in reads", spec);
|
||||
}
|
||||
|
||||
|
|
@ -373,7 +373,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 20:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("91c632ab17a1dd89ed19ebb20324f905"));
|
||||
Arrays.asList("1e0d2c15546c3b0959b00ffb75488b56"));
|
||||
|
||||
executeTest(String.format("test UG with base indel quality scores"), spec);
|
||||
}
|
||||
|
|
@ -407,7 +407,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMinIndelFraction0() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
assessMinIndelFraction + " -minIndelFrac 0.0", 1,
|
||||
Arrays.asList("1d80e135d611fe19e1fb1882aa588a73"));
|
||||
Arrays.asList("db3026c49a3de7a5cb9a3d77635d0706"));
|
||||
executeTest("test minIndelFraction 0.0", spec);
|
||||
}
|
||||
|
||||
|
|
@ -415,7 +415,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMinIndelFraction25() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
assessMinIndelFraction + " -minIndelFrac 0.25", 1,
|
||||
Arrays.asList("752139616752902fca13c312d8fe5e22"));
|
||||
Arrays.asList("7ab8e5ee15ab98d6756b0eea0f4d3798"));
|
||||
executeTest("test minIndelFraction 0.25", spec);
|
||||
}
|
||||
|
||||
|
|
@ -423,7 +423,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMinIndelFraction100() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
assessMinIndelFraction + " -minIndelFrac 1", 1,
|
||||
Arrays.asList("d66b9decf26e1704abda1a919ac149cd"));
|
||||
Arrays.asList("3f07efb768e08650a7ce333edd4f9a52"));
|
||||
executeTest("test minIndelFraction 1.0", spec);
|
||||
}
|
||||
|
||||
|
|
@ -437,7 +437,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testNsInCigar() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "testWithNs.bam -o %s -L 8:141813600-141813700 -out_mode EMIT_ALL_SITES", 1,
|
||||
Arrays.asList("b62ba9777efc05af4c36e2d4ce3ee67c"));
|
||||
Arrays.asList("4d36969d4f8f1094f1fb6e7e085c19f6"));
|
||||
executeTest("test calling on reads with Ns in CIGAR", spec);
|
||||
}
|
||||
|
||||
|
|
@ -451,18 +451,18 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testReducedBam() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
|
||||
Arrays.asList("f72ecd00b2913f63788faa7dabb1d102"));
|
||||
Arrays.asList("092e42a712afb660ec79ff11c55933e2"));
|
||||
executeTest("test calling on a ReducedRead BAM", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReducedBamSNPs() {
|
||||
testReducedCalling("SNP", "f059743858004ceee325f2a7761a2362");
|
||||
testReducedCalling("SNP", "c0de74ab8f4f14eb3a2c5d55c200ac5f");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReducedBamINDELs() {
|
||||
testReducedCalling("INDEL", "04845ba1ec7d8d8b0eab2ca6bdb9c1a6");
|
||||
testReducedCalling("INDEL", "1c9aaf65ffaa12bb766855265a1c3f8e");
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -483,7 +483,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testContaminationDownsampling() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --contamination_fraction_to_filter 0.20", 1,
|
||||
Arrays.asList("b500ad5959bce69f888a2fac024647e5"));
|
||||
Arrays.asList("1f9071466fc40f4c6a0f58ac8e9135fb"));
|
||||
executeTest("test contamination_percentage_to_filter 0.20", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -21,19 +21,19 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSample() {
|
||||
HCTest(CEUTRIO_BAM, "", "7122d4f0ef94c5274aa3047cfebe08ed");
|
||||
HCTest(CEUTRIO_BAM, "", "47fdbe5f01d3ce5e53056eea8c488e45");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSample() {
|
||||
HCTest(NA12878_BAM, "", "6cd6e6787521c07a7bae98766fd628ab");
|
||||
HCTest(NA12878_BAM, "", "a2c63f6e6e51a01019bdbd23125bdb15");
|
||||
}
|
||||
|
||||
// TODO -- add more tests for GGA mode, especially with input alleles that are complex variants and/or not trimmed
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGA() {
|
||||
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
|
||||
"44df2a9da4fbd2162ae44c3f2a6ef01f");
|
||||
"54b7cc3da3d8349ff4302f99883ab188");
|
||||
}
|
||||
|
||||
private void HCTestComplexVariants(String bam, String args, String md5) {
|
||||
|
|
@ -44,7 +44,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleComplex() {
|
||||
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "4a413eeb7a75cab0ab5370b4c08dcf8e");
|
||||
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "6c0c441b71848c2eea38ab5e2afe1120");
|
||||
}
|
||||
|
||||
private void HCTestSymbolicVariants(String bam, String args, String md5) {
|
||||
|
|
@ -55,7 +55,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSampleSymbolic() {
|
||||
HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "77cf5b5273828dd1605bb23a5aeafcaa");
|
||||
HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "0761ff5cbf279be467833fa6708bf360");
|
||||
}
|
||||
|
||||
private void HCTestIndelQualityScores(String bam, String args, String md5) {
|
||||
|
|
@ -66,20 +66,20 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSampleIndelQualityScores() {
|
||||
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "87ca97f90e74caee35c35616c065821c");
|
||||
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "29f1125df5ab27cc937a144ae08ac735");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void HCTestProblematicReadsModifiedInActiveRegions() {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("3df42d0550b51eb9b55aac61e8b3c452"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("ece627de486aee69d02872891c6cb0ff"));
|
||||
executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void HCTestStructuralIndels() {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("4dbc72b72e3e2d9d812d5a398490e213"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("add0f4f51969b7caeea99005a7ba1aa4"));
|
||||
executeTest("HCTestStructuralIndels: ", spec);
|
||||
}
|
||||
|
||||
|
|
@ -93,7 +93,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void HCTestReducedBam() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
|
||||
Arrays.asList("f8c2745bf71f2659a57494fcaa2c103b"));
|
||||
Arrays.asList("8a400b0c46f41447fcc35a907e34f384"));
|
||||
executeTest("HC calling on a ReducedRead BAM", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -32,8 +32,8 @@ import org.apache.log4j.PatternLayout;
|
|||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.help.ApplicationDetails;
|
||||
import org.broadinstitute.sting.utils.help.HelpConstants;
|
||||
import org.broadinstitute.sting.utils.help.HelpFormatter;
|
||||
import org.broadinstitute.sting.utils.help.HelpUtils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
|
|
@ -289,7 +289,7 @@ public abstract class CommandLineProgram {
|
|||
*/
|
||||
private static void printDocumentationReference() {
|
||||
errorPrintf("Visit our website and forum for extensive documentation and answers to %n");
|
||||
errorPrintf("commonly asked questions " + HelpUtils.BASE_GATK_URL + "%n");
|
||||
errorPrintf("commonly asked questions " + HelpConstants.BASE_GATK_URL + "%n");
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -36,10 +36,7 @@ import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
|
|||
import org.broadinstitute.sting.gatk.walkers.Attribution;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.help.ApplicationDetails;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.help.GATKDocUtils;
|
||||
import org.broadinstitute.sting.utils.help.HelpUtils;
|
||||
import org.broadinstitute.sting.utils.help.*;
|
||||
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -161,7 +158,7 @@ public class CommandLineGATK extends CommandLineExecutable {
|
|||
List<String> header = new ArrayList<String>();
|
||||
header.add(String.format("The Genome Analysis Toolkit (GATK) v%s, Compiled %s",getVersionNumber(), getBuildTime()));
|
||||
header.add("Copyright (c) 2010 The Broad Institute");
|
||||
header.add("For support and documentation go to " + HelpUtils.BASE_GATK_URL);
|
||||
header.add("For support and documentation go to " + HelpConstants.BASE_GATK_URL);
|
||||
return header;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -22,11 +22,6 @@ public class LocusShardDataProvider extends ShardDataProvider {
|
|||
*/
|
||||
private final ReadProperties sourceInfo;
|
||||
|
||||
/**
|
||||
* The parser, used to create and build new GenomeLocs.
|
||||
*/
|
||||
private final GenomeLocParser genomeLocParser;
|
||||
|
||||
/**
|
||||
* The particular locus for which data is provided. Should be contained within shard.getGenomeLocs().
|
||||
*/
|
||||
|
|
@ -45,7 +40,6 @@ public class LocusShardDataProvider extends ShardDataProvider {
|
|||
public LocusShardDataProvider(Shard shard, ReadProperties sourceInfo, GenomeLocParser genomeLocParser, GenomeLoc locus, LocusIterator locusIterator, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods) {
|
||||
super(shard,genomeLocParser,reference,rods);
|
||||
this.sourceInfo = sourceInfo;
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
this.locus = locus;
|
||||
this.locusIterator = locusIterator;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ package org.broadinstitute.sting.gatk.filters;
|
|||
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.sting.utils.help.GATKDocUtils;
|
||||
import org.broadinstitute.sting.utils.help.HelpUtils;
|
||||
import org.broadinstitute.sting.utils.help.HelpConstants;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
|
@ -71,7 +71,7 @@ public class FilterManager extends PluginManager<ReadFilter> {
|
|||
|
||||
return String.format("Read filter %s not found. Available read filters:%n%n%s%n%n%s",pluginName,
|
||||
userFriendlyListofReadFilters(availableFilters),
|
||||
"Please consult the GATK Documentation (" + HelpUtils.GATK_DOCS_URL + ") for more information.");
|
||||
"Please consult the GATK Documentation (" + HelpConstants.GATK_DOCS_URL + ") for more information.");
|
||||
}
|
||||
|
||||
private String userFriendlyListofReadFilters(List<Class<? extends ReadFilter>> filters) {
|
||||
|
|
|
|||
|
|
@ -117,7 +117,7 @@ public class GATKReport {
|
|||
* @param numColumns the number of columns in this table
|
||||
*/
|
||||
public void addTable(final String tableName, final String tableDescription, final int numColumns) {
|
||||
addTable(tableName, tableDescription, numColumns, false, false);
|
||||
addTable(tableName, tableDescription, numColumns, GATKReportTable.TableSortingWay.DO_NOT_SORT);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -126,11 +126,10 @@ public class GATKReport {
|
|||
* @param tableName the name of the table
|
||||
* @param tableDescription the description of the table
|
||||
* @param numColumns the number of columns in this table
|
||||
* @param sortByRowID whether to sort the rows by the row ID
|
||||
* @param sortByAllColumns whether to sort the rows by all columns starting from leftmost column
|
||||
* @param sortingWay way to sort table
|
||||
*/
|
||||
public void addTable(final String tableName, final String tableDescription, final int numColumns, final boolean sortByRowID, final boolean sortByAllColumns) {
|
||||
GATKReportTable table = new GATKReportTable(tableName, tableDescription, numColumns, sortByRowID, sortByAllColumns);
|
||||
public void addTable(final String tableName, final String tableDescription, final int numColumns, final GATKReportTable.TableSortingWay sortingWay) {
|
||||
GATKReportTable table = new GATKReportTable(tableName, tableDescription, numColumns, sortingWay);
|
||||
tables.put(tableName, table);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -46,8 +46,7 @@ public class GATKReportTable {
|
|||
private final String tableName;
|
||||
private final String tableDescription;
|
||||
|
||||
private final boolean sortByRowID;
|
||||
private final boolean sortByAllColumns;
|
||||
private final TableSortingWay sortingWay;
|
||||
|
||||
private List<Object[]> underlyingData;
|
||||
private final List<GATKReportColumn> columnInfo;
|
||||
|
|
@ -73,6 +72,12 @@ public class GATKReportTable {
|
|||
public int index() { return index; }
|
||||
}
|
||||
|
||||
public enum TableSortingWay {
|
||||
SORT_BY_ROW,
|
||||
SORT_BY_COLUMN,
|
||||
DO_NOT_SORT
|
||||
}
|
||||
|
||||
protected enum TableNameHeaderFields {
|
||||
NAME(2),
|
||||
DESCRIPTION(3);
|
||||
|
|
@ -107,10 +112,7 @@ public class GATKReportTable {
|
|||
tableDescription = (tableNameData.length <= TableNameHeaderFields.DESCRIPTION.index()) ? "" : tableNameData[TableNameHeaderFields.DESCRIPTION.index()]; // table may have no description! (and that's okay)
|
||||
|
||||
// when reading from a file, we do not re-sort the rows
|
||||
sortByRowID = false;
|
||||
|
||||
// when reading from a file, we do not re-sort the rows
|
||||
sortByAllColumns = false;
|
||||
sortingWay = TableSortingWay.DO_NOT_SORT;
|
||||
|
||||
// initialize the data
|
||||
final int nColumns = Integer.parseInt(tableData[TableDataHeaderFields.COLS.index()]);
|
||||
|
|
@ -181,7 +183,7 @@ public class GATKReportTable {
|
|||
* @param numColumns the number of columns in this table
|
||||
*/
|
||||
public GATKReportTable(final String tableName, final String tableDescription, final int numColumns) {
|
||||
this(tableName, tableDescription, numColumns, true, false);
|
||||
this(tableName, tableDescription, numColumns, TableSortingWay.SORT_BY_ROW);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -190,10 +192,9 @@ public class GATKReportTable {
|
|||
* @param tableName the name of the table
|
||||
* @param tableDescription the description of the table
|
||||
* @param numColumns the number of columns in this table
|
||||
* @param sortByRowID whether to sort rows by the row ID (instead of the order in which they were added)
|
||||
* @param sortByAllColumns whether to sort rows by all columns (instead of the order in which they were added)
|
||||
* @param sortingWay in what way to sort rows (instead of the order in which they were added)
|
||||
*/
|
||||
public GATKReportTable(final String tableName, final String tableDescription, final int numColumns, final boolean sortByRowID, final boolean sortByAllColumns) {
|
||||
public GATKReportTable(final String tableName, final String tableDescription, final int numColumns, final TableSortingWay sortingWay) {
|
||||
if ( !isValidName(tableName) ) {
|
||||
throw new ReviewedStingException("Attempted to set a GATKReportTable name of '" + tableName + "'. GATKReportTable names must be purely alphanumeric - no spaces or special characters are allowed.");
|
||||
}
|
||||
|
|
@ -204,8 +205,7 @@ public class GATKReportTable {
|
|||
|
||||
this.tableName = tableName;
|
||||
this.tableDescription = tableDescription;
|
||||
this.sortByRowID = sortByRowID;
|
||||
this.sortByAllColumns = sortByAllColumns;
|
||||
this.sortingWay = sortingWay;
|
||||
|
||||
underlyingData = new ArrayList<Object[]>(INITITAL_ARRAY_SIZE);
|
||||
columnInfo = new ArrayList<GATKReportColumn>(numColumns);
|
||||
|
|
@ -218,7 +218,7 @@ public class GATKReportTable {
|
|||
* @param tableToCopy
|
||||
*/
|
||||
public GATKReportTable(final GATKReportTable tableToCopy, final boolean copyData) {
|
||||
this(tableToCopy.getTableName(), tableToCopy.getTableDescription(), tableToCopy.getNumColumns(), tableToCopy.sortByRowID, tableToCopy.sortByAllColumns);
|
||||
this(tableToCopy.getTableName(), tableToCopy.getTableDescription(), tableToCopy.getNumColumns(), tableToCopy.sortingWay);
|
||||
for ( final GATKReportColumn column : tableToCopy.getColumnInfo() )
|
||||
addColumn(column.getColumnName(), column.getFormat());
|
||||
if ( copyData )
|
||||
|
|
@ -569,56 +569,53 @@ public class GATKReportTable {
|
|||
out.println();
|
||||
|
||||
// write the table body
|
||||
if ( sortByAllColumns ) {
|
||||
Collections.sort(underlyingData, new Comparator<Object[]>() {
|
||||
//INVARIANT the two arrays are of the same length and corresponding elements are of the same type
|
||||
@Override
|
||||
public int compare(Object[] objectArr1, Object[] objectArr2) {
|
||||
final int EQUAL = 0;
|
||||
switch (sortingWay) {
|
||||
case SORT_BY_COLUMN:
|
||||
Collections.sort(underlyingData, new Comparator<Object[]>() {
|
||||
//INVARIANT the two arrays are of the same length and corresponding elements are of the same type
|
||||
@Override
|
||||
public int compare(Object[] objectArr1, Object[] objectArr2) {
|
||||
final int EQUAL = 0;
|
||||
|
||||
int result = EQUAL;
|
||||
int result = EQUAL;
|
||||
|
||||
int l = objectArr1.length;
|
||||
for (int x = 0; x < l; x++) {
|
||||
if (objectArr1[x] instanceof Integer) {
|
||||
result = ((Integer)objectArr1[x]).compareTo((Integer)objectArr2[x]);
|
||||
int l = objectArr1.length;
|
||||
for (int x = 0; x < l; x++) {
|
||||
if (objectArr1[x] instanceof Integer) {
|
||||
result = ((Integer)objectArr1[x]).compareTo((Integer)objectArr2[x]);
|
||||
} else if (objectArr1[x] instanceof Double) {
|
||||
result = ((Double)objectArr1[x]).compareTo((Double)objectArr2[x]);
|
||||
} else { // default uses String comparison
|
||||
result = objectArr1[x].toString().compareTo(objectArr2[x].toString());
|
||||
}
|
||||
if( result != EQUAL) {
|
||||
return result;
|
||||
}
|
||||
} else if (objectArr1[x] instanceof Double) {
|
||||
result = ((Double)objectArr1[x]).compareTo((Double)objectArr2[x]);
|
||||
if( result != EQUAL) {
|
||||
return result;
|
||||
}
|
||||
} else { // default uses String comparison
|
||||
result = objectArr1[x].toString().compareTo(objectArr2[x].toString());
|
||||
if( result != EQUAL) {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
});
|
||||
for ( final Object[] row : underlyingData )
|
||||
writeRow(out, row);
|
||||
} else if ( sortByRowID ) {
|
||||
// make sure that there are exactly the correct number of ID mappings
|
||||
if ( rowIdToIndex.size() != underlyingData.size() )
|
||||
throw new ReviewedStingException("There isn't a 1-to-1 mapping from row ID to index; this can happen when rows are not created consistently");
|
||||
});
|
||||
for ( final Object[] row : underlyingData )
|
||||
writeRow(out, row);
|
||||
break;
|
||||
case SORT_BY_ROW:
|
||||
// make sure that there are exactly the correct number of ID mappings
|
||||
if ( rowIdToIndex.size() != underlyingData.size() )
|
||||
throw new ReviewedStingException("There isn't a 1-to-1 mapping from row ID to index; this can happen when rows are not created consistently");
|
||||
|
||||
final TreeMap<Object, Integer> sortedMap;
|
||||
try {
|
||||
sortedMap = new TreeMap<Object, Integer>(rowIdToIndex);
|
||||
} catch (ClassCastException e) {
|
||||
throw new ReviewedStingException("Unable to sort the rows based on the row IDs because the ID Objects are of different types");
|
||||
}
|
||||
for ( final Map.Entry<Object, Integer> rowKey : sortedMap.entrySet() )
|
||||
writeRow(out, underlyingData.get(rowKey.getValue()));
|
||||
} else {
|
||||
for ( final Object[] row : underlyingData )
|
||||
writeRow(out, row);
|
||||
}
|
||||
final TreeMap<Object, Integer> sortedMap;
|
||||
try {
|
||||
sortedMap = new TreeMap<Object, Integer>(rowIdToIndex);
|
||||
} catch (ClassCastException e) {
|
||||
throw new ReviewedStingException("Unable to sort the rows based on the row IDs because the ID Objects are of different types");
|
||||
}
|
||||
for ( final Map.Entry<Object, Integer> rowKey : sortedMap.entrySet() )
|
||||
writeRow(out, underlyingData.get(rowKey.getValue()));
|
||||
break;
|
||||
case DO_NOT_SORT:
|
||||
for ( final Object[] row : underlyingData )
|
||||
writeRow(out, row);
|
||||
}
|
||||
out.println();
|
||||
}
|
||||
|
||||
|
|
@ -735,53 +732,47 @@ public class GATKReportTable {
|
|||
}
|
||||
|
||||
private List<Object[]> getOrderedRows() {
|
||||
if ( sortByAllColumns ) {
|
||||
Collections.sort(underlyingData, new Comparator<Object[]>() {
|
||||
//INVARIANT the two arrays are of the same length and corresponding elements are of the same type
|
||||
@Override
|
||||
public int compare(Object[] objectArr1, Object[] objectArr2) {
|
||||
final int EQUAL = 0;
|
||||
|
||||
int result = EQUAL;
|
||||
|
||||
int l = objectArr1.length;
|
||||
for (int x = 0; x < l; x++) {
|
||||
if (objectArr1[x] instanceof Integer) {
|
||||
result = ((Integer)objectArr1[x]).compareTo((Integer)objectArr2[x]);
|
||||
if( result != EQUAL) {
|
||||
return result;
|
||||
switch (sortingWay) {
|
||||
case SORT_BY_COLUMN:
|
||||
Collections.sort(underlyingData, new Comparator<Object[]>() {
|
||||
//INVARIANT the two arrays are of the same length and corresponding elements are of the same type
|
||||
@Override
|
||||
public int compare(Object[] objectArr1, Object[] objectArr2) {
|
||||
final int EQUAL = 0;
|
||||
int result = EQUAL;
|
||||
int l = objectArr1.length;
|
||||
for (int x = 0; x < l; x++) {
|
||||
if (objectArr1[x] instanceof Integer) {
|
||||
result = ((Integer)objectArr1[x]).compareTo((Integer)objectArr2[x]);
|
||||
} else if (objectArr1[x] instanceof Double) {
|
||||
result = ((Double)objectArr1[x]).compareTo((Double)objectArr2[x]);
|
||||
} else { // default uses String comparison
|
||||
result = objectArr1[x].toString().compareTo(objectArr2[x].toString());
|
||||
}
|
||||
if( result != EQUAL) {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
} else if (objectArr1[x] instanceof Double) {
|
||||
result = ((Double)objectArr1[x]).compareTo((Double)objectArr2[x]);
|
||||
if( result != EQUAL) {
|
||||
return result;
|
||||
}
|
||||
} else { // default uses String comparison
|
||||
result = objectArr1[x].toString().compareTo(objectArr2[x].toString());
|
||||
if( result != EQUAL) {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
return result;
|
||||
});
|
||||
return underlyingData;
|
||||
case SORT_BY_ROW:
|
||||
final TreeMap<Object, Integer> sortedMap;
|
||||
try {
|
||||
sortedMap = new TreeMap<Object, Integer>(rowIdToIndex);
|
||||
} catch (ClassCastException e) {
|
||||
return underlyingData;
|
||||
}
|
||||
});
|
||||
return underlyingData;
|
||||
} else if ( !sortByRowID ) {
|
||||
return underlyingData;
|
||||
|
||||
final List<Object[]> orderedData = new ArrayList<Object[]>(underlyingData.size());
|
||||
for ( final int rowKey : sortedMap.values() )
|
||||
orderedData.add(underlyingData.get(rowKey));
|
||||
|
||||
return orderedData;
|
||||
default:
|
||||
return underlyingData;
|
||||
}
|
||||
|
||||
final TreeMap<Object, Integer> sortedMap;
|
||||
try {
|
||||
sortedMap = new TreeMap<Object, Integer>(rowIdToIndex);
|
||||
} catch (ClassCastException e) {
|
||||
return underlyingData;
|
||||
}
|
||||
|
||||
final List<Object[]> orderedData = new ArrayList<Object[]>(underlyingData.size());
|
||||
for ( final int rowKey : sortedMap.values() )
|
||||
orderedData.add(underlyingData.get(rowKey));
|
||||
|
||||
return orderedData;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
public T traverse( final ActiveRegionWalker<M,T> walker,
|
||||
final LocusShardDataProvider dataProvider,
|
||||
T sum) {
|
||||
logger.debug(String.format("TraverseActiveRegion.traverse: Shard is %s", dataProvider));
|
||||
logger.debug(String.format("TraverseActiveRegions.traverse: Shard is %s", dataProvider));
|
||||
|
||||
final LocusView locusView = new AllLocusView(dataProvider);
|
||||
|
||||
|
|
@ -227,7 +227,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
final GenomeLoc extendedLoc = workQueue.peek().getExtendedLoc();
|
||||
if ( extendedLoc.getStop() < minStart || (currentContig != null && !workQueue.peek().getExtendedLoc().getContig().equals(currentContig))) {
|
||||
final ActiveRegion activeRegion = workQueue.remove();
|
||||
sum = processActiveRegion( activeRegion, myReads, workQueue, sum, walker );
|
||||
sum = processActiveRegion( activeRegion, sum, walker );
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
|
|
@ -236,9 +236,9 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
return sum;
|
||||
}
|
||||
|
||||
private T processActiveRegion( final ActiveRegion activeRegion, final LinkedHashSet<GATKSAMRecord> reads, final Queue<ActiveRegion> workQueue, final T sum, final ActiveRegionWalker<M,T> walker ) {
|
||||
private T processActiveRegion( final ActiveRegion activeRegion, final T sum, final ActiveRegionWalker<M,T> walker ) {
|
||||
final ArrayList<GATKSAMRecord> placedReads = new ArrayList<GATKSAMRecord>();
|
||||
for( final GATKSAMRecord read : reads ) {
|
||||
for( final GATKSAMRecord read : myReads ) {
|
||||
final GenomeLoc readLoc = this.engine.getGenomeLocParser().createGenomeLoc( read );
|
||||
if( activeRegion.getLocation().overlapsP( readLoc ) ) {
|
||||
// The region which the highest amount of overlap is chosen as the primary region for the read (tie breaking is done as right most region)
|
||||
|
|
@ -278,7 +278,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
activeRegion.add( read );
|
||||
}
|
||||
}
|
||||
reads.removeAll( placedReads ); // remove all the reads which have been placed into their active region
|
||||
myReads.removeAll( placedReads ); // remove all the reads which have been placed into their active region
|
||||
// WARNING: This hashset relies on reads being exactly equal when they are placed in the list as when they are removed. So the ActiveRegionWalker can't modify the reads in any way.
|
||||
|
||||
logger.debug(">> Map call with " + activeRegion.getReads().size() + " " + (activeRegion.isActive ? "active" : "inactive") + " reads @ " + activeRegion.getLocation() + " with full extent: " + activeRegion.getReferenceLoc());
|
||||
|
|
|
|||
|
|
@ -197,7 +197,7 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
|
|||
}
|
||||
|
||||
updateCumulativeMetrics(dataProvider.getShard());
|
||||
printProgress(site);
|
||||
printProgress(site.getStopLocation());
|
||||
done = walker.isDone();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -65,7 +65,8 @@ public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,
|
|||
@Override
|
||||
public void progress(MapData lastProcessedMap) {
|
||||
if ( lastProcessedMap.refContext != null )
|
||||
printProgress(lastProcessedMap.refContext.getLocus());
|
||||
// note, need to use getStopLocation so we don't give an interval to ProgressMeterDaemon
|
||||
printProgress(lastProcessedMap.refContext.getLocus().getStopLocation());
|
||||
}
|
||||
});
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,92 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.variant.variantcontext.Allele;
|
||||
import org.broadinstitute.variant.variantcontext.Genotype;
|
||||
import org.broadinstitute.variant.variantcontext.GenotypesContext;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.variant.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.variant.vcf.VCFInfoHeaderLine;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: 1/3/13
|
||||
* Time: 11:36 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class AverageAltAlleleLength extends InfoFieldAnnotation implements ActiveRegionBasedAnnotation, ExperimentalAnnotation {
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() {
|
||||
return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Average Allele Length"));
|
||||
}
|
||||
|
||||
public List<String> getKeyNames() { return Arrays.asList("AAL"); }
|
||||
|
||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap ) {
|
||||
if ( !vc.hasLog10PError() )
|
||||
return null;
|
||||
|
||||
final GenotypesContext genotypes = vc.getGenotypes();
|
||||
if ( genotypes == null || genotypes.size() == 0 )
|
||||
return null;
|
||||
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
|
||||
double length = getMeanAltAlleleLength(vc);
|
||||
map.put(getKeyNames().get(0),String.format("%.2f",length));
|
||||
return map;
|
||||
}
|
||||
|
||||
public static double getMeanAltAlleleLength(VariantContext vc) {
|
||||
double averageLength = 1.0;
|
||||
if ( ! vc.isSNP() && ! vc.isSymbolic() ) {
|
||||
// adjust for the event length
|
||||
int averageLengthNum = 0;
|
||||
int averageLengthDenom = 0;
|
||||
int refLength = vc.getReference().length();
|
||||
for ( Allele a : vc.getAlternateAlleles() ) {
|
||||
int numAllele = vc.getCalledChrCount(a);
|
||||
int alleleSize;
|
||||
if ( a.length() == refLength ) {
|
||||
// SNP or MNP
|
||||
byte[] a_bases = a.getBases();
|
||||
byte[] ref_bases = vc.getReference().getBases();
|
||||
int n_mismatch = 0;
|
||||
for ( int idx = 0; idx < a_bases.length; idx++ ) {
|
||||
if ( a_bases[idx] != ref_bases[idx] )
|
||||
n_mismatch++;
|
||||
}
|
||||
alleleSize = n_mismatch;
|
||||
}
|
||||
else if ( a.isSymbolic() ) {
|
||||
alleleSize = 1;
|
||||
} else {
|
||||
alleleSize = Math.abs(refLength-a.length());
|
||||
}
|
||||
averageLengthNum += alleleSize*numAllele;
|
||||
averageLengthDenom += numAllele;
|
||||
}
|
||||
averageLength = ( (double) averageLengthNum )/averageLengthDenom;
|
||||
}
|
||||
|
||||
return averageLength;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,5 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import com.sun.org.apache.bcel.internal.generic.AALOAD;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -68,50 +69,17 @@ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotati
|
|||
if ( depth == 0 )
|
||||
return null;
|
||||
|
||||
double QD = -10.0 * vc.getLog10PError() / (double)depth;
|
||||
double altAlleleLength = AverageAltAlleleLength.getMeanAltAlleleLength(vc);
|
||||
double QD = -10.0 * vc.getLog10PError() / ((double)depth * altAlleleLength);
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
|
||||
if ( ! vc.isSNP() && ! vc.isSymbolic() ) {
|
||||
// adjust for the event length
|
||||
int averageLengthNum = 0;
|
||||
int averageLengthDenom = 0;
|
||||
int refLength = vc.getReference().length();
|
||||
for ( Allele a : vc.getAlternateAlleles() ) {
|
||||
int numAllele = vc.getCalledChrCount(a);
|
||||
int alleleSize;
|
||||
if ( a.length() == refLength ) {
|
||||
// SNP or MNP
|
||||
byte[] a_bases = a.getBases();
|
||||
byte[] ref_bases = vc.getReference().getBases();
|
||||
int n_mismatch = 0;
|
||||
for ( int idx = 0; idx < a_bases.length; idx++ ) {
|
||||
if ( a_bases[idx] != ref_bases[idx] )
|
||||
n_mismatch++;
|
||||
}
|
||||
alleleSize = n_mismatch;
|
||||
}
|
||||
else if ( a.isSymbolic() ) {
|
||||
alleleSize = 1;
|
||||
} else {
|
||||
alleleSize = Math.abs(refLength-a.length());
|
||||
}
|
||||
averageLengthNum += alleleSize*numAllele;
|
||||
averageLengthDenom += numAllele;
|
||||
}
|
||||
double averageLength = ( (double) averageLengthNum )/averageLengthDenom;
|
||||
QD /= averageLength;
|
||||
map.put(getKeyNames().get(1),String.format("%.2f",averageLength));
|
||||
}
|
||||
|
||||
map.put(getKeyNames().get(0), String.format("%.2f", QD));
|
||||
return map;
|
||||
}
|
||||
|
||||
public List<String> getKeyNames() { return Arrays.asList("QD","AAL"); }
|
||||
public List<String> getKeyNames() { return Arrays.asList("QD"); }
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() {
|
||||
return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Variant Confidence/Quality by Depth"),
|
||||
new VCFInfoHeaderLine(getKeyNames().get(1), 1, VCFHeaderLineType.Float, "Average Allele Length"));
|
||||
return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Variant Confidence/Quality by Depth"));
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -113,25 +113,39 @@ import java.util.List;
|
|||
@ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class, UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class})
|
||||
@PartitionBy(PartitionType.READ)
|
||||
public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSchedulable {
|
||||
/**
|
||||
* all the command line arguments for BQSR and it's covariates
|
||||
*/
|
||||
@ArgumentCollection
|
||||
private final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); // all the command line arguments for BQSR and it's covariates
|
||||
private final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
|
||||
|
||||
/**
|
||||
* When you have nct > 1, BQSR uses nct times more memory to compute its recalibration tables, for efficiency
|
||||
* purposes. If you have many covariates, and therefore are using a lot of memory, you can use this flag
|
||||
* to safely access only one table. There may be some CPU cost, but as long as the table is really big
|
||||
* there should be relatively little CPU costs.
|
||||
*/
|
||||
@Argument(fullName = "lowMemoryMode", shortName="lowMemoryMode", doc="Reduce memory usage in multi-threaded code at the expense of threading efficiency", required = false)
|
||||
public boolean lowMemoryMode = false;
|
||||
|
||||
@Advanced
|
||||
@Argument(fullName = "bqsrBAQGapOpenPenalty", shortName="bqsrBAQGOP", doc="BQSR BAQ gap open penalty (Phred Scaled). Default value is 40. 30 is perhaps better for whole genome call sets", required = false)
|
||||
public double BAQGOP = BAQ.DEFAULT_GOP;
|
||||
|
||||
private QuantizationInfo quantizationInfo; // an object that keeps track of the information necessary for quality score quantization
|
||||
/**
|
||||
* an object that keeps track of the information necessary for quality score quantization
|
||||
*/
|
||||
private QuantizationInfo quantizationInfo;
|
||||
|
||||
private RecalibrationTables recalibrationTables;
|
||||
|
||||
private Covariate[] requestedCovariates; // list to hold the all the covariate objects that were requested (required + standard + experimental)
|
||||
/**
|
||||
* list to hold the all the covariate objects that were requested (required + standard + experimental)
|
||||
*/
|
||||
private Covariate[] requestedCovariates;
|
||||
|
||||
private RecalibrationEngine recalibrationEngine;
|
||||
|
||||
private int minimumQToUse;
|
||||
|
||||
protected static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\
|
||||
|
||||
private static final String NO_DBSNP_EXCEPTION = "This calculation is critically dependent on being able to skip over known variant sites. Please provide a VCF file containing known sites of genetic variation.";
|
||||
|
||||
private BAQ baq; // BAQ the reads on the fly to generate the alignment uncertainty vector
|
||||
|
|
@ -143,7 +157,6 @@ public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSche
|
|||
* Based on the covariates' estimates for initial capacity allocate the data hashmap
|
||||
*/
|
||||
public void initialize() {
|
||||
|
||||
baq = new BAQ(BAQGOP); // setup the BAQ object with the provided gap open penalty
|
||||
|
||||
// check for unsupported access
|
||||
|
|
@ -185,29 +198,20 @@ public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSche
|
|||
throw new UserException.CouldNotCreateOutputFile(RAC.RECAL_TABLE_FILE, e);
|
||||
}
|
||||
|
||||
int numReadGroups = 0;
|
||||
for ( final SAMFileHeader header : getToolkit().getSAMFileHeaders() )
|
||||
numReadGroups += header.getReadGroups().size();
|
||||
recalibrationTables = new RecalibrationTables(requestedCovariates, numReadGroups, RAC.RECAL_TABLE_UPDATE_LOG);
|
||||
|
||||
recalibrationEngine = initializeRecalibrationEngine();
|
||||
recalibrationEngine.initialize(requestedCovariates, recalibrationTables);
|
||||
|
||||
initializeRecalibrationEngine();
|
||||
minimumQToUse = getToolkit().getArguments().PRESERVE_QSCORES_LESS_THAN;
|
||||
referenceReader = getToolkit().getReferenceDataSource().getReference();
|
||||
}
|
||||
|
||||
private RecalibrationEngine initializeRecalibrationEngine() {
|
||||
/**
|
||||
* Initialize the recalibration engine
|
||||
*/
|
||||
private void initializeRecalibrationEngine() {
|
||||
int numReadGroups = 0;
|
||||
for ( final SAMFileHeader header : getToolkit().getSAMFileHeaders() )
|
||||
numReadGroups += header.getReadGroups().size();
|
||||
|
||||
final Class recalibrationEngineClass = GATKLiteUtils.getProtectedClassIfAvailable(RecalibrationEngine.class);
|
||||
try {
|
||||
final Constructor constructor = recalibrationEngineClass.getDeclaredConstructor((Class[])null);
|
||||
constructor.setAccessible(true);
|
||||
return (RecalibrationEngine)constructor.newInstance();
|
||||
}
|
||||
catch (Exception e) {
|
||||
throw new ReviewedStingException("Unable to create RecalibrationEngine class instance " + recalibrationEngineClass.getSimpleName());
|
||||
}
|
||||
recalibrationEngine = new RecalibrationEngine(requestedCovariates, numReadGroups, RAC.RECAL_TABLE_UPDATE_LOG, lowMemoryMode);
|
||||
}
|
||||
|
||||
private boolean isLowQualityBase( final GATKSAMRecord read, final int offset ) {
|
||||
|
|
@ -501,14 +505,18 @@ public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSche
|
|||
logger.info("Processed: " + result + " reads");
|
||||
}
|
||||
|
||||
private RecalibrationTables getRecalibrationTable() {
|
||||
return recalibrationEngine.getFinalRecalibrationTables();
|
||||
}
|
||||
|
||||
private void generatePlots() {
|
||||
File recalFile = getToolkit().getArguments().BQSR_RECAL_FILE;
|
||||
if (recalFile != null) {
|
||||
RecalibrationReport report = new RecalibrationReport(recalFile);
|
||||
RecalUtils.generateRecalibrationPlot(RAC, report.getRecalibrationTables(), recalibrationTables, requestedCovariates);
|
||||
RecalUtils.generateRecalibrationPlot(RAC, report.getRecalibrationTables(), getRecalibrationTable(), requestedCovariates);
|
||||
}
|
||||
else
|
||||
RecalUtils.generateRecalibrationPlot(RAC, recalibrationTables, requestedCovariates);
|
||||
RecalUtils.generateRecalibrationPlot(RAC, getRecalibrationTable(), requestedCovariates);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -517,10 +525,10 @@ public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSche
|
|||
* generate a quantization map (recalibrated_qual -> quantized_qual)
|
||||
*/
|
||||
private void quantizeQualityScores() {
|
||||
quantizationInfo = new QuantizationInfo(recalibrationTables, RAC.QUANTIZING_LEVELS);
|
||||
quantizationInfo = new QuantizationInfo(getRecalibrationTable(), RAC.QUANTIZING_LEVELS);
|
||||
}
|
||||
|
||||
private void generateReport() {
|
||||
RecalUtils.outputRecalibrationReport(RAC, quantizationInfo, recalibrationTables, requestedCovariates, RAC.SORT_BY_ALL_COLUMNS);
|
||||
RecalUtils.outputRecalibrationReport(RAC, quantizationInfo, getRecalibrationTable(), requestedCovariates, RAC.SORT_BY_ALL_COLUMNS);
|
||||
}
|
||||
}
|
||||
|
|
@ -58,7 +58,8 @@ public final class ReadRecalibrationInfo {
|
|||
if ( covariates == null ) throw new IllegalArgumentException("covariates cannot be null");
|
||||
if ( skips == null ) throw new IllegalArgumentException("skips cannot be null");
|
||||
if ( snpErrors == null ) throw new IllegalArgumentException("snpErrors cannot be null");
|
||||
// future: may allow insertionErrors && deletionErrors to be null, so don't enforce
|
||||
if ( insertionErrors == null ) throw new IllegalArgumentException("insertionErrors cannot be null");
|
||||
if ( deletionErrors == null ) throw new IllegalArgumentException("deletionErrors cannot be null");
|
||||
|
||||
this.read = read;
|
||||
this.baseQuals = read.getBaseQualities();
|
||||
|
|
@ -73,8 +74,8 @@ public final class ReadRecalibrationInfo {
|
|||
|
||||
if ( skips.length != length ) throw new IllegalArgumentException("skips.length " + snpErrors.length + " != length " + length);
|
||||
if ( snpErrors.length != length ) throw new IllegalArgumentException("snpErrors.length " + snpErrors.length + " != length " + length);
|
||||
if ( insertionErrors != null && insertionErrors.length != length ) throw new IllegalArgumentException("insertionErrors.length " + snpErrors.length + " != length " + length);
|
||||
if ( deletionErrors != null && deletionErrors.length != length ) throw new IllegalArgumentException("deletionErrors.length " + snpErrors.length + " != length " + length);
|
||||
if ( insertionErrors.length != length ) throw new IllegalArgumentException("insertionErrors.length " + snpErrors.length + " != length " + length);
|
||||
if ( deletionErrors.length != length ) throw new IllegalArgumentException("deletionErrors.length " + snpErrors.length + " != length " + length);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -207,7 +207,7 @@ public class RecalibrationArgumentCollection {
|
|||
public GATKReportTable generateReportTable(final String covariateNames) {
|
||||
GATKReportTable argumentsTable;
|
||||
if(SORT_BY_ALL_COLUMNS) {
|
||||
argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2, false, true);
|
||||
argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2, GATKReportTable.TableSortingWay.SORT_BY_COLUMN);
|
||||
} else {
|
||||
argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,35 +1,87 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
|
||||
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
|
||||
import org.broadinstitute.sting.utils.recalibration.*;
|
||||
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
public interface RecalibrationEngine {
|
||||
import java.io.PrintStream;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
public class RecalibrationEngine {
|
||||
final protected Covariate[] covariates;
|
||||
final private int numReadGroups;
|
||||
final private PrintStream maybeLogStream;
|
||||
final private boolean lowMemoryMode;
|
||||
|
||||
/**
|
||||
* Has finalizeData() been called?
|
||||
*/
|
||||
private boolean finalized = false;
|
||||
|
||||
/**
|
||||
* The final (merged, etc) recalibration tables, suitable for downstream analysis.
|
||||
*/
|
||||
private RecalibrationTables finalRecalibrationTables = null;
|
||||
|
||||
private final List<RecalibrationTables> recalibrationTablesList = new LinkedList<RecalibrationTables>();
|
||||
|
||||
private final ThreadLocal<RecalibrationTables> threadLocalTables = new ThreadLocal<RecalibrationTables>() {
|
||||
private synchronized RecalibrationTables makeAndCaptureTable() {
|
||||
final RecalibrationTables newTable = new RecalibrationTables(covariates, numReadGroups, maybeLogStream);
|
||||
recalibrationTablesList.add(newTable);
|
||||
return newTable;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected synchronized RecalibrationTables initialValue() {
|
||||
if ( lowMemoryMode ) {
|
||||
return recalibrationTablesList.isEmpty() ? makeAndCaptureTable() : recalibrationTablesList.get(0);
|
||||
} else {
|
||||
return makeAndCaptureTable();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Get a recalibration table suitable for updating the underlying RecalDatums
|
||||
*
|
||||
* May return a thread-local version, or a single version, depending on the initialization
|
||||
* arguments of this instance.
|
||||
*
|
||||
* @return updated tables
|
||||
*/
|
||||
protected RecalibrationTables getUpdatableRecalibrationTables() {
|
||||
return threadLocalTables.get();
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize the recalibration engine
|
||||
*
|
||||
|
|
@ -40,21 +92,125 @@ public interface RecalibrationEngine {
|
|||
* The engine should collect match and mismatch data into the recalibrationTables data.
|
||||
*
|
||||
* @param covariates an array of the covariates we'll be using in this engine, order matters
|
||||
* @param recalibrationTables the destination recalibrationTables where stats should be collected
|
||||
* @param numReadGroups the number of read groups we should use for the recalibration tables
|
||||
* @param maybeLogStream an optional print stream for logging calls to the nestedhashmap in the recalibration tables
|
||||
*/
|
||||
public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables);
|
||||
public RecalibrationEngine(final Covariate[] covariates, final int numReadGroups, final PrintStream maybeLogStream, final boolean enableLowMemoryMode) {
|
||||
if ( covariates == null ) throw new IllegalArgumentException("Covariates cannot be null");
|
||||
if ( numReadGroups < 1 ) throw new IllegalArgumentException("numReadGroups must be >= 1 but got " + numReadGroups);
|
||||
|
||||
this.covariates = covariates.clone();
|
||||
this.numReadGroups = numReadGroups;
|
||||
this.maybeLogStream = maybeLogStream;
|
||||
this.lowMemoryMode = enableLowMemoryMode;
|
||||
}
|
||||
|
||||
/**
|
||||
* Update the recalibration statistics using the information in recalInfo
|
||||
* @param recalInfo data structure holding information about the recalibration values for a single read
|
||||
*/
|
||||
@Requires("recalInfo != null")
|
||||
public void updateDataForRead(final ReadRecalibrationInfo recalInfo);
|
||||
public void updateDataForRead( final ReadRecalibrationInfo recalInfo ) {
|
||||
final GATKSAMRecord read = recalInfo.getRead();
|
||||
final ReadCovariates readCovariates = recalInfo.getCovariatesValues();
|
||||
final RecalibrationTables tables = getUpdatableRecalibrationTables();
|
||||
final NestedIntegerArray<RecalDatum> qualityScoreTable = tables.getQualityScoreTable();
|
||||
|
||||
for( int offset = 0; offset < read.getReadBases().length; offset++ ) {
|
||||
if( ! recalInfo.skip(offset) ) {
|
||||
|
||||
for (final EventType eventType : EventType.values()) {
|
||||
final int[] keys = readCovariates.getKeySet(offset, eventType);
|
||||
final int eventIndex = eventType.ordinal();
|
||||
final byte qual = recalInfo.getQual(eventType, offset);
|
||||
final double isError = recalInfo.getErrorFraction(eventType, offset);
|
||||
|
||||
RecalUtils.incrementDatumOrPutIfNecessary(qualityScoreTable, qual, isError, keys[0], keys[1], eventIndex);
|
||||
|
||||
for (int i = 2; i < covariates.length; i++) {
|
||||
if (keys[i] < 0)
|
||||
continue;
|
||||
|
||||
RecalUtils.incrementDatumOrPutIfNecessary(tables.getTable(i), qual, isError, keys[0], keys[1], keys[i], eventIndex);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Finalize, if appropriate, all derived data in recalibrationTables.
|
||||
*
|
||||
* Called once after all calls to updateDataForRead have been issued.
|
||||
*
|
||||
* Assumes that all of the principal tables (by quality score) have been completely updated,
|
||||
* and walks over this data to create summary data tables like by read group table.
|
||||
*/
|
||||
public void finalizeData();
|
||||
public void finalizeData() {
|
||||
if ( finalized ) throw new IllegalStateException("FinalizeData() has already been called");
|
||||
|
||||
// merge all of the thread-local tables
|
||||
finalRecalibrationTables = mergeThreadLocalRecalibrationTables();
|
||||
|
||||
final NestedIntegerArray<RecalDatum> byReadGroupTable = finalRecalibrationTables.getReadGroupTable();
|
||||
final NestedIntegerArray<RecalDatum> byQualTable = finalRecalibrationTables.getQualityScoreTable();
|
||||
|
||||
// iterate over all values in the qual table
|
||||
for ( NestedIntegerArray.Leaf<RecalDatum> leaf : byQualTable.getAllLeaves() ) {
|
||||
final int rgKey = leaf.keys[0];
|
||||
final int eventIndex = leaf.keys[2];
|
||||
final RecalDatum rgDatum = byReadGroupTable.get(rgKey, eventIndex);
|
||||
final RecalDatum qualDatum = leaf.value;
|
||||
|
||||
if ( rgDatum == null ) {
|
||||
// create a copy of qualDatum, and initialize byReadGroup table with it
|
||||
byReadGroupTable.put(new RecalDatum(qualDatum), rgKey, eventIndex);
|
||||
} else {
|
||||
// combine the qual datum with the existing datum in the byReadGroup table
|
||||
rgDatum.combine(qualDatum);
|
||||
}
|
||||
}
|
||||
|
||||
finalized = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Merge all of the thread local recalibration tables into a single one.
|
||||
*
|
||||
* Reuses one of the recalibration tables to hold the merged table, so this function can only be
|
||||
* called once in the engine.
|
||||
*
|
||||
* @return the merged recalibration table
|
||||
*/
|
||||
@Requires("! finalized")
|
||||
private RecalibrationTables mergeThreadLocalRecalibrationTables() {
|
||||
if ( recalibrationTablesList.isEmpty() ) throw new IllegalStateException("recalibration tables list is empty");
|
||||
|
||||
RecalibrationTables merged = null;
|
||||
for ( final RecalibrationTables table : recalibrationTablesList ) {
|
||||
if ( merged == null )
|
||||
// fast path -- if there's only only one table, so just make it the merged one
|
||||
merged = table;
|
||||
else {
|
||||
merged.combine(table);
|
||||
}
|
||||
}
|
||||
|
||||
return merged;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the final recalibration tables, after finalizeData() has been called
|
||||
*
|
||||
* This returns the finalized recalibration table collected by this engine.
|
||||
*
|
||||
* It is an error to call this function before finalizeData has been called
|
||||
*
|
||||
* @return the finalized recalibration table collected by this engine
|
||||
*/
|
||||
public RecalibrationTables getFinalRecalibrationTables() {
|
||||
if ( ! finalized ) throw new IllegalStateException("Cannot get final recalibration tables until finalizeData() has been called");
|
||||
return finalRecalibrationTables;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,144 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
import org.broadinstitute.sting.utils.classloader.PublicPackageSource;
|
||||
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
|
||||
import org.broadinstitute.sting.utils.recalibration.EventType;
|
||||
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalDatum;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
|
||||
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
public class StandardRecalibrationEngine implements RecalibrationEngine, PublicPackageSource {
|
||||
protected Covariate[] covariates;
|
||||
protected RecalibrationTables recalibrationTables;
|
||||
|
||||
@Override
|
||||
public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables) {
|
||||
if ( covariates == null ) throw new IllegalArgumentException("Covariates cannot be null");
|
||||
if ( recalibrationTables == null ) throw new IllegalArgumentException("recalibrationTables cannot be null");
|
||||
|
||||
this.covariates = covariates.clone();
|
||||
this.recalibrationTables = recalibrationTables;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void updateDataForRead( final ReadRecalibrationInfo recalInfo ) {
|
||||
final GATKSAMRecord read = recalInfo.getRead();
|
||||
final EventType eventType = EventType.BASE_SUBSTITUTION;
|
||||
final ReadCovariates readCovariates = recalInfo.getCovariatesValues();
|
||||
|
||||
for( int offset = 0; offset < read.getReadBases().length; offset++ ) {
|
||||
if( ! recalInfo.skip(offset) ) {
|
||||
final byte qual = recalInfo.getQual(eventType, offset);
|
||||
final double isError = recalInfo.getErrorFraction(eventType, offset);
|
||||
final int[] keys = readCovariates.getKeySet(offset, eventType);
|
||||
|
||||
incrementDatumOrPutIfNecessary(recalibrationTables.getQualityScoreTable(), qual, isError, keys[0], keys[1], eventType.index);
|
||||
|
||||
for (int i = 2; i < covariates.length; i++) {
|
||||
if (keys[i] < 0)
|
||||
continue;
|
||||
|
||||
incrementDatumOrPutIfNecessary(recalibrationTables.getTable(i), qual, isError, keys[0], keys[1], keys[i], eventType.index);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* creates a datum object with one observation and one or zero error
|
||||
*
|
||||
* @param reportedQual the quality score reported by the instrument for this base
|
||||
* @param isError whether or not the observation is an error
|
||||
* @return a new RecalDatum object with the observation and the error
|
||||
*/
|
||||
protected RecalDatum createDatumObject(final byte reportedQual, final double isError) {
|
||||
return new RecalDatum(1, isError, reportedQual);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create derived recalibration data tables
|
||||
*
|
||||
* Assumes that all of the principal tables (by quality score) have been completely updated,
|
||||
* and walks over this data to create summary data tables like by read group table.
|
||||
*/
|
||||
@Override
|
||||
public void finalizeData() {
|
||||
final NestedIntegerArray<RecalDatum> byReadGroupTable = recalibrationTables.getReadGroupTable();
|
||||
final NestedIntegerArray<RecalDatum> byQualTable = recalibrationTables.getQualityScoreTable();
|
||||
|
||||
// iterate over all values in the qual table
|
||||
for ( NestedIntegerArray.Leaf<RecalDatum> leaf : byQualTable.getAllLeaves() ) {
|
||||
final int rgKey = leaf.keys[0];
|
||||
final int eventIndex = leaf.keys[2];
|
||||
final RecalDatum rgDatum = byReadGroupTable.get(rgKey, eventIndex);
|
||||
final RecalDatum qualDatum = leaf.value;
|
||||
|
||||
if ( rgDatum == null ) {
|
||||
// create a copy of qualDatum, and initialize byReadGroup table with it
|
||||
byReadGroupTable.put(new RecalDatum(qualDatum), rgKey, eventIndex);
|
||||
} else {
|
||||
// combine the qual datum with the existing datum in the byReadGroup table
|
||||
rgDatum.combine(qualDatum);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Increments the RecalDatum at the specified position in the specified table, or put a new item there
|
||||
* if there isn't already one.
|
||||
*
|
||||
* Does this in a thread-safe way WITHOUT being synchronized: relies on the behavior of NestedIntegerArray.put()
|
||||
* to return false if another thread inserts a new item at our position in the middle of our put operation.
|
||||
*
|
||||
* @param table the table that holds/will hold our item
|
||||
* @param qual qual for this event
|
||||
* @param isError error value for this event
|
||||
* @param keys location in table of our item
|
||||
*/
|
||||
protected void incrementDatumOrPutIfNecessary( final NestedIntegerArray<RecalDatum> table,
|
||||
final byte qual,
|
||||
final double isError,
|
||||
final int... keys ) {
|
||||
final RecalDatum existingDatum = table.get(keys);
|
||||
|
||||
if ( existingDatum == null ) {
|
||||
// No existing item, try to put a new one
|
||||
if ( ! table.put(createDatumObject(qual, isError), keys) ) {
|
||||
// Failed to put a new item because another thread came along and put an item here first.
|
||||
// Get the newly-put item and increment it (item is guaranteed to exist at this point)
|
||||
table.get(keys).increment(1.0, isError);
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Easy case: already an item here, so increment it
|
||||
existingDatum.increment(1.0, isError);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -124,7 +124,7 @@ public class ErrorRatePerCycle extends LocusWalker<Integer, Integer> {
|
|||
|
||||
public void initialize() {
|
||||
report = new GATKReport();
|
||||
report.addTable(reportName, reportDescription, 6, true, false);
|
||||
report.addTable(reportName, reportDescription, 6);
|
||||
table = report.getTable(reportName);
|
||||
table.addColumn("readgroup");
|
||||
table.addColumn("cycle");
|
||||
|
|
|
|||
|
|
@ -162,7 +162,7 @@ public class VariantEvalReportWriter {
|
|||
// create the table
|
||||
final String tableName = ve.getSimpleName();
|
||||
final String tableDesc = ve.getClass().getAnnotation(Analysis.class).description();
|
||||
report.addTable(tableName, tableDesc, 1 + stratifiers.size() + (scanner.hasMoltenField() ? 2 : datamap.size()), true, false);
|
||||
report.addTable(tableName, tableDesc, 1 + stratifiers.size() + (scanner.hasMoltenField() ? 2 : datamap.size()));
|
||||
|
||||
// grab the table, and add the columns we need to it
|
||||
final GATKReportTable table = report.getTable(tableName);
|
||||
|
|
|
|||
|
|
@ -32,8 +32,8 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
|||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.variant.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.help.HelpUtils;
|
||||
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
|
||||
import org.broadinstitute.sting.utils.help.HelpConstants;
|
||||
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.variant.variantcontext.Allele;
|
||||
|
|
@ -81,7 +81,7 @@ public class VariantDataManager {
|
|||
final double theSTD = standardDeviation(theMean, iii);
|
||||
logger.info( annotationKeys.get(iii) + String.format(": \t mean = %.2f\t standard deviation = %.2f", theMean, theSTD) );
|
||||
if( Double.isNaN(theMean) ) {
|
||||
throw new UserException.BadInput("Values for " + annotationKeys.get(iii) + " annotation not detected for ANY training variant in the input callset. VariantAnnotator may be used to add these annotations. See " + HelpUtils.forumPost("discussion/49/using-variant-annotator"));
|
||||
throw new UserException.BadInput("Values for " + annotationKeys.get(iii) + " annotation not detected for ANY training variant in the input callset. VariantAnnotator may be used to add these annotations. See " + HelpConstants.forumPost("discussion/49/using-variant-annotator"));
|
||||
}
|
||||
|
||||
foundZeroVarianceAnnotation = foundZeroVarianceAnnotation || (theSTD < 1E-6);
|
||||
|
|
|
|||
|
|
@ -54,9 +54,10 @@ import java.util.*;
|
|||
* Left-aligns indels from a variants file.
|
||||
*
|
||||
* <p>
|
||||
* LeftAlignVariants is a tool that takes a VCF file and left-aligns any indels inside it. The same indel can often be
|
||||
* LeftAlignVariants is a tool that takes a VCF file and left-aligns the indels inside it. The same indel can often be
|
||||
* placed at multiple positions and still represent the same haplotype. While the standard convention with VCF is to
|
||||
* place an indel at the left-most position this doesn't always happen, so this tool can be used to left-align them.
|
||||
* Note that this tool cannot handle anything other than bi-allelic, simple indels. Complex events are written out unchanged.
|
||||
*
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ import net.sf.samtools.SAMSequenceDictionary;
|
|||
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.help.HelpUtils;
|
||||
import org.broadinstitute.sting.utils.help.HelpConstants;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
|
||||
|
|
@ -278,7 +278,7 @@ public class UserException extends ReviewedStingException {
|
|||
|
||||
public static class ReadMissingReadGroup extends MalformedBAM {
|
||||
public ReadMissingReadGroup(SAMRecord read) {
|
||||
super(read, String.format("Read %s is either missing the read group or its read group is not defined in the BAM header, both of which are required by the GATK. Please use " + HelpUtils.forumPost("discussion/59/companion-utilities-replacereadgroups to fix this problem"), read.getReadName()));
|
||||
super(read, String.format("Read %s is either missing the read group or its read group is not defined in the BAM header, both of which are required by the GATK. Please use " + HelpConstants.forumPost("discussion/59/companion-utilities-replacereadgroups to fix this problem"), read.getReadName()));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -354,7 +354,7 @@ public class UserException extends ReviewedStingException {
|
|||
super(String.format("Lexicographically sorted human genome sequence detected in %s."
|
||||
+ "\nFor safety's sake the GATK requires human contigs in karyotypic order: 1, 2, ..., 10, 11, ..., 20, 21, 22, X, Y with M either leading or trailing these contigs."
|
||||
+ "\nThis is because all distributed GATK resources are sorted in karyotypic order, and your processing will fail when you need to use these files."
|
||||
+ "\nYou can use the ReorderSam utility to fix this problem: " + HelpUtils.forumPost("discussion/58/companion-utilities-reordersam")
|
||||
+ "\nYou can use the ReorderSam utility to fix this problem: " + HelpConstants.forumPost("discussion/58/companion-utilities-reordersam")
|
||||
+ "\n %s contigs = %s",
|
||||
name, name, ReadUtils.prettyPrintSequenceRecords(dict)));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -50,7 +50,7 @@ public class ForumAPIUtils {
|
|||
Gson gson = new Gson();
|
||||
List<String> output = new ArrayList<String>();
|
||||
|
||||
String text = httpGet(HelpUtils.GATK_FORUM_API_URL + "categories.json?CategoryIdentifier=tool-bulletin&page=1-100000&" + ACCESS_TOKEN + forumKey);
|
||||
String text = httpGet(HelpConstants.GATK_FORUM_API_URL + "categories.json?CategoryIdentifier=tool-bulletin&page=1-100000&" + ACCESS_TOKEN + forumKey);
|
||||
|
||||
APIQuery details = gson.fromJson(text, APIQuery.class);
|
||||
ForumDiscussion[] discussions = details.Discussions;
|
||||
|
|
@ -158,7 +158,7 @@ public class ForumAPIUtils {
|
|||
Gson gson = new Gson();
|
||||
|
||||
String data = gson.toJson(post.getPostData());
|
||||
httpPost(data, HelpUtils.GATK_FORUM_API_URL + "post/discussion.json?" + ACCESS_TOKEN + forumKey);
|
||||
httpPost(data, HelpConstants.GATK_FORUM_API_URL + "post/discussion.json?" + ACCESS_TOKEN + forumKey);
|
||||
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ public class GATKDocUtils {
|
|||
/**
|
||||
* The URL root for RELEASED GATKDOC units
|
||||
*/
|
||||
public final static String URL_ROOT_FOR_RELEASE_GATKDOCS = HelpUtils.GATK_DOCS_URL;
|
||||
public final static String URL_ROOT_FOR_RELEASE_GATKDOCS = HelpConstants.GATK_DOCS_URL;
|
||||
/**
|
||||
* The URL root for STABLE GATKDOC units
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -0,0 +1,81 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.help;
|
||||
|
||||
import com.sun.javadoc.FieldDoc;
|
||||
import com.sun.javadoc.PackageDoc;
|
||||
import com.sun.javadoc.ProgramElementDoc;
|
||||
import org.broadinstitute.sting.utils.classloader.JVMUtils;
|
||||
|
||||
import java.lang.reflect.Field;
|
||||
|
||||
public class HelpConstants {
|
||||
|
||||
public final static String BASE_GATK_URL = "http://www.broadinstitute.org/gatk";
|
||||
public final static String GATK_DOCS_URL = BASE_GATK_URL + "/gatkdocs/";
|
||||
public final static String GATK_FORUM_URL = "http://gatkforums.broadinstitute.org/";
|
||||
public final static String GATK_FORUM_API_URL = "https://gatkforums.broadinstitute.org/api/v1/";
|
||||
|
||||
public static String forumPost(String post) {
|
||||
return GATK_FORUM_URL + post;
|
||||
}
|
||||
|
||||
protected static boolean assignableToClass(ProgramElementDoc classDoc, Class lhsClass, boolean requireConcrete) {
|
||||
try {
|
||||
Class type = getClassForDoc(classDoc);
|
||||
return lhsClass.isAssignableFrom(type) && (!requireConcrete || JVMUtils.isConcrete(type));
|
||||
} catch (Throwable t) {
|
||||
// Ignore errors.
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
protected static Class getClassForDoc(ProgramElementDoc doc) throws ClassNotFoundException {
|
||||
return Class.forName(getClassName(doc));
|
||||
}
|
||||
|
||||
protected static Field getFieldForFieldDoc(FieldDoc fieldDoc) {
|
||||
try {
|
||||
Class clazz = getClassForDoc(fieldDoc.containingClass());
|
||||
return JVMUtils.findField(clazz, fieldDoc.name());
|
||||
} catch (ClassNotFoundException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Reconstitute the class name from the given class JavaDoc object.
|
||||
*
|
||||
* @param doc the Javadoc model for the given class.
|
||||
* @return The (string) class name of the given class.
|
||||
*/
|
||||
protected static String getClassName(ProgramElementDoc doc) {
|
||||
PackageDoc containingPackage = doc.containingPackage();
|
||||
return containingPackage.name().length() > 0 ?
|
||||
String.format("%s.%s", containingPackage.name(), doc.name()) :
|
||||
String.format("%s", doc.name());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -33,15 +33,6 @@ import java.lang.reflect.Field;
|
|||
|
||||
public class HelpUtils {
|
||||
|
||||
public final static String BASE_GATK_URL = "http://www.broadinstitute.org/gatk";
|
||||
public final static String GATK_DOCS_URL = BASE_GATK_URL + "/gatkdocs/";
|
||||
public final static String GATK_FORUM_URL = "http://gatkforums.broadinstitute.org/";
|
||||
public final static String GATK_FORUM_API_URL = "https://gatkforums.broadinstitute.org/api/v1/";
|
||||
|
||||
public static String forumPost(String post) {
|
||||
return GATK_FORUM_URL + post;
|
||||
}
|
||||
|
||||
protected static boolean assignableToClass(ProgramElementDoc classDoc, Class lhsClass, boolean requireConcrete) {
|
||||
try {
|
||||
Class type = getClassForDoc(classDoc);
|
||||
|
|
|
|||
|
|
@ -160,6 +160,13 @@ public class ProgressMeter {
|
|||
public ProgressMeter(final File performanceLogFile,
|
||||
final String processingUnitName,
|
||||
final GenomeLocSortedSet processingIntervals) {
|
||||
this(performanceLogFile, processingUnitName, processingIntervals, ProgressMeterDaemon.DEFAULT_POLL_FREQUENCY_MILLISECONDS);
|
||||
}
|
||||
|
||||
protected ProgressMeter(final File performanceLogFile,
|
||||
final String processingUnitName,
|
||||
final GenomeLocSortedSet processingIntervals,
|
||||
final long pollingFrequency) {
|
||||
if ( processingUnitName == null ) throw new IllegalArgumentException("processingUnitName cannot be null");
|
||||
if ( processingIntervals == null ) throw new IllegalArgumentException("Target intervals cannot be null");
|
||||
|
||||
|
|
@ -184,10 +191,14 @@ public class ProgressMeter {
|
|||
targetSizeInBP = processingIntervals.coveredSize();
|
||||
|
||||
// start up the timer
|
||||
progressMeterDaemon = new ProgressMeterDaemon(this);
|
||||
progressMeterDaemon = new ProgressMeterDaemon(this, pollingFrequency);
|
||||
start();
|
||||
}
|
||||
|
||||
public ProgressMeterDaemon getProgressMeterDaemon() {
|
||||
return progressMeterDaemon;
|
||||
}
|
||||
|
||||
/**
|
||||
* Start up the progress meter, printing initialization message and starting up the
|
||||
* daemon thread for periodic printing.
|
||||
|
|
@ -223,11 +234,13 @@ public class ProgressMeter {
|
|||
* the progress itself. A separate printing daemon periodically polls the meter to print out
|
||||
* progress
|
||||
*
|
||||
* @param loc Current location, can be null if you are at the end of the processing unit
|
||||
* @param loc Current location, can be null if you are at the end of the processing unit. Must
|
||||
* have size == 1 (cannot be multiple bases in size).
|
||||
* @param nTotalRecordsProcessed the total number of records we've processed
|
||||
*/
|
||||
public synchronized void notifyOfProgress(final GenomeLoc loc, final long nTotalRecordsProcessed) {
|
||||
if ( nTotalRecordsProcessed < 0 ) throw new IllegalArgumentException("nTotalRecordsProcessed must be >= 0");
|
||||
if ( loc.size() != 1 ) throw new IllegalArgumentException("GenomeLoc must have size == 1 but got " + loc);
|
||||
|
||||
// weird comparison to ensure that loc == null (in unmapped reads) is keep before maxGenomeLoc == null (on startup)
|
||||
this.maxGenomeLoc = loc == null ? loc : (maxGenomeLoc == null ? loc : loc.max(maxGenomeLoc));
|
||||
|
|
|
|||
|
|
@ -8,10 +8,20 @@ package org.broadinstitute.sting.utils.progressmeter;
|
|||
* Time: 9:16 PM
|
||||
*/
|
||||
public final class ProgressMeterDaemon extends Thread {
|
||||
public final static long DEFAULT_POLL_FREQUENCY_MILLISECONDS = 10 * 1000;
|
||||
|
||||
/**
|
||||
* How frequently should we poll and print progress?
|
||||
*/
|
||||
private final static long POLL_FREQUENCY_MILLISECONDS = 10 * 1000;
|
||||
private final long pollFrequencyMilliseconds;
|
||||
|
||||
/**
|
||||
* How long are we waiting between print progress calls are issued?
|
||||
* @return the time in milliseconds between progress meter calls
|
||||
*/
|
||||
private long getPollFrequencyMilliseconds() {
|
||||
return pollFrequencyMilliseconds;
|
||||
}
|
||||
|
||||
/**
|
||||
* Are we to continue periodically printing status, or should we shut down?
|
||||
|
|
@ -27,13 +37,20 @@ public final class ProgressMeterDaemon extends Thread {
|
|||
* Create a new ProgressMeterDaemon printing progress for meter
|
||||
* @param meter the progress meter to print progress of
|
||||
*/
|
||||
public ProgressMeterDaemon(final ProgressMeter meter) {
|
||||
public ProgressMeterDaemon(final ProgressMeter meter, final long pollFrequencyMilliseconds) {
|
||||
if ( meter == null ) throw new IllegalArgumentException("meter cannot be null");
|
||||
if ( pollFrequencyMilliseconds <= 0 ) throw new IllegalArgumentException("pollFrequencyMilliseconds must be greater than 0 but got " + pollFrequencyMilliseconds);
|
||||
|
||||
this.meter = meter;
|
||||
this.pollFrequencyMilliseconds = pollFrequencyMilliseconds;
|
||||
setDaemon(true);
|
||||
setName("ProgressMeterDaemon");
|
||||
}
|
||||
|
||||
public ProgressMeterDaemon(final ProgressMeter meter) {
|
||||
this(meter, DEFAULT_POLL_FREQUENCY_MILLISECONDS);
|
||||
}
|
||||
|
||||
/**
|
||||
* Tells this daemon thread to shutdown at the next opportunity, as the progress
|
||||
* metering is complete.
|
||||
|
|
@ -42,6 +59,14 @@ public final class ProgressMeterDaemon extends Thread {
|
|||
this.done = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Is this daemon thread done?
|
||||
* @return true if done, false otherwise
|
||||
*/
|
||||
public boolean isDone() {
|
||||
return done;
|
||||
}
|
||||
|
||||
/**
|
||||
* Start up the ProgressMeterDaemon, polling every tens of seconds to print, if
|
||||
* necessary, the provided progress meter. Never exits until the JVM is complete,
|
||||
|
|
@ -51,7 +76,7 @@ public final class ProgressMeterDaemon extends Thread {
|
|||
while (! done) {
|
||||
meter.printProgress(false);
|
||||
try {
|
||||
Thread.sleep(POLL_FREQUENCY_MILLISECONDS);
|
||||
Thread.sleep(getPollFrequencyMilliseconds());
|
||||
} catch (InterruptedException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -198,7 +198,7 @@ public class BaseRecalibration {
|
|||
}
|
||||
|
||||
private double getGlobalDeltaQ(final int rgKey, final EventType errorModel) {
|
||||
final Double cached = globalDeltaQs.get(rgKey, errorModel.index);
|
||||
final Double cached = globalDeltaQs.get(rgKey, errorModel.ordinal());
|
||||
|
||||
if ( TEST_CACHING ) {
|
||||
final double calcd = calculateGlobalDeltaQ(rgKey, errorModel);
|
||||
|
|
@ -210,7 +210,7 @@ public class BaseRecalibration {
|
|||
}
|
||||
|
||||
private double getDeltaQReported(final int rgKey, final int qualKey, final EventType errorModel, final double globalDeltaQ) {
|
||||
final Double cached = deltaQReporteds.get(rgKey, qualKey, errorModel.index);
|
||||
final Double cached = deltaQReporteds.get(rgKey, qualKey, errorModel.ordinal());
|
||||
|
||||
if ( TEST_CACHING ) {
|
||||
final double calcd = calculateDeltaQReported(rgKey, qualKey, errorModel, globalDeltaQ, (byte)qualKey);
|
||||
|
|
@ -240,7 +240,7 @@ public class BaseRecalibration {
|
|||
private double calculateGlobalDeltaQ(final int rgKey, final EventType errorModel) {
|
||||
double result = 0.0;
|
||||
|
||||
final RecalDatum empiricalQualRG = recalibrationTables.getReadGroupTable().get(rgKey, errorModel.index);
|
||||
final RecalDatum empiricalQualRG = recalibrationTables.getReadGroupTable().get(rgKey, errorModel.ordinal());
|
||||
|
||||
if (empiricalQualRG != null) {
|
||||
final double globalDeltaQEmpirical = empiricalQualRG.getEmpiricalQuality();
|
||||
|
|
@ -254,7 +254,7 @@ public class BaseRecalibration {
|
|||
private double calculateDeltaQReported(final int rgKey, final int qualKey, final EventType errorModel, final double globalDeltaQ, final byte qualFromRead) {
|
||||
double result = 0.0;
|
||||
|
||||
final RecalDatum empiricalQualQS = recalibrationTables.getQualityScoreTable().get(rgKey, qualKey, errorModel.index);
|
||||
final RecalDatum empiricalQualQS = recalibrationTables.getQualityScoreTable().get(rgKey, qualKey, errorModel.ordinal());
|
||||
if (empiricalQualQS != null) {
|
||||
final double deltaQReportedEmpirical = empiricalQualQS.getEmpiricalQuality();
|
||||
result = deltaQReportedEmpirical - qualFromRead - globalDeltaQ;
|
||||
|
|
@ -287,7 +287,7 @@ public class BaseRecalibration {
|
|||
final double globalDeltaQ,
|
||||
final double deltaQReported,
|
||||
final byte qualFromRead) {
|
||||
final RecalDatum empiricalQualCO = table.get(rgKey, qualKey, tableKey, errorModel.index);
|
||||
final RecalDatum empiricalQualCO = table.get(rgKey, qualKey, tableKey, errorModel.ordinal());
|
||||
if (empiricalQualCO != null) {
|
||||
final double deltaQCovariateEmpirical = empiricalQualCO.getEmpiricalQuality();
|
||||
return deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported);
|
||||
|
|
|
|||
|
|
@ -1,41 +1,39 @@
|
|||
package org.broadinstitute.sting.utils.recalibration;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
public enum EventType {
|
||||
BASE_SUBSTITUTION(0, "M", "Base Substitution"),
|
||||
BASE_INSERTION(1, "I", "Base Insertion"),
|
||||
BASE_DELETION(2, "D", "Base Deletion");
|
||||
BASE_SUBSTITUTION("M", "Base Substitution"),
|
||||
BASE_INSERTION("I", "Base Insertion"),
|
||||
BASE_DELETION("D", "Base Deletion");
|
||||
|
||||
public final int index;
|
||||
private final String representation;
|
||||
private final String longRepresentation;
|
||||
|
||||
private EventType(int index, String representation, String longRepresentation) {
|
||||
this.index = index;
|
||||
private EventType(String representation, String longRepresentation) {
|
||||
this.representation = representation;
|
||||
this.longRepresentation = longRepresentation;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the EventType corresponding to its ordinal index
|
||||
* @param index an ordinal index
|
||||
* @return the event type corresponding to ordinal index
|
||||
*/
|
||||
public static EventType eventFrom(int index) {
|
||||
switch (index) {
|
||||
case 0:
|
||||
return BASE_SUBSTITUTION;
|
||||
case 1:
|
||||
return BASE_INSERTION;
|
||||
case 2:
|
||||
return BASE_DELETION;
|
||||
default:
|
||||
throw new ReviewedStingException(String.format("Event %d does not exist.", index));
|
||||
}
|
||||
return EventType.values()[index];
|
||||
}
|
||||
|
||||
public static EventType eventFrom(String event) {
|
||||
|
||||
/**
|
||||
* Get the EventType with short string representation
|
||||
* @throws IllegalArgumentException if representation doesn't correspond to one of EventType
|
||||
* @param representation short string representation of the event
|
||||
* @return an EventType
|
||||
*/
|
||||
public static EventType eventFrom(String representation) {
|
||||
for (EventType eventType : EventType.values())
|
||||
if (eventType.representation.equals(event))
|
||||
if (eventType.representation.equals(representation))
|
||||
return eventType;
|
||||
|
||||
throw new ReviewedStingException(String.format("Event %s does not exist.", event));
|
||||
throw new IllegalArgumentException(String.format("Event %s does not exist.", representation));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
|||
|
|
@ -67,10 +67,10 @@ public class QuantizationInfo {
|
|||
return quantizationLevels;
|
||||
}
|
||||
|
||||
public GATKReportTable generateReportTable(boolean sortBycols) {
|
||||
public GATKReportTable generateReportTable(boolean sortByCols) {
|
||||
GATKReportTable quantizedTable;
|
||||
if(sortBycols) {
|
||||
quantizedTable = new GATKReportTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3, false, true);
|
||||
if(sortByCols) {
|
||||
quantizedTable = new GATKReportTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3, GATKReportTable.TableSortingWay.SORT_BY_COLUMN);
|
||||
} else {
|
||||
quantizedTable = new GATKReportTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -68,9 +68,9 @@ public class ReadCovariates {
|
|||
* @param readOffset the read offset, must be >= 0 and <= the read length used to create this ReadCovariates
|
||||
*/
|
||||
public void addCovariate(final int mismatch, final int insertion, final int deletion, final int readOffset) {
|
||||
keys[EventType.BASE_SUBSTITUTION.index][readOffset][currentCovariateIndex] = mismatch;
|
||||
keys[EventType.BASE_INSERTION.index][readOffset][currentCovariateIndex] = insertion;
|
||||
keys[EventType.BASE_DELETION.index][readOffset][currentCovariateIndex] = deletion;
|
||||
keys[EventType.BASE_SUBSTITUTION.ordinal()][readOffset][currentCovariateIndex] = mismatch;
|
||||
keys[EventType.BASE_INSERTION.ordinal()][readOffset][currentCovariateIndex] = insertion;
|
||||
keys[EventType.BASE_DELETION.ordinal()][readOffset][currentCovariateIndex] = deletion;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -81,11 +81,11 @@ public class ReadCovariates {
|
|||
* @return
|
||||
*/
|
||||
public int[] getKeySet(final int readPosition, final EventType errorModel) {
|
||||
return keys[errorModel.index][readPosition];
|
||||
return keys[errorModel.ordinal()][readPosition];
|
||||
}
|
||||
|
||||
public int[][] getKeySet(final EventType errorModel) {
|
||||
return keys[errorModel.index];
|
||||
return keys[errorModel.ordinal()];
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
|
|
@ -94,17 +94,9 @@ public class ReadCovariates {
|
|||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
protected int[][] getMismatchesKeySet() {
|
||||
return keys[EventType.BASE_SUBSTITUTION.index];
|
||||
}
|
||||
|
||||
protected int[][] getInsertionsKeySet() {
|
||||
return keys[EventType.BASE_INSERTION.index];
|
||||
}
|
||||
|
||||
protected int[][] getDeletionsKeySet() {
|
||||
return keys[EventType.BASE_DELETION.index];
|
||||
}
|
||||
protected int[][] getMismatchesKeySet() { return getKeySet(EventType.BASE_SUBSTITUTION); }
|
||||
protected int[][] getInsertionsKeySet() { return getKeySet(EventType.BASE_INSERTION); }
|
||||
protected int[][] getDeletionsKeySet() { return getKeySet(EventType.BASE_DELETION); }
|
||||
|
||||
protected int[] getMismatchesKeySet(final int readPosition) {
|
||||
return getKeySet(readPosition, EventType.BASE_SUBSTITUTION);
|
||||
|
|
|
|||
|
|
@ -68,7 +68,7 @@ public class RecalDatum {
|
|||
/**
|
||||
* number of bases seen in total
|
||||
*/
|
||||
private double numObservations;
|
||||
private long numObservations;
|
||||
|
||||
/**
|
||||
* number of bases seen that didn't match the reference
|
||||
|
|
@ -89,13 +89,13 @@ public class RecalDatum {
|
|||
/**
|
||||
* Create a new RecalDatum with given observation and mismatch counts, and an reported quality
|
||||
*
|
||||
* @param _numObservations
|
||||
* @param _numMismatches
|
||||
* @param reportedQuality
|
||||
* @param _numObservations observations
|
||||
* @param _numMismatches mismatches
|
||||
* @param reportedQuality Qreported
|
||||
*/
|
||||
public RecalDatum(final double _numObservations, final double _numMismatches, final byte reportedQuality) {
|
||||
public RecalDatum(final long _numObservations, final double _numMismatches, final byte reportedQuality) {
|
||||
if ( _numObservations < 0 ) throw new IllegalArgumentException("numObservations < 0");
|
||||
if ( _numMismatches < 0 ) throw new IllegalArgumentException("numMismatches < 0");
|
||||
if ( _numMismatches < 0.0 ) throw new IllegalArgumentException("numMismatches < 0");
|
||||
if ( reportedQuality < 0 ) throw new IllegalArgumentException("reportedQuality < 0");
|
||||
|
||||
numObservations = _numObservations;
|
||||
|
|
@ -106,7 +106,7 @@ public class RecalDatum {
|
|||
|
||||
/**
|
||||
* Copy copy into this recal datum, overwriting all of this objects data
|
||||
* @param copy
|
||||
* @param copy RecalDatum to copy
|
||||
*/
|
||||
public RecalDatum(final RecalDatum copy) {
|
||||
this.numObservations = copy.getNumObservations();
|
||||
|
|
@ -119,7 +119,7 @@ public class RecalDatum {
|
|||
* Add in all of the data from other into this object, updating the reported quality from the expected
|
||||
* error rate implied by the two reported qualities
|
||||
*
|
||||
* @param other
|
||||
* @param other RecalDatum to combine
|
||||
*/
|
||||
public synchronized void combine(final RecalDatum other) {
|
||||
final double sumErrors = this.calcExpectedErrors() + other.calcExpectedErrors();
|
||||
|
|
@ -192,7 +192,7 @@ public class RecalDatum {
|
|||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("%.2f,%.2f,%.2f", getNumObservations(), getNumMismatches(), getEmpiricalQuality());
|
||||
return String.format("%d,%.2f,%.2f", getNumObservations(), getNumMismatches(), getEmpiricalQuality());
|
||||
}
|
||||
|
||||
public String stringForCSV() {
|
||||
|
|
@ -205,11 +205,11 @@ public class RecalDatum {
|
|||
//
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
||||
public final double getNumObservations() {
|
||||
public final long getNumObservations() {
|
||||
return numObservations;
|
||||
}
|
||||
|
||||
public final synchronized void setNumObservations(final double numObservations) {
|
||||
public final synchronized void setNumObservations(final long numObservations) {
|
||||
if ( numObservations < 0 ) throw new IllegalArgumentException("numObservations < 0");
|
||||
this.numObservations = numObservations;
|
||||
empiricalQuality = UNINITIALIZED;
|
||||
|
|
@ -227,7 +227,7 @@ public class RecalDatum {
|
|||
}
|
||||
|
||||
@Requires({"by >= 0"})
|
||||
public final synchronized void incrementNumObservations(final double by) {
|
||||
public final synchronized void incrementNumObservations(final long by) {
|
||||
numObservations += by;
|
||||
empiricalQuality = UNINITIALIZED;
|
||||
}
|
||||
|
|
@ -240,7 +240,7 @@ public class RecalDatum {
|
|||
|
||||
@Requires({"incObservations >= 0", "incMismatches >= 0"})
|
||||
@Ensures({"numObservations == old(numObservations) + incObservations", "numMismatches == old(numMismatches) + incMismatches"})
|
||||
public final synchronized void increment(final double incObservations, final double incMismatches) {
|
||||
public final synchronized void increment(final long incObservations, final double incMismatches) {
|
||||
numObservations += incObservations;
|
||||
numMismatches += incMismatches;
|
||||
empiricalQuality = UNINITIALIZED;
|
||||
|
|
@ -248,7 +248,7 @@ public class RecalDatum {
|
|||
|
||||
@Ensures({"numObservations == old(numObservations) + 1", "numMismatches >= old(numMismatches)"})
|
||||
public final synchronized void increment(final boolean isError) {
|
||||
increment(1, isError ? 1 : 0.0);
|
||||
increment(1, isError ? 1.0 : 0.0);
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------------------
|
||||
|
|
@ -257,19 +257,6 @@ public class RecalDatum {
|
|||
//
|
||||
// -------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Calculate and cache the empirical quality score from mismatches and observations (expensive operation)
|
||||
*/
|
||||
@Requires("empiricalQuality == UNINITIALIZED")
|
||||
@Ensures("empiricalQuality != UNINITIALIZED")
|
||||
private synchronized void calcEmpiricalQuality() {
|
||||
|
||||
// TODO -- add code for Bayesian estimate of Qemp here
|
||||
|
||||
final double empiricalQual = -10 * Math.log10(getEmpiricalErrorRate());
|
||||
empiricalQuality = Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE);
|
||||
}
|
||||
|
||||
/**
|
||||
* calculate the expected number of errors given the estimated Q reported and the number of observations
|
||||
* in this datum.
|
||||
|
|
@ -281,10 +268,29 @@ public class RecalDatum {
|
|||
return getNumObservations() * QualityUtils.qualToErrorProb(estimatedQReported);
|
||||
}
|
||||
|
||||
static final boolean DEBUG = false;
|
||||
static final double RESOLUTION_BINS_PER_QUAL = 1.0;
|
||||
/**
|
||||
* Calculate and cache the empirical quality score from mismatches and observations (expensive operation)
|
||||
*/
|
||||
@Requires("empiricalQuality == UNINITIALIZED")
|
||||
@Ensures("empiricalQuality != UNINITIALIZED")
|
||||
private synchronized void calcEmpiricalQuality() {
|
||||
|
||||
static public double bayesianEstimateOfEmpiricalQuality(final double nObservations, final double nErrors, final double QReported) {
|
||||
// smoothing is one error and one non-error observation
|
||||
final long mismatches = (long)(getNumMismatches() + 0.5) + SMOOTHING_CONSTANT;
|
||||
final long observations = getNumObservations() + SMOOTHING_CONSTANT + SMOOTHING_CONSTANT;
|
||||
|
||||
final double empiricalQual = RecalDatum.bayesianEstimateOfEmpiricalQuality(observations, mismatches, getEstimatedQReported());
|
||||
|
||||
// This is the old and busted point estimate approach:
|
||||
//final double empiricalQual = -10 * Math.log10(getEmpiricalErrorRate());
|
||||
|
||||
empiricalQuality = Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE);
|
||||
}
|
||||
|
||||
//static final boolean DEBUG = false;
|
||||
static private final double RESOLUTION_BINS_PER_QUAL = 1.0;
|
||||
|
||||
static public double bayesianEstimateOfEmpiricalQuality(final long nObservations, final long nErrors, final double QReported) {
|
||||
|
||||
final int numBins = (QualityUtils.MAX_REASONABLE_Q_SCORE + 1) * (int)RESOLUTION_BINS_PER_QUAL;
|
||||
|
||||
|
|
@ -294,14 +300,14 @@ public class RecalDatum {
|
|||
|
||||
final double QEmpOfBin = bin / RESOLUTION_BINS_PER_QUAL;
|
||||
|
||||
log10Posteriors[bin] = log10QempPrior(QEmpOfBin, QReported) + log10Likelihood(QEmpOfBin, nObservations, nErrors);
|
||||
log10Posteriors[bin] = log10QempPrior(QEmpOfBin, QReported) + log10QempLikelihood(QEmpOfBin, nObservations, nErrors);
|
||||
|
||||
if ( DEBUG )
|
||||
System.out.println(String.format("bin = %d, Qreported = %f, nObservations = %f, nErrors = %f, posteriors = %f", bin, QReported, nObservations, nErrors, log10Posteriors[bin]));
|
||||
//if ( DEBUG )
|
||||
// System.out.println(String.format("bin = %d, Qreported = %f, nObservations = %f, nErrors = %f, posteriors = %f", bin, QReported, nObservations, nErrors, log10Posteriors[bin]));
|
||||
}
|
||||
|
||||
if ( DEBUG )
|
||||
System.out.println(String.format("Qreported = %f, nObservations = %f, nErrors = %f", QReported, nObservations, nErrors));
|
||||
//if ( DEBUG )
|
||||
// System.out.println(String.format("Qreported = %f, nObservations = %f, nErrors = %f", QReported, nObservations, nErrors));
|
||||
|
||||
final double[] normalizedPosteriors = MathUtils.normalizeFromLog10(log10Posteriors);
|
||||
final int MLEbin = MathUtils.maxElementIndex(normalizedPosteriors);
|
||||
|
|
@ -310,35 +316,53 @@ public class RecalDatum {
|
|||
return Qemp;
|
||||
}
|
||||
|
||||
static final double[] log10QempPriorCache = new double[QualityUtils.MAX_GATK_USABLE_Q_SCORE + 1];
|
||||
static private final double[] log10QempPriorCache = new double[QualityUtils.MAX_GATK_USABLE_Q_SCORE + 1];
|
||||
static {
|
||||
// f(x) = a + b*exp(-((x - c)^2 / (2*d^2)))
|
||||
// Note that b is the height of the curve's peak, c is the position of the center of the peak, and d controls the width of the "bell".
|
||||
final double GF_a = 0.0;
|
||||
final double GF_b = 0.9;
|
||||
final double GF_c = 0.0;
|
||||
final double GF_d = 0.5;
|
||||
final double GF_d = 0.5; // with these parameters, deltas can shift at most ~20 Q points
|
||||
|
||||
final GaussianFunction gaussian = new GaussianFunction(GF_a, GF_b, GF_c, GF_d);
|
||||
for ( int i = 0; i <= QualityUtils.MAX_GATK_USABLE_Q_SCORE; i++ )
|
||||
log10QempPriorCache[i] = Math.log10(gaussian.value((double) i));
|
||||
for ( int i = 0; i <= QualityUtils.MAX_GATK_USABLE_Q_SCORE; i++ ) {
|
||||
double log10Prior = Math.log10(gaussian.value((double) i));
|
||||
if ( Double.isInfinite(log10Prior) )
|
||||
log10Prior = -Double.MAX_VALUE;
|
||||
log10QempPriorCache[i] = log10Prior;
|
||||
}
|
||||
}
|
||||
|
||||
static public double log10QempPrior(final double Qempirical, final double Qreported) {
|
||||
static protected double log10QempPrior(final double Qempirical, final double Qreported) {
|
||||
final int difference = Math.min(Math.abs((int) (Qempirical - Qreported)), QualityUtils.MAX_GATK_USABLE_Q_SCORE);
|
||||
if ( DEBUG )
|
||||
System.out.println(String.format("Qemp = %f, log10Priors = %f", Qempirical, log10QempPriorCache[difference]));
|
||||
//if ( DEBUG )
|
||||
// System.out.println(String.format("Qemp = %f, log10Priors = %f", Qempirical, log10QempPriorCache[difference]));
|
||||
return log10QempPriorCache[difference];
|
||||
}
|
||||
|
||||
static public double log10Likelihood(final double Qempirical, final double nObservations, final double nErrors) {
|
||||
static protected double log10QempLikelihood(final double Qempirical, long nObservations, long nErrors) {
|
||||
if ( nObservations == 0 )
|
||||
return 0.0;
|
||||
|
||||
// the binomial code requires ints as input (because it does caching). This should theoretically be fine because
|
||||
// there is plenty of precision in 2^31 observations, but we need to make sure that we don't have overflow
|
||||
// before casting down to an int.
|
||||
if ( nObservations > Integer.MAX_VALUE ) {
|
||||
// we need to decrease nErrors by the same fraction that we are decreasing nObservations
|
||||
final double fraction = (double)Integer.MAX_VALUE / (double)nObservations;
|
||||
nErrors = Math.round((double)nErrors * fraction);
|
||||
nObservations = Integer.MAX_VALUE;
|
||||
}
|
||||
|
||||
// this is just a straight binomial PDF
|
||||
double log10Prob = MathUtils.log10BinomialProbability((int)nObservations, (int)nErrors, QualityUtils.qualToErrorProbLog10((byte)(int)Qempirical));
|
||||
if ( log10Prob == Double.NEGATIVE_INFINITY )
|
||||
if ( Double.isInfinite(log10Prob) || Double.isNaN(log10Prob) )
|
||||
log10Prob = -Double.MAX_VALUE;
|
||||
|
||||
if ( DEBUG )
|
||||
System.out.println(String.format("Qemp = %f, log10Likelihood = %f", Qempirical, log10Prob));
|
||||
//if ( DEBUG )
|
||||
// System.out.println(String.format("Qemp = %f, log10Likelihood = %f", Qempirical, log10Prob));
|
||||
|
||||
return log10Prob;
|
||||
}
|
||||
}
|
||||
|
|
@ -264,7 +264,7 @@ public class RecalDatumNode<T extends RecalDatum> {
|
|||
for ( final RecalDatumNode<T> subnode : subnodes ) {
|
||||
// use the yates correction to help avoid all zeros => NaN
|
||||
counts[i][0] = Math.round(subnode.getRecalDatum().getNumMismatches()) + 1L;
|
||||
counts[i][1] = Math.round(subnode.getRecalDatum().getNumObservations()) + 2L;
|
||||
counts[i][1] = subnode.getRecalDatum().getNumObservations() + 2L;
|
||||
i++;
|
||||
}
|
||||
|
||||
|
|
@ -320,7 +320,7 @@ public class RecalDatumNode<T extends RecalDatum> {
|
|||
|
||||
if ( isLeaf() ) {
|
||||
// this is leave node
|
||||
return (Math.abs(Math.log10(recalDatum.getEmpiricalErrorRate()) - Math.log10(globalErrorRate))) * recalDatum.getNumObservations();
|
||||
return (Math.abs(Math.log10(recalDatum.getEmpiricalErrorRate()) - Math.log10(globalErrorRate))) * (double)recalDatum.getNumObservations();
|
||||
// TODO -- how we can generalize this calculation?
|
||||
// if ( this.qEnd <= minInterestingQual )
|
||||
// // It's free to merge up quality scores below the smallest interesting one
|
||||
|
|
|
|||
|
|
@ -93,7 +93,7 @@ public class RecalUtils {
|
|||
private static final Pair<String, String> eventType = new Pair<String, String>(RecalUtils.EVENT_TYPE_COLUMN_NAME, "%s");
|
||||
private static final Pair<String, String> empiricalQuality = new Pair<String, String>(RecalUtils.EMPIRICAL_QUALITY_COLUMN_NAME, "%.4f");
|
||||
private static final Pair<String, String> estimatedQReported = new Pair<String, String>(RecalUtils.ESTIMATED_Q_REPORTED_COLUMN_NAME, "%.4f");
|
||||
private static final Pair<String, String> nObservations = new Pair<String, String>(RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME, "%.2f");
|
||||
private static final Pair<String, String> nObservations = new Pair<String, String>(RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME, "%d");
|
||||
private static final Pair<String, String> nErrors = new Pair<String, String>(RecalUtils.NUMBER_ERRORS_COLUMN_NAME, "%.2f");
|
||||
|
||||
/**
|
||||
|
|
@ -269,9 +269,9 @@ public class RecalUtils {
|
|||
|
||||
final ArrayList<Pair<String, String>> columnNames = new ArrayList<Pair<String, String>>(); // initialize the array to hold the column names
|
||||
columnNames.add(new Pair<String, String>(covariateNameMap.get(requestedCovariates[0]), "%s")); // save the required covariate name so we can reference it in the future
|
||||
if (tableIndex != RecalibrationTables.TableType.READ_GROUP_TABLE.index) {
|
||||
if (tableIndex != RecalibrationTables.TableType.READ_GROUP_TABLE.ordinal()) {
|
||||
columnNames.add(new Pair<String, String>(covariateNameMap.get(requestedCovariates[1]), "%s")); // save the required covariate name so we can reference it in the future
|
||||
if (tableIndex >= RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index) {
|
||||
if (tableIndex >= RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal()) {
|
||||
columnNames.add(covariateValue);
|
||||
columnNames.add(covariateName);
|
||||
}
|
||||
|
|
@ -279,15 +279,15 @@ public class RecalUtils {
|
|||
|
||||
columnNames.add(eventType); // the order of these column names is important here
|
||||
columnNames.add(empiricalQuality);
|
||||
if (tableIndex == RecalibrationTables.TableType.READ_GROUP_TABLE.index)
|
||||
if (tableIndex == RecalibrationTables.TableType.READ_GROUP_TABLE.ordinal())
|
||||
columnNames.add(estimatedQReported); // only the read group table needs the estimated Q reported
|
||||
columnNames.add(nObservations);
|
||||
columnNames.add(nErrors);
|
||||
|
||||
final GATKReportTable reportTable;
|
||||
if (tableIndex <= RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index) {
|
||||
if (tableIndex <= RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal()) {
|
||||
if(sortByCols) {
|
||||
reportTable = new GATKReportTable("RecalTable" + reportTableIndex++, "", columnNames.size(), false, true);
|
||||
reportTable = new GATKReportTable("RecalTable" + reportTableIndex++, "", columnNames.size(), GATKReportTable.TableSortingWay.SORT_BY_COLUMN);
|
||||
} else {
|
||||
reportTable = new GATKReportTable("RecalTable" + reportTableIndex++, "", columnNames.size());
|
||||
}
|
||||
|
|
@ -295,7 +295,7 @@ public class RecalUtils {
|
|||
reportTable.addColumn(columnName.getFirst(), columnName.getSecond());
|
||||
rowIndex = 0; // reset the row index since we're starting with a new table
|
||||
} else {
|
||||
reportTable = result.get(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index);
|
||||
reportTable = result.get(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal());
|
||||
}
|
||||
|
||||
final NestedIntegerArray<RecalDatum> table = recalibrationTables.getTable(tableIndex);
|
||||
|
|
@ -306,9 +306,9 @@ public class RecalUtils {
|
|||
int columnIndex = 0;
|
||||
int keyIndex = 0;
|
||||
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), requestedCovariates[0].formatKey(keys[keyIndex++]));
|
||||
if (tableIndex != RecalibrationTables.TableType.READ_GROUP_TABLE.index) {
|
||||
if (tableIndex != RecalibrationTables.TableType.READ_GROUP_TABLE.ordinal()) {
|
||||
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), requestedCovariates[1].formatKey(keys[keyIndex++]));
|
||||
if (tableIndex >= RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index) {
|
||||
if (tableIndex >= RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal()) {
|
||||
final Covariate covariate = requestedCovariates[tableIndex];
|
||||
|
||||
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), covariate.formatKey(keys[keyIndex++]));
|
||||
|
|
@ -320,7 +320,7 @@ public class RecalUtils {
|
|||
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), event.toString());
|
||||
|
||||
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEmpiricalQuality());
|
||||
if (tableIndex == RecalibrationTables.TableType.READ_GROUP_TABLE.index)
|
||||
if (tableIndex == RecalibrationTables.TableType.READ_GROUP_TABLE.ordinal())
|
||||
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getEstimatedQReported()); // we only add the estimated Q reported in the RG table
|
||||
reportTable.set(rowIndex, columnNames.get(columnIndex++).getFirst(), datum.getNumObservations());
|
||||
reportTable.set(rowIndex, columnNames.get(columnIndex).getFirst(), datum.getNumMismatches());
|
||||
|
|
@ -414,7 +414,7 @@ public class RecalUtils {
|
|||
}
|
||||
|
||||
// add the optional covariates to the delta table
|
||||
for (int i = RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index; i < requestedCovariates.length; i++) {
|
||||
for (int i = RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal(); i < requestedCovariates.length; i++) {
|
||||
final NestedIntegerArray<RecalDatum> covTable = recalibrationTables.getTable(i);
|
||||
for (final NestedIntegerArray.Leaf leaf : covTable.getAllLeaves()) {
|
||||
final int[] covs = new int[4];
|
||||
|
|
@ -458,9 +458,9 @@ public class RecalUtils {
|
|||
|
||||
private static List<Object> generateValuesFromKeys(final List<Object> keys, final Covariate[] covariates, final Map<Covariate, String> covariateNameMap) {
|
||||
final List<Object> values = new ArrayList<Object>(4);
|
||||
values.add(covariates[RecalibrationTables.TableType.READ_GROUP_TABLE.index].formatKey((Integer)keys.get(0)));
|
||||
values.add(covariates[RecalibrationTables.TableType.READ_GROUP_TABLE.ordinal()].formatKey((Integer)keys.get(0)));
|
||||
final int covariateIndex = (Integer)keys.get(1);
|
||||
final Covariate covariate = covariateIndex == covariates.length ? covariates[RecalibrationTables.TableType.QUALITY_SCORE_TABLE.index] : covariates[covariateIndex];
|
||||
final Covariate covariate = covariateIndex == covariates.length ? covariates[RecalibrationTables.TableType.QUALITY_SCORE_TABLE.ordinal()] : covariates[covariateIndex];
|
||||
final int covariateKey = (Integer)keys.get(2);
|
||||
values.add(covariate.formatKey(covariateKey));
|
||||
values.add(covariateNameMap.get(covariate));
|
||||
|
|
@ -793,4 +793,48 @@ public class RecalUtils {
|
|||
myDatum.combine(row.value);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Increments the RecalDatum at the specified position in the specified table, or put a new item there
|
||||
* if there isn't already one.
|
||||
*
|
||||
* Does this in a thread-safe way WITHOUT being synchronized: relies on the behavior of NestedIntegerArray.put()
|
||||
* to return false if another thread inserts a new item at our position in the middle of our put operation.
|
||||
*
|
||||
* @param table the table that holds/will hold our item
|
||||
* @param qual qual for this event
|
||||
* @param isError error value for this event
|
||||
* @param keys location in table of our item
|
||||
*/
|
||||
public static void incrementDatumOrPutIfNecessary( final NestedIntegerArray<RecalDatum> table,
|
||||
final byte qual,
|
||||
final double isError,
|
||||
final int... keys ) {
|
||||
final RecalDatum existingDatum = table.get(keys);
|
||||
|
||||
if ( existingDatum == null ) {
|
||||
// No existing item, try to put a new one
|
||||
if ( ! table.put(createDatumObject(qual, isError), keys) ) {
|
||||
// Failed to put a new item because another thread came along and put an item here first.
|
||||
// Get the newly-put item and increment it (item is guaranteed to exist at this point)
|
||||
table.get(keys).increment(1L, isError);
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Easy case: already an item here, so increment it
|
||||
existingDatum.increment(1L, isError);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* creates a datum object with one observation and one or zero error
|
||||
*
|
||||
* @param reportedQual the quality score reported by the instrument for this base
|
||||
* @param isError whether or not the observation is an error
|
||||
* @return a new RecalDatum object with the observation and the error
|
||||
*/
|
||||
private static RecalDatum createDatumObject(final byte reportedQual, final double isError) {
|
||||
return new RecalDatum(1, isError, reportedQual);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -68,15 +68,6 @@ public class RecalibrationReport {
|
|||
|
||||
}
|
||||
|
||||
protected RecalibrationReport(final QuantizationInfo quantizationInfo, final RecalibrationTables recalibrationTables, final Covariate[] requestedCovariates, final GATKReportTable argumentTable, final RecalibrationArgumentCollection RAC) {
|
||||
this.quantizationInfo = quantizationInfo;
|
||||
this.recalibrationTables = recalibrationTables;
|
||||
this.requestedCovariates = requestedCovariates;
|
||||
this.argumentTable = argumentTable;
|
||||
this.RAC = RAC;
|
||||
this.optionalCovariateIndexes = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Counts the number of unique read groups in the table
|
||||
*
|
||||
|
|
@ -139,12 +130,12 @@ public class RecalibrationReport {
|
|||
final String covName = (String)reportTable.get(i, RecalUtils.COVARIATE_NAME_COLUMN_NAME);
|
||||
final int covIndex = optionalCovariateIndexes.get(covName);
|
||||
final Object covValue = reportTable.get(i, RecalUtils.COVARIATE_VALUE_COLUMN_NAME);
|
||||
tempCOVarray[2] = requestedCovariates[RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + covIndex].keyFromValue(covValue);
|
||||
tempCOVarray[2] = requestedCovariates[RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal() + covIndex].keyFromValue(covValue);
|
||||
|
||||
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalUtils.EVENT_TYPE_COLUMN_NAME));
|
||||
tempCOVarray[3] = event.index;
|
||||
tempCOVarray[3] = event.ordinal();
|
||||
|
||||
recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + covIndex).put(getRecalDatum(reportTable, i, false), tempCOVarray);
|
||||
recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal() + covIndex).put(getRecalDatum(reportTable, i, false), tempCOVarray);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -161,7 +152,7 @@ public class RecalibrationReport {
|
|||
final Object qual = reportTable.get(i, RecalUtils.QUALITY_SCORE_COLUMN_NAME);
|
||||
tempQUALarray[1] = requestedCovariates[1].keyFromValue(qual);
|
||||
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalUtils.EVENT_TYPE_COLUMN_NAME));
|
||||
tempQUALarray[2] = event.index;
|
||||
tempQUALarray[2] = event.ordinal();
|
||||
|
||||
qualTable.put(getRecalDatum(reportTable, i, false), tempQUALarray);
|
||||
}
|
||||
|
|
@ -178,7 +169,7 @@ public class RecalibrationReport {
|
|||
final Object rg = reportTable.get(i, RecalUtils.READGROUP_COLUMN_NAME);
|
||||
tempRGarray[0] = requestedCovariates[0].keyFromValue(rg);
|
||||
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalUtils.EVENT_TYPE_COLUMN_NAME));
|
||||
tempRGarray[1] = event.index;
|
||||
tempRGarray[1] = event.ordinal();
|
||||
|
||||
rgTable.put(getRecalDatum(reportTable, i, true), tempRGarray);
|
||||
}
|
||||
|
|
@ -192,11 +183,22 @@ public class RecalibrationReport {
|
|||
else if ( o instanceof Long )
|
||||
return (Long)o;
|
||||
else
|
||||
throw new ReviewedStingException("Object " + o + " is expected to be either a double, long or integer but its not either: " + o.getClass());
|
||||
throw new ReviewedStingException("Object " + o + " is expected to be either a double, long or integer but it's not either: " + o.getClass());
|
||||
}
|
||||
|
||||
private long asLong(final Object o) {
|
||||
if ( o instanceof Long )
|
||||
return (Long)o;
|
||||
else if ( o instanceof Integer )
|
||||
return ((Integer)o).longValue();
|
||||
else if ( o instanceof Double )
|
||||
return ((Double)o).longValue();
|
||||
else
|
||||
throw new ReviewedStingException("Object " + o + " is expected to be a long but it's not: " + o.getClass());
|
||||
}
|
||||
|
||||
private RecalDatum getRecalDatum(final GATKReportTable reportTable, final int row, final boolean hasEstimatedQReportedColumn) {
|
||||
final double nObservations = asDouble(reportTable.get(row, RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME));
|
||||
final long nObservations = asLong(reportTable.get(row, RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME));
|
||||
final double nErrors = asDouble(reportTable.get(row, RecalUtils.NUMBER_ERRORS_COLUMN_NAME));
|
||||
final double empiricalQuality = asDouble(reportTable.get(row, RecalUtils.EMPIRICAL_QUALITY_COLUMN_NAME));
|
||||
|
||||
|
|
|
|||
|
|
@ -42,15 +42,9 @@ import java.util.ArrayList;
|
|||
|
||||
public final class RecalibrationTables {
|
||||
public enum TableType {
|
||||
READ_GROUP_TABLE(0),
|
||||
QUALITY_SCORE_TABLE(1),
|
||||
OPTIONAL_COVARIATE_TABLES_START(2);
|
||||
|
||||
public final int index;
|
||||
|
||||
private TableType(final int index) {
|
||||
this.index = index;
|
||||
}
|
||||
READ_GROUP_TABLE,
|
||||
QUALITY_SCORE_TABLE,
|
||||
OPTIONAL_COVARIATE_TABLES_START;
|
||||
}
|
||||
|
||||
private final ArrayList<NestedIntegerArray<RecalDatum>> tables;
|
||||
|
|
@ -60,7 +54,7 @@ public final class RecalibrationTables {
|
|||
private final PrintStream log;
|
||||
|
||||
public RecalibrationTables(final Covariate[] covariates) {
|
||||
this(covariates, covariates[TableType.READ_GROUP_TABLE.index].maximumKeyValue() + 1, null);
|
||||
this(covariates, covariates[TableType.READ_GROUP_TABLE.ordinal()].maximumKeyValue() + 1, null);
|
||||
}
|
||||
|
||||
public RecalibrationTables(final Covariate[] covariates, final int numReadGroups) {
|
||||
|
|
@ -72,31 +66,31 @@ public final class RecalibrationTables {
|
|||
for ( int i = 0; i < covariates.length; i++ )
|
||||
tables.add(i, null); // initialize so we can set below
|
||||
|
||||
qualDimension = covariates[TableType.QUALITY_SCORE_TABLE.index].maximumKeyValue() + 1;
|
||||
qualDimension = covariates[TableType.QUALITY_SCORE_TABLE.ordinal()].maximumKeyValue() + 1;
|
||||
this.numReadGroups = numReadGroups;
|
||||
this.log = log;
|
||||
|
||||
tables.set(TableType.READ_GROUP_TABLE.index,
|
||||
tables.set(TableType.READ_GROUP_TABLE.ordinal(),
|
||||
log == null ? new NestedIntegerArray<RecalDatum>(numReadGroups, eventDimension) :
|
||||
new LoggingNestedIntegerArray<RecalDatum>(log, "READ_GROUP_TABLE", numReadGroups, eventDimension));
|
||||
|
||||
tables.set(TableType.QUALITY_SCORE_TABLE.index, makeQualityScoreTable());
|
||||
tables.set(TableType.QUALITY_SCORE_TABLE.ordinal(), makeQualityScoreTable());
|
||||
|
||||
for (int i = TableType.OPTIONAL_COVARIATE_TABLES_START.index; i < covariates.length; i++)
|
||||
for (int i = TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal(); i < covariates.length; i++)
|
||||
tables.set(i,
|
||||
log == null ? new NestedIntegerArray<RecalDatum>(numReadGroups, qualDimension, covariates[i].maximumKeyValue()+1, eventDimension) :
|
||||
new LoggingNestedIntegerArray<RecalDatum>(log, String.format("OPTIONAL_COVARIATE_TABLE_%d", i - TableType.OPTIONAL_COVARIATE_TABLES_START.index + 1),
|
||||
new LoggingNestedIntegerArray<RecalDatum>(log, String.format("OPTIONAL_COVARIATE_TABLE_%d", i - TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal() + 1),
|
||||
numReadGroups, qualDimension, covariates[i].maximumKeyValue()+1, eventDimension));
|
||||
}
|
||||
|
||||
@Ensures("result != null")
|
||||
public NestedIntegerArray<RecalDatum> getReadGroupTable() {
|
||||
return getTable(TableType.READ_GROUP_TABLE.index);
|
||||
return getTable(TableType.READ_GROUP_TABLE.ordinal());
|
||||
}
|
||||
|
||||
@Ensures("result != null")
|
||||
public NestedIntegerArray<RecalDatum> getQualityScoreTable() {
|
||||
return getTable(TableType.QUALITY_SCORE_TABLE.index);
|
||||
return getTable(TableType.QUALITY_SCORE_TABLE.ordinal());
|
||||
}
|
||||
|
||||
@Ensures("result != null")
|
||||
|
|
@ -123,12 +117,16 @@ public final class RecalibrationTables {
|
|||
}
|
||||
|
||||
/**
|
||||
* Merge in the quality score table information from qualityScoreTable into this
|
||||
* recalibration table's quality score table.
|
||||
*
|
||||
* @param qualityScoreTable the quality score table we want to merge in
|
||||
* Merge all of the tables from toMerge into into this set of tables
|
||||
*/
|
||||
public void combineQualityScoreTable(final NestedIntegerArray<RecalDatum> qualityScoreTable) {
|
||||
RecalUtils.combineTables(getQualityScoreTable(), qualityScoreTable);
|
||||
public void combine(final RecalibrationTables toMerge) {
|
||||
if ( numTables() != toMerge.numTables() )
|
||||
throw new IllegalArgumentException("Attempting to merge RecalibrationTables with different sizes");
|
||||
|
||||
for ( int i = 0; i < numTables(); i++ ) {
|
||||
final NestedIntegerArray<RecalDatum> myTable = this.getTable(i);
|
||||
final NestedIntegerArray<RecalDatum> otherTable = toMerge.getTable(i);
|
||||
RecalUtils.combineTables(myTable, otherTable);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -32,6 +32,13 @@ import org.testng.annotations.Test;
|
|||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.PrintStream;
|
||||
import java.util.Random;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.DataInputStream;
|
||||
import java.io.BufferedReader;
|
||||
import java.io.InputStreamReader;
|
||||
import java.util.ArrayList;
|
||||
|
||||
|
||||
public class GATKReportUnitTest extends BaseTest {
|
||||
@Test
|
||||
|
|
@ -77,6 +84,85 @@ public class GATKReportUnitTest extends BaseTest {
|
|||
Assert.assertEquals(GATKReportColumn.isRightAlign(value), expected, "right align of '" + value + "'");
|
||||
}
|
||||
|
||||
private GATKReportTable getTableWithRandomValues() {
|
||||
Random number = new Random(123L);
|
||||
final int VALUESRANGE = 10;
|
||||
|
||||
GATKReport report = GATKReport.newSimpleReport("TableName", "col1", "col2", "col3");
|
||||
GATKReportTable table = new GATKReportTable("testSortingTable", "table with random values sorted by columns", 3, GATKReportTable.TableSortingWay.SORT_BY_COLUMN );
|
||||
|
||||
final int NUMROWS = 100;
|
||||
for (int x = 0; x < NUMROWS; x++) {
|
||||
report.addRow(number.nextInt(VALUESRANGE), number.nextInt(VALUESRANGE), number.nextInt(VALUESRANGE));
|
||||
}
|
||||
return table;
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testSortingByColumn() {
|
||||
Assert.assertEquals(isSorted(getTableWithRandomValues()), true);
|
||||
}
|
||||
|
||||
private boolean isSorted(GATKReportTable table) {
|
||||
boolean result = true;
|
||||
File testingSortingTableFile = new File("testSortingFile.txt");
|
||||
|
||||
try {
|
||||
// Connect print stream to the output stream
|
||||
PrintStream ps = new PrintStream(testingSortingTableFile);
|
||||
table.write(ps);
|
||||
ps.close();
|
||||
}
|
||||
catch (Exception e){
|
||||
System.err.println ("Error: " + e.getMessage());
|
||||
}
|
||||
|
||||
ArrayList<int[]> rows = new ArrayList<int[]>();
|
||||
try {
|
||||
// Open the file
|
||||
FileInputStream fStream = new FileInputStream(testingSortingTableFile);
|
||||
// Get the object of DataInputStream
|
||||
DataInputStream in = new DataInputStream(fStream);
|
||||
BufferedReader br = new BufferedReader(new InputStreamReader(in));
|
||||
String strLine;
|
||||
//Read File Line By Line
|
||||
while ((strLine = br.readLine()) != null) {
|
||||
|
||||
String[] parts = strLine.split(" ");
|
||||
int l = parts.length;
|
||||
int[] row = new int[l];
|
||||
for(int n = 0; n < l; n++) {
|
||||
row[n] = Integer.parseInt(parts[n]);
|
||||
}
|
||||
rows.add(row);
|
||||
}
|
||||
//Close the input stream
|
||||
in.close();
|
||||
} catch (Exception e){//Catch exception if any
|
||||
System.err.println("Error: " + e.getMessage());
|
||||
}
|
||||
for (int x = 1; x < rows.size() && result; x++) {
|
||||
result = checkRowOrder(rows.get(x - 1), rows.get(x));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private boolean checkRowOrder(int[] row1, int[] row2) {
|
||||
int l = row1.length;
|
||||
final int EQUAL = 0;
|
||||
|
||||
int result = EQUAL;
|
||||
|
||||
for(int x = 0; x < l && ( result <= EQUAL); x++) {
|
||||
result = ((Integer)row1[x]).compareTo(row2[x]);
|
||||
}
|
||||
if (result <= EQUAL) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
private GATKReportTable makeBasicTable() {
|
||||
GATKReport report = GATKReport.newSimpleReport("TableName", "sample", "value");
|
||||
GATKReportTable table = report.getTable("TableName");
|
||||
|
|
@ -168,7 +254,7 @@ public class GATKReportUnitTest extends BaseTest {
|
|||
table.set("RZ", "SomeFloat", 535646345.657453464576);
|
||||
table.set("RZ", "TrueFalse", true);
|
||||
|
||||
report1.addTable("Table3", "blah", 1, true, false);
|
||||
report1.addTable("Table3", "blah", 1, GATKReportTable.TableSortingWay.SORT_BY_ROW);
|
||||
report1.getTable("Table3").addColumn("a");
|
||||
report1.getTable("Table3").addRowIDMapping("q", 2);
|
||||
report1.getTable("Table3").addRowIDMapping("5", 3);
|
||||
|
|
|
|||
|
|
@ -112,15 +112,14 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
dictionary = reference.getSequenceDictionary();
|
||||
genomeLocParser = new GenomeLocParser(dictionary);
|
||||
|
||||
// TODO: test shard boundaries
|
||||
// TODO: reads with indels
|
||||
// TODO: reads which span many regions
|
||||
// TODO: reads which are partially between intervals (in/outside extension)
|
||||
// TODO: duplicate reads
|
||||
|
||||
// TODO: read at the end of a contig
|
||||
// TODO: reads which are completely outside intervals but within extension
|
||||
// TODO: test the extension itself
|
||||
|
||||
// TODO: unmapped reads
|
||||
|
||||
intervals = new ArrayList<GenomeLoc>();
|
||||
intervals.add(genomeLocParser.createGenomeLoc("1", 10, 20));
|
||||
|
|
@ -142,6 +141,9 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
reads.add(buildSAMRecord("boundary_1_post", "1", 1999, 2050));
|
||||
reads.add(buildSAMRecord("extended_and_np", "1", 990, 1990));
|
||||
reads.add(buildSAMRecord("outside_intervals", "1", 5000, 6000));
|
||||
reads.add(buildSAMRecord("shard_boundary_1_pre", "1", 16300, 16385));
|
||||
reads.add(buildSAMRecord("shard_boundary_1_post", "1", 16384, 16400));
|
||||
reads.add(buildSAMRecord("shard_boundary_equal", "1", 16355, 16414));
|
||||
reads.add(buildSAMRecord("simple20", "20", 10025, 10075));
|
||||
|
||||
createBAM(reads);
|
||||
|
|
@ -153,7 +155,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
File indexFile = new File(testBAI);
|
||||
indexFile.deleteOnExit();
|
||||
|
||||
SAMFileWriter out = new SAMFileWriterFactory().makeBAMWriter(reads.get(0).getHeader(), true, outFile);
|
||||
SAMFileWriter out = new SAMFileWriterFactory().setCreateIndex(true).makeBAMWriter(reads.get(0).getHeader(), true, outFile);
|
||||
for (GATKSAMRecord read : ReadUtils.sortReadsByCoordinate(reads)) {
|
||||
out.addAlignment(read);
|
||||
}
|
||||
|
|
@ -272,6 +274,9 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
// boundary_1_post: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
|
||||
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
|
||||
// outside_intervals: none
|
||||
// shard_boundary_1_pre: Primary in 1:14908-16384, Non-Primary in 1:16385-16927
|
||||
// shard_boundary_1_post: Non-Primary in 1:14908-16384, Primary in 1:16385-16927
|
||||
// shard_boundary_equal: Non-Primary in 1:14908-16384, Primary in 1:16385-16927
|
||||
// simple20: Primary in 20:10000-10100
|
||||
|
||||
Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
|
||||
|
|
@ -286,6 +291,12 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
|
||||
verifyReadMapping(region, "boundary_equal", "boundary_1_post");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 14908, 16384));
|
||||
verifyReadMapping(region, "shard_boundary_1_pre");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 16385, 16927));
|
||||
verifyReadMapping(region, "shard_boundary_1_post", "shard_boundary_equal");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
|
||||
verifyReadMapping(region, "simple20");
|
||||
}
|
||||
|
|
@ -309,6 +320,9 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
// boundary_1_post: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
|
||||
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
|
||||
// outside_intervals: none
|
||||
// shard_boundary_1_pre: Primary in 1:14908-16384, Non-Primary in 1:16385-16927
|
||||
// shard_boundary_1_post: Non-Primary in 1:14908-16384, Primary in 1:16385-16927
|
||||
// shard_boundary_equal: Non-Primary in 1:14908-16384, Primary in 1:16385-16927
|
||||
// simple20: Primary in 20:10000-10100
|
||||
|
||||
Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
|
||||
|
|
@ -323,6 +337,12 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
|
||||
verifyReadMapping(region, "boundary_equal", "boundary_unequal", "boundary_1_pre", "boundary_1_post");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 14908, 16384));
|
||||
verifyReadMapping(region, "shard_boundary_1_pre", "shard_boundary_1_post", "shard_boundary_equal");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 16385, 16927));
|
||||
verifyReadMapping(region, "shard_boundary_1_pre", "shard_boundary_1_post", "shard_boundary_equal");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
|
||||
verifyReadMapping(region, "simple20");
|
||||
}
|
||||
|
|
@ -347,6 +367,9 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
// boundary_1_post: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
|
||||
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
|
||||
// outside_intervals: none
|
||||
// shard_boundary_1_pre: Primary in 1:14908-16384, Non-Primary in 1:16385-16927
|
||||
// shard_boundary_1_post: Non-Primary in 1:14908-16384, Primary in 1:16385-16927
|
||||
// shard_boundary_equal: Non-Primary in 1:14908-16384, Primary in 1:16385-16927
|
||||
// simple20: Primary in 20:10000-10100
|
||||
|
||||
Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
|
||||
|
|
@ -361,6 +384,12 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
|
||||
verifyReadMapping(region, "boundary_equal", "boundary_unequal", "extended_and_np", "boundary_1_pre", "boundary_1_post");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 14908, 16384));
|
||||
verifyReadMapping(region, "shard_boundary_1_pre", "shard_boundary_1_post", "shard_boundary_equal");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 16385, 16927));
|
||||
verifyReadMapping(region, "shard_boundary_1_pre", "shard_boundary_1_post", "shard_boundary_equal");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
|
||||
verifyReadMapping(region, "simple20");
|
||||
}
|
||||
|
|
@ -429,6 +458,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
protected GATKSAMRecord buildSAMRecord(String readName, String contig, int alignmentStart, int alignmentEnd) {
|
||||
SAMFileHeader header = ArtificialSAMUtils.createDefaultReadGroup(new SAMFileHeader(), "test", "test");
|
||||
header.setSequenceDictionary(dictionary);
|
||||
header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
|
||||
GATKSAMRecord record = new GATKSAMRecord(header);
|
||||
|
||||
record.setReadName(readName);
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testHasAnnotsAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("a127623a26bac4c17c9df491e170ed88"));
|
||||
Arrays.asList("fbfbd4d13b7ba3d76e8e186902e81378"));
|
||||
executeTest("test file has annotations, asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -40,7 +40,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testHasAnnotsAsking2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("13e24e6b9dfa241df5baa2c3f53415b9"));
|
||||
Arrays.asList("19aef8914efc497192f89a9038310ca5"));
|
||||
executeTest("test file has annotations, asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -66,7 +66,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoAnnotsAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("07cb4d427235878aeec0066d7d298e54"));
|
||||
Arrays.asList("4f0b8033da18e6cf6e9b8d5d36c21ba2"));
|
||||
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -74,7 +74,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoAnnotsAsking2() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
|
||||
Arrays.asList("e579097677d5e56a5776151251947961"));
|
||||
Arrays.asList("64ca176d587dfa2b3b9dec9f7999305c"));
|
||||
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -82,7 +82,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testExcludeAnnotations() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard -XA FisherStrand -XA ReadPosRankSumTest --variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("348314945436ace71ce6b1a52559d9ee"));
|
||||
Arrays.asList("f33f417fad98c05d9cd08ffa22943b0f"));
|
||||
executeTest("test exclude annotations", spec);
|
||||
}
|
||||
|
||||
|
|
@ -90,7 +90,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testOverwritingHeader() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1,
|
||||
Arrays.asList("ae7930e37a66c0aa4cfe0232736864fe"));
|
||||
Arrays.asList("0c810f6c4abef9d9dc5513ca872d3d22"));
|
||||
executeTest("test overwriting header", spec);
|
||||
}
|
||||
|
||||
|
|
@ -98,7 +98,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoReads() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("a0ba056c2625033e5e859fd6bcec1256"));
|
||||
Arrays.asList("1c423b7730b9805e7b885ece924286e0"));
|
||||
executeTest("not passing it any reads", spec);
|
||||
}
|
||||
|
||||
|
|
@ -106,7 +106,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testDBTagWithDbsnp() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --dbsnp " + b36dbSNP129 + " -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("0be7da17340111a94e8581ee3808c88a"));
|
||||
Arrays.asList("54d7d5bb9404652857adf5e50d995f30"));
|
||||
executeTest("getting DB tag with dbSNP", spec);
|
||||
}
|
||||
|
||||
|
|
@ -114,7 +114,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testMultipleIdsWithDbsnp() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --alwaysAppendDbsnpId --dbsnp " + b36dbSNP129 + " -G Standard --variant " + privateTestDir + "vcfexample3withIDs.vcf -L " + privateTestDir + "vcfexample3withIDs.vcf", 1,
|
||||
Arrays.asList("e40e625302a496ede42eed61c2ce524b"));
|
||||
Arrays.asList("5fe63e511061ed4f91d938e72e7e3c39"));
|
||||
executeTest("adding multiple IDs with dbSNP", spec);
|
||||
}
|
||||
|
||||
|
|
@ -122,7 +122,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testDBTagWithHapMap() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --comp:H3 " + privateTestDir + "fakeHM3.vcf -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("cb50876477d3e035b6eda5d720d7ba8d"));
|
||||
Arrays.asList("cc7184263975595a6e2473d153227146"));
|
||||
executeTest("getting DB tag with HM3", spec);
|
||||
}
|
||||
|
||||
|
|
@ -130,7 +130,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoQuals() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --variant " + privateTestDir + "noQual.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L " + privateTestDir + "noQual.vcf -A QualByDepth", 1,
|
||||
Arrays.asList("458412261d61797d39f802c1e03d63f6"));
|
||||
Arrays.asList("aea983adc01cd059193538cc30adc17d"));
|
||||
executeTest("test file doesn't have QUALs", spec);
|
||||
}
|
||||
|
||||
|
|
@ -138,7 +138,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testUsingExpression() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --resource:foo " + privateTestDir + "targetAnnotations.vcf -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -E foo.AF -L " + privateTestDir + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("39defa8108dca9fa3e54b22a7da43f77"));
|
||||
Arrays.asList("2b0e8cdfd691779befc5ac123d1a1887"));
|
||||
executeTest("using expression", spec);
|
||||
}
|
||||
|
||||
|
|
@ -146,7 +146,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testUsingExpressionWithID() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " --resource:foo " + privateTestDir + "targetAnnotations.vcf -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -E foo.ID -L " + privateTestDir + "vcfexample3empty.vcf", 1,
|
||||
Arrays.asList("a917edd58a0c235e9395bfc2d2020a8c"));
|
||||
Arrays.asList("3de1d1998203518098ffae233f3e2352"));
|
||||
executeTest("using expression with ID", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -42,7 +42,6 @@ public class BQSRGathererUnitTest extends BaseTest {
|
|||
GATKReport originalReport = new GATKReport(recal_original);
|
||||
GATKReport calculatedReport = new GATKReport(output);
|
||||
|
||||
|
||||
// test the Arguments table
|
||||
List<String> columnsToTest = Arrays.asList(RecalUtils.ARGUMENT_COLUMN_NAME, RecalUtils.ARGUMENT_VALUE_COLUMN_NAME);
|
||||
GATKReportTable originalTable = originalReport.getTable(RecalUtils.ARGUMENT_REPORT_TABLE_TITLE);
|
||||
|
|
@ -86,7 +85,9 @@ public class BQSRGathererUnitTest extends BaseTest {
|
|||
for (String column : columnsToTest) {
|
||||
Object actual = calculated.get(new Integer(row), column);
|
||||
Object expected = original.get(row, column);
|
||||
Assert.assertEquals(actual, expected, "Row: " + row + " Original Table: " + original.getTableName() + " Calc Table: " + calculated.getTableName());
|
||||
//if ( !actual.equals(expected) )
|
||||
// System.out.println("Row=" + row + " Table=" + original.getTableName() + " Column=" + column + " Expected=" + expected + " Actual=" + actual);
|
||||
Assert.assertEquals(actual, expected, "Row: " + row + " Original Table: " + original.getTableName() + " Column=" + column);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,110 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import net.sf.samtools.SAMUtils;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.recalibration.EventType;
|
||||
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.EnumMap;
|
||||
import java.util.List;
|
||||
|
||||
public final class ReadRecalibrationInfoUnitTest extends BaseTest {
|
||||
@DataProvider(name = "InfoProvider")
|
||||
public Object[][] createCombineTablesProvider() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
for ( final int readLength: Arrays.asList(10, 100, 1000) ) {
|
||||
for ( final boolean includeIndelErrors : Arrays.asList(true, false) ) {
|
||||
tests.add(new Object[]{readLength, includeIndelErrors});
|
||||
}
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "InfoProvider")
|
||||
public void testReadInfo(final int readLength, final boolean includeIndelErrors) {
|
||||
final ReadCovariates covariates = new ReadCovariates(readLength, 2);
|
||||
|
||||
final byte[] bases = new byte[readLength];
|
||||
final byte[] baseQuals = new byte[readLength];
|
||||
final byte[] insertionQuals = new byte[readLength];
|
||||
final byte[] deletionQuals = new byte[readLength];
|
||||
final boolean[] skips = new boolean[readLength];
|
||||
final double[] snpErrors = new double[readLength];
|
||||
final double[] insertionErrors = new double[readLength];
|
||||
final double[] deletionsErrors = new double[readLength];
|
||||
for ( int i = 0; i < readLength; i++ ) {
|
||||
bases[i] = 'A';
|
||||
baseQuals[i] = (byte)(i % SAMUtils.MAX_PHRED_SCORE);
|
||||
insertionQuals[i] = (byte)((i+1) % SAMUtils.MAX_PHRED_SCORE);
|
||||
deletionQuals[i] = (byte)((i+2) % SAMUtils.MAX_PHRED_SCORE);
|
||||
skips[i] = i % 2 == 0;
|
||||
snpErrors[i] = 1.0 / (i+1);
|
||||
insertionErrors[i] = 0.5 / (i+1);
|
||||
deletionsErrors[i] = 0.3 / (i+1);
|
||||
}
|
||||
|
||||
final EnumMap<EventType, double[]> errors = new EnumMap<EventType, double[]>(EventType.class);
|
||||
errors.put(EventType.BASE_SUBSTITUTION, snpErrors);
|
||||
errors.put(EventType.BASE_INSERTION, insertionErrors);
|
||||
errors.put(EventType.BASE_DELETION, deletionsErrors);
|
||||
|
||||
final EnumMap<EventType, byte[]> quals = new EnumMap<EventType, byte[]>(EventType.class);
|
||||
quals.put(EventType.BASE_SUBSTITUTION, baseQuals);
|
||||
quals.put(EventType.BASE_INSERTION, insertionQuals);
|
||||
quals.put(EventType.BASE_DELETION, deletionQuals);
|
||||
|
||||
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, baseQuals, readLength + "M");
|
||||
if ( includeIndelErrors ) {
|
||||
read.setBaseQualities(insertionQuals, EventType.BASE_INSERTION);
|
||||
read.setBaseQualities(deletionQuals, EventType.BASE_DELETION);
|
||||
}
|
||||
|
||||
final ReadRecalibrationInfo info = new ReadRecalibrationInfo(read, covariates, skips, snpErrors, insertionErrors, deletionsErrors);
|
||||
|
||||
Assert.assertEquals(info.getCovariatesValues(), covariates);
|
||||
Assert.assertEquals(info.getRead(), read);
|
||||
|
||||
for ( int i = 0; i < readLength; i++ ) {
|
||||
Assert.assertEquals(info.skip(i), skips[i]);
|
||||
for ( final EventType et : EventType.values() ) {
|
||||
Assert.assertEquals(info.getErrorFraction(et, i), errors.get(et)[i]);
|
||||
final byte expectedQual = et == EventType.BASE_SUBSTITUTION || includeIndelErrors ? quals.get(et)[i]: GATKSAMRecord.DEFAULT_INSERTION_DELETION_QUAL;
|
||||
Assert.assertEquals(info.getQual(et, i), expectedQual);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,106 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.progressmeter;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* UnitTests for the ProgressMeterDaemon
|
||||
*
|
||||
* User: depristo
|
||||
* Date: 8/24/12
|
||||
* Time: 11:25 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ProgressMeterDaemonUnitTest extends BaseTest {
|
||||
private GenomeLocParser genomeLocParser;
|
||||
|
||||
@BeforeClass
|
||||
public void init() throws FileNotFoundException {
|
||||
genomeLocParser = new GenomeLocParser(new CachingIndexedFastaSequenceFile(new File(b37KGReference)));
|
||||
}
|
||||
|
||||
// capture and count calls to progress
|
||||
private class TestingProgressMeter extends ProgressMeter {
|
||||
final List<Long> progressCalls = new LinkedList<Long>();
|
||||
|
||||
private TestingProgressMeter(final long poll) {
|
||||
super(null, "test", new GenomeLocSortedSet(genomeLocParser), poll);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected synchronized void printProgress(boolean mustPrint) {
|
||||
progressCalls.add(System.currentTimeMillis());
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "PollingData")
|
||||
public Object[][] makePollingData() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
for ( final int ticks : Arrays.asList(1, 5, 10) ) {
|
||||
for ( final int poll : Arrays.asList(10, 100) ) {
|
||||
tests.add(new Object[]{poll, ticks});
|
||||
}
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "PollingData", invocationCount = 10, successPercentage = 90)
|
||||
public void testMe(final long poll, final int ticks) throws InterruptedException {
|
||||
final TestingProgressMeter meter = new TestingProgressMeter(poll);
|
||||
final ProgressMeterDaemon daemon = meter.getProgressMeterDaemon();
|
||||
Assert.assertTrue(daemon.isDaemon());
|
||||
|
||||
Assert.assertFalse(daemon.isDone());
|
||||
Thread.sleep(ticks * poll);
|
||||
Assert.assertFalse(daemon.isDone());
|
||||
|
||||
daemon.done();
|
||||
Assert.assertTrue(daemon.isDone());
|
||||
|
||||
Assert.assertTrue(meter.progressCalls.size() >= 1,
|
||||
"Expected at least one progress update call from daemon thread, but only got " + meter.progressCalls.size() + " with exact calls " + meter.progressCalls);
|
||||
|
||||
final int tolerance = (int)Math.ceil(0.8 * meter.progressCalls.size());
|
||||
Assert.assertTrue(Math.abs(meter.progressCalls.size() - ticks) <= tolerance,
|
||||
"Expected " + ticks + " progress calls from daemon thread, but got " + meter.progressCalls.size() + " and a tolerance of only " + tolerance);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,86 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.progressmeter;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.AutoFormattingTime;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* UnitTests for the ProgressMeterData
|
||||
*
|
||||
* User: depristo
|
||||
* Date: 8/24/12
|
||||
* Time: 11:25 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ProgressMeterDataUnitTest extends BaseTest {
|
||||
@Test
|
||||
public void testBasic() {
|
||||
Assert.assertEquals(new ProgressMeterData(1.0, 2, 3).getElapsedSeconds(), 1.0);
|
||||
Assert.assertEquals(new ProgressMeterData(1.0, 2, 3).getUnitsProcessed(), 2);
|
||||
Assert.assertEquals(new ProgressMeterData(1.0, 2, 3).getBpProcessed(), 3);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFraction() {
|
||||
final double TOL = 1e-3;
|
||||
Assert.assertEquals(new ProgressMeterData(1.0, 1, 1).calculateFractionGenomeTargetCompleted(10), 0.1, TOL);
|
||||
Assert.assertEquals(new ProgressMeterData(1.0, 1, 2).calculateFractionGenomeTargetCompleted(10), 0.2, TOL);
|
||||
Assert.assertEquals(new ProgressMeterData(1.0, 1, 1).calculateFractionGenomeTargetCompleted(100), 0.01, TOL);
|
||||
Assert.assertEquals(new ProgressMeterData(1.0, 1, 2).calculateFractionGenomeTargetCompleted(100), 0.02, TOL);
|
||||
Assert.assertEquals(new ProgressMeterData(1.0, 1, 1).calculateFractionGenomeTargetCompleted(0), 1.0, TOL);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSecondsPerBP() {
|
||||
final double TOL = 1e-3;
|
||||
final long M = 1000000;
|
||||
Assert.assertEquals(new ProgressMeterData(1.0, 1, M).secondsPerMillionBP(), 1.0, TOL);
|
||||
Assert.assertEquals(new ProgressMeterData(1.0, 1, M/10).secondsPerMillionBP(), 10.0, TOL);
|
||||
Assert.assertEquals(new ProgressMeterData(2.0, 1, M).secondsPerMillionBP(), 2.0, TOL);
|
||||
Assert.assertEquals(new ProgressMeterData(1.0, 1, 0).secondsPerMillionBP(), 1e6, TOL);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSecondsPerElement() {
|
||||
final double TOL = 1e-3;
|
||||
final long M = 1000000;
|
||||
Assert.assertEquals(new ProgressMeterData(1.0, M, 1).secondsPerMillionElements(), 1.0, TOL);
|
||||
Assert.assertEquals(new ProgressMeterData(1.0, M/10, 1).secondsPerMillionElements(), 10.0, TOL);
|
||||
Assert.assertEquals(new ProgressMeterData(2.00, M, 1).secondsPerMillionElements(), 2.0, TOL);
|
||||
Assert.assertEquals(new ProgressMeterData(1.0, 0, 1).secondsPerMillionElements(), 1e6, TOL);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,61 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.recalibration;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
public final class EventTypeUnitTest extends BaseTest {
|
||||
@Test
|
||||
public void testEventTypes() {
|
||||
for ( final EventType et : EventType.values() ) {
|
||||
Assert.assertNotNull(et.toString());
|
||||
Assert.assertNotNull(et.prettyPrint());
|
||||
Assert.assertFalse("".equals(et.toString()));
|
||||
Assert.assertFalse("".equals(et.prettyPrint()));
|
||||
Assert.assertEquals(EventType.eventFrom(et.ordinal()), et);
|
||||
Assert.assertEquals(EventType.eventFrom(et.toString()), et);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testEventTypesEnumItself() {
|
||||
final Set<String> shortReps = new HashSet<String>();
|
||||
for ( final EventType et : EventType.values() ) {
|
||||
Assert.assertFalse(shortReps.contains(et.toString()), "Short representative for EventType has duplicates for " + et);
|
||||
shortReps.add(et.toString());
|
||||
}
|
||||
Assert.assertEquals(shortReps.size(), EventType.values().length, "Short representatives for EventType aren't unique");
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = IllegalArgumentException.class)
|
||||
public void testBadString() {
|
||||
EventType.eventFrom("asdfhalsdjfalkjsdf");
|
||||
}
|
||||
}
|
||||
|
|
@ -30,16 +30,13 @@ package org.broadinstitute.sting.utils.recalibration;
|
|||
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeSuite;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
public class RecalDatumUnitTest extends BaseTest {
|
||||
|
|
@ -74,7 +71,7 @@ public class RecalDatumUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
public RecalDatum makeRecalDatum() {
|
||||
return new RecalDatum(exTotal, exError, (byte)getReportedQual());
|
||||
return new RecalDatum((long)exTotal, (double)exError, (byte)getReportedQual());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
@ -83,13 +80,19 @@ public class RecalDatumUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
private static boolean createdDatumTestProviders = false;
|
||||
|
||||
@DataProvider(name = "RecalDatumTestProvider")
|
||||
public Object[][] makeRecalDatumTestProvider() {
|
||||
for ( int E : Arrays.asList(1, 10, 100, 1000, 10000) )
|
||||
for ( int N : Arrays.asList(10, 100, 1000, 10000, 100000, 1000000) )
|
||||
for ( int reportedQual : Arrays.asList(10, 20) )
|
||||
if ( E <= N )
|
||||
new RecalDatumTestProvider(E, N, reportedQual);
|
||||
if ( !createdDatumTestProviders ) {
|
||||
for ( int E : Arrays.asList(1, 10, 100, 1000, 10000) )
|
||||
for ( int N : Arrays.asList(10, 100, 1000, 10000, 100000, 1000000) )
|
||||
for ( int reportedQual : Arrays.asList(10, 20) )
|
||||
if ( E <= N )
|
||||
new RecalDatumTestProvider(E, N, reportedQual);
|
||||
createdDatumTestProviders = true;
|
||||
}
|
||||
|
||||
return RecalDatumTestProvider.getTests(RecalDatumTestProvider.class);
|
||||
}
|
||||
|
||||
|
|
@ -104,7 +107,6 @@ public class RecalDatumUnitTest extends BaseTest {
|
|||
Assert.assertEquals(datum.getNumObservations(), cfg.exTotal, 1E-6);
|
||||
if ( cfg.getReportedQual() != -1 )
|
||||
Assert.assertEquals(datum.getEstimatedQReportedAsByte(), cfg.getReportedQual());
|
||||
BaseTest.assertEqualsDoubleSmart(datum.getEmpiricalQuality(), cfg.getErrorRatePhredScaled());
|
||||
BaseTest.assertEqualsDoubleSmart(datum.getEmpiricalErrorRate(), cfg.getErrorRate());
|
||||
|
||||
final double e = datum.getEmpiricalQuality();
|
||||
|
|
@ -175,7 +177,83 @@ public class RecalDatumUnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testNoObs() {
|
||||
final RecalDatum rd = new RecalDatum(0, 0, (byte)10);
|
||||
final RecalDatum rd = new RecalDatum(0L, 0.0, (byte)10);
|
||||
Assert.assertEquals(rd.getEmpiricalErrorRate(), 0.0);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testlog10QempPrior() {
|
||||
for ( int Qemp = 0; Qemp <= QualityUtils.MAX_QUAL_SCORE; Qemp++ ) {
|
||||
for ( int Qrep = 0; Qrep <= QualityUtils.MAX_QUAL_SCORE; Qrep++ ) {
|
||||
final double log10prior = RecalDatum.log10QempPrior(Qemp, Qrep);
|
||||
Assert.assertTrue(log10prior < 0.0);
|
||||
Assert.assertFalse(Double.isInfinite(log10prior));
|
||||
Assert.assertFalse(Double.isNaN(log10prior));
|
||||
}
|
||||
}
|
||||
|
||||
final int Qrep = 20;
|
||||
int maxQemp = -1;
|
||||
double maxQempValue = -Double.MAX_VALUE;
|
||||
for ( int Qemp = 0; Qemp <= QualityUtils.MAX_QUAL_SCORE; Qemp++ ) {
|
||||
final double log10prior = RecalDatum.log10QempPrior(Qemp, Qrep);
|
||||
if ( log10prior > maxQempValue ) {
|
||||
maxQemp = Qemp;
|
||||
maxQempValue = log10prior;
|
||||
}
|
||||
}
|
||||
Assert.assertEquals(maxQemp, Qrep);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBayesianEstimateOfEmpiricalQuality() {
|
||||
|
||||
final int Qrep = 20;
|
||||
|
||||
// test no shift
|
||||
Assert.assertEquals(RecalDatum.bayesianEstimateOfEmpiricalQuality(0, 0, Qrep), (double)Qrep);
|
||||
Assert.assertEquals(RecalDatum.bayesianEstimateOfEmpiricalQuality(10, 0, Qrep), (double)Qrep);
|
||||
Assert.assertEquals(RecalDatum.bayesianEstimateOfEmpiricalQuality(1000, 10, Qrep), (double)Qrep);
|
||||
|
||||
// test small shift
|
||||
Assert.assertEquals(RecalDatum.bayesianEstimateOfEmpiricalQuality(10, 10, Qrep), Qrep - 1.0);
|
||||
Assert.assertEquals(RecalDatum.bayesianEstimateOfEmpiricalQuality(1000, 0, Qrep), Qrep + 1.0);
|
||||
|
||||
// test medium shift
|
||||
Assert.assertEquals(RecalDatum.bayesianEstimateOfEmpiricalQuality(10000, 0, Qrep), Qrep + 3.0);
|
||||
Assert.assertEquals(RecalDatum.bayesianEstimateOfEmpiricalQuality(10000, 10, Qrep), Qrep + 3.0);
|
||||
|
||||
// test large shift
|
||||
Assert.assertEquals(RecalDatum.bayesianEstimateOfEmpiricalQuality(100000, 10, Qrep), Qrep + 8.0);
|
||||
Assert.assertEquals(RecalDatum.bayesianEstimateOfEmpiricalQuality(1000000, 10, Qrep), Qrep + 16.0);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testlog10QempLikelihood() {
|
||||
|
||||
final double[] Qemps = new double[] { 0.0, 10.0, 20.0, 30.0 };
|
||||
final int[] observations = new int[] {0, 10, 1000, 1000000};
|
||||
final int[] errors = new int[] {0, 10, 1000, 1000000};
|
||||
|
||||
for ( double Qemp : Qemps ) {
|
||||
for ( int observation : observations ) {
|
||||
for ( int error : errors ) {
|
||||
if ( error > observation )
|
||||
continue;
|
||||
|
||||
final double log10likelihood = RecalDatum.log10QempLikelihood(Qemp, observation, error);
|
||||
Assert.assertTrue(observation == 0 ? MathUtils.compareDoubles(log10likelihood, 0.0) == 0 : log10likelihood < 0.0);
|
||||
Assert.assertFalse(Double.isInfinite(log10likelihood));
|
||||
Assert.assertFalse(Double.isNaN(log10likelihood));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
long bigNum = new Long((long)Integer.MAX_VALUE);
|
||||
bigNum *= 2L;
|
||||
final double log10likelihood = RecalDatum.log10QempLikelihood(30, bigNum, 100000);
|
||||
Assert.assertTrue(log10likelihood < 0.0);
|
||||
Assert.assertFalse(Double.isInfinite(log10likelihood));
|
||||
Assert.assertFalse(Double.isNaN(log10likelihood));
|
||||
}
|
||||
}
|
||||
|
|
@ -146,7 +146,7 @@ public final class RecalUtilsUnitTest extends BaseTest {
|
|||
public NestedIntegerArray<RecalDatum> makeTable(final List<Row> rows) {
|
||||
final NestedIntegerArray<RecalDatum> x = new NestedIntegerArray<RecalDatum>(3, 3);
|
||||
for ( final Row r : rows )
|
||||
x.put(new RecalDatum(r.no, r.ne, (byte)10), r.rg, r.qual);
|
||||
x.put(new RecalDatum((long)r.no, (double)r.ne, (byte)10), r.rg, r.qual);
|
||||
return x;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -20,9 +20,9 @@ public class RecalibrationReportUnitTest {
|
|||
private static RecalDatum createRandomRecalDatum(int maxObservations, int maxErrors) {
|
||||
final Random random = new Random();
|
||||
final int nObservations = random.nextInt(maxObservations);
|
||||
final int nErrors = random.nextInt(maxErrors);
|
||||
final int nErrors = Math.min(random.nextInt(maxErrors), nObservations);
|
||||
final int qual = random.nextInt(QualityUtils.MAX_QUAL_SCORE);
|
||||
return new RecalDatum(nObservations, nErrors, (byte)qual);
|
||||
return new RecalDatum((long)nObservations, (double)nErrors, (byte)qual);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
|
|
@ -90,14 +90,14 @@ public class RecalibrationReportUnitTest {
|
|||
final int[] covariates = rc.getKeySet(offset, errorMode);
|
||||
final int randomMax = errorMode == EventType.BASE_SUBSTITUTION ? 10000 : 100000;
|
||||
|
||||
rgTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], errorMode.index);
|
||||
qualTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], errorMode.index);
|
||||
rgTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], errorMode.ordinal());
|
||||
qualTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], errorMode.ordinal());
|
||||
nKeys += 2;
|
||||
for (int j = 0; j < optionalCovariates.size(); j++) {
|
||||
final NestedIntegerArray<RecalDatum> covTable = recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + j);
|
||||
final int covValue = covariates[RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index + j];
|
||||
final NestedIntegerArray<RecalDatum> covTable = recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal() + j);
|
||||
final int covValue = covariates[RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal() + j];
|
||||
if ( covValue >= 0 ) {
|
||||
covTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], covValue, errorMode.index);
|
||||
covTable.put(createRandomRecalDatum(randomMax, 10), covariates[0], covariates[1], covValue, errorMode.ordinal());
|
||||
nKeys++;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,15 +29,46 @@ import org.broadinstitute.sting.BaseTest;
|
|||
import org.broadinstitute.sting.utils.collections.NestedIntegerArray;
|
||||
import org.broadinstitute.sting.utils.recalibration.covariates.*;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeMethod;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
public final class RecalibrationTablesUnitTest extends BaseTest {
|
||||
private RecalibrationTables tables;
|
||||
private Covariate[] covariates;
|
||||
private int numReadGroups = 6;
|
||||
final byte qualByte = 1;
|
||||
final List<Integer> combineStates = Arrays.asList(0, 1, 2);
|
||||
|
||||
@BeforeMethod
|
||||
private void makeTables() {
|
||||
covariates = RecalibrationTestUtils.makeInitializedStandardCovariates();
|
||||
tables = new RecalibrationTables(covariates, numReadGroups);
|
||||
fillTable(tables);
|
||||
}
|
||||
|
||||
private void fillTable(final RecalibrationTables tables) {
|
||||
for ( int iterations = 0; iterations < 10; iterations++ ) {
|
||||
for ( final EventType et : EventType.values() ) {
|
||||
for ( final int rg : combineStates) {
|
||||
final double error = rg % 2 == 0 ? 1 : 0;
|
||||
RecalUtils.incrementDatumOrPutIfNecessary(tables.getReadGroupTable(), qualByte, error, rg, et.ordinal());
|
||||
for ( final int qual : combineStates) {
|
||||
RecalUtils.incrementDatumOrPutIfNecessary(tables.getQualityScoreTable(), qualByte, error, rg, qual, et.ordinal());
|
||||
for ( final int cycle : combineStates)
|
||||
RecalUtils.incrementDatumOrPutIfNecessary(tables.getTable(2), qualByte, error, rg, qual, cycle, et.ordinal());
|
||||
for ( final int context : combineStates)
|
||||
RecalUtils.incrementDatumOrPutIfNecessary(tables.getTable(3), qualByte, error, rg, qual, context, et.ordinal());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void basicTest() {
|
||||
final Covariate[] covariates = RecalibrationTestUtils.makeInitializedStandardCovariates();
|
||||
final int numReadGroups = 6;
|
||||
final RecalibrationTables tables = new RecalibrationTables(covariates, numReadGroups);
|
||||
|
||||
final Covariate qualCov = covariates[1];
|
||||
final Covariate cycleCov = covariates[2];
|
||||
final Covariate contextCov = covariates[3];
|
||||
|
|
@ -45,11 +76,11 @@ public final class RecalibrationTablesUnitTest extends BaseTest {
|
|||
Assert.assertEquals(tables.numTables(), covariates.length);
|
||||
|
||||
Assert.assertNotNull(tables.getReadGroupTable());
|
||||
Assert.assertEquals(tables.getReadGroupTable(), tables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE.index));
|
||||
Assert.assertEquals(tables.getReadGroupTable(), tables.getTable(RecalibrationTables.TableType.READ_GROUP_TABLE.ordinal()));
|
||||
testDimensions(tables.getReadGroupTable(), numReadGroups);
|
||||
|
||||
Assert.assertNotNull(tables.getQualityScoreTable());
|
||||
Assert.assertEquals(tables.getQualityScoreTable(), tables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE.index));
|
||||
Assert.assertEquals(tables.getQualityScoreTable(), tables.getTable(RecalibrationTables.TableType.QUALITY_SCORE_TABLE.ordinal()));
|
||||
testDimensions(tables.getQualityScoreTable(), numReadGroups, qualCov.maximumKeyValue() + 1);
|
||||
|
||||
Assert.assertNotNull(tables.getTable(2));
|
||||
|
|
@ -72,13 +103,74 @@ public final class RecalibrationTablesUnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void basicMakeQualityScoreTable() {
|
||||
final Covariate[] covariates = RecalibrationTestUtils.makeInitializedStandardCovariates();
|
||||
final int numReadGroups = 6;
|
||||
final RecalibrationTables tables = new RecalibrationTables(covariates, numReadGroups);
|
||||
|
||||
final Covariate qualCov = covariates[1];
|
||||
final NestedIntegerArray<RecalDatum> copy = tables.makeQualityScoreTable();
|
||||
testDimensions(copy, numReadGroups, qualCov.maximumKeyValue()+1);
|
||||
Assert.assertEquals(copy.getAllValues().size(), 0);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCombine1() {
|
||||
final RecalibrationTables merged = new RecalibrationTables(covariates, numReadGroups);
|
||||
fillTable(merged);
|
||||
|
||||
merged.combine(tables);
|
||||
|
||||
for ( int i = 0; i < tables.numTables(); i++ ) {
|
||||
NestedIntegerArray<RecalDatum> table = tables.getTable(i);
|
||||
NestedIntegerArray<RecalDatum> mergedTable = merged.getTable(i);
|
||||
|
||||
Assert.assertEquals(table.getAllLeaves().size(), mergedTable.getAllLeaves().size());
|
||||
for ( final NestedIntegerArray.Leaf<RecalDatum> leaf : table.getAllLeaves() ) {
|
||||
final RecalDatum mergedValue = mergedTable.get(leaf.keys);
|
||||
Assert.assertNotNull(mergedValue);
|
||||
Assert.assertEquals(mergedValue.getNumObservations(), leaf.value.getNumObservations() * 2);
|
||||
Assert.assertEquals(mergedValue.getNumMismatches(), leaf.value.getNumMismatches() * 2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCombineEmptyOther() {
|
||||
final RecalibrationTables merged = new RecalibrationTables(covariates, numReadGroups);
|
||||
|
||||
merged.combine(tables);
|
||||
|
||||
for ( int i = 0; i < tables.numTables(); i++ ) {
|
||||
NestedIntegerArray<RecalDatum> table = tables.getTable(i);
|
||||
NestedIntegerArray<RecalDatum> mergedTable = merged.getTable(i);
|
||||
|
||||
Assert.assertEquals(table.getAllLeaves().size(), mergedTable.getAllLeaves().size());
|
||||
for ( final NestedIntegerArray.Leaf<RecalDatum> leaf : table.getAllLeaves() ) {
|
||||
final RecalDatum mergedValue = mergedTable.get(leaf.keys);
|
||||
Assert.assertNotNull(mergedValue);
|
||||
Assert.assertEquals(mergedValue.getNumObservations(), leaf.value.getNumObservations());
|
||||
Assert.assertEquals(mergedValue.getNumMismatches(), leaf.value.getNumMismatches());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCombinePartial() {
|
||||
final RecalibrationTables merged = new RecalibrationTables(covariates, numReadGroups);
|
||||
for ( final int rg : combineStates) {
|
||||
RecalUtils.incrementDatumOrPutIfNecessary(merged.getTable(3), qualByte, 1, rg, 0, 0, 0);
|
||||
}
|
||||
|
||||
merged.combine(tables);
|
||||
for ( int i = 0; i < tables.numTables(); i++ ) {
|
||||
NestedIntegerArray<RecalDatum> table = tables.getTable(i);
|
||||
NestedIntegerArray<RecalDatum> mergedTable = merged.getTable(i);
|
||||
|
||||
Assert.assertEquals(table.getAllLeaves().size(), mergedTable.getAllLeaves().size());
|
||||
for ( final NestedIntegerArray.Leaf<RecalDatum> leaf : table.getAllLeaves() ) {
|
||||
final RecalDatum mergedValue = mergedTable.get(leaf.keys);
|
||||
Assert.assertNotNull(mergedValue);
|
||||
|
||||
final int delta = i == 3 && leaf.keys[1] == 0 && leaf.keys[2] == 0 && leaf.keys[3] == 0 ? 1 : 0;
|
||||
Assert.assertEquals(mergedValue.getNumObservations(), leaf.value.getNumObservations() + delta);
|
||||
Assert.assertEquals(mergedValue.getNumMismatches(), leaf.value.getNumMismatches() + delta);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue