Merge branch 'master' of github.com:broadinstitute/gsa-unstable

This commit is contained in:
Ami Levy-Moonshine 2013-01-03 13:42:31 -05:00
commit 10a705b27f
29 changed files with 500 additions and 3345 deletions

View File

@ -111,10 +111,9 @@
<!-- clover parameters -->
<property name="clover.jar" location="private/resources/clover/lib/clover.jar"/>
<property name="clover.instrument.level" location="method"/>
<property name="clover.instrument.level" value="method"/>
<taskdef resource="cloverlib.xml" classpath="${clover.jar}"/>
<!-- ******************************************************************************** -->
<!-- Filesets and paths -->
<!-- ******************************************************************************** -->
@ -273,19 +272,19 @@
<!-- Comment out the following lines to build the GATK without a network connection, assuming you have all of the libraries cached already -->
<!-- <get src="http://repo1.maven.org/maven2/org/apache/ivy/ivy/${ivy.install.version}/${ivy.jar.file}" -->
<!-- dest="${ivy.jar.dir}/${ivy.jar.file}" -->
<!-- usetimestamp="true"/> -->
<!-- <taskdef resource="org/apache/ivy/ant/antlib.xml" -->
<!-- uri="antlib:org.apache.ivy.ant" -->
<!-- classpath="${ivy.jar.dir}/${ivy.jar.file}"/> -->
<get src="http://repo1.maven.org/maven2/org/apache/ivy/ivy/${ivy.install.version}/${ivy.jar.file}"
dest="${ivy.jar.dir}/${ivy.jar.file}"
usetimestamp="true"/>
<taskdef resource="org/apache/ivy/ant/antlib.xml"
uri="antlib:org.apache.ivy.ant"
classpath="${ivy.jar.dir}/${ivy.jar.file}"/>
<!-- <get src="http://repo1.maven.org/maven2/org/apache/maven/maven-ant-tasks/${maven-ant-tasks.install.version}/${maven-ant-tasks.jar.file}" -->
<!-- dest="${ivy.jar.dir}/${maven-ant-tasks.jar.file}" -->
<!-- usetimestamp="true"/> -->
<!-- <taskdef resource="org/apache/maven/artifact/ant/antlib.xml" -->
<!-- uri="antlib:antlib:org.apache.maven.artifact.ant" -->
<!-- classpath="${ivy.jar.dir}/${maven-ant-tasks.jar.file}"/> -->
<get src="http://repo1.maven.org/maven2/org/apache/maven/maven-ant-tasks/${maven-ant-tasks.install.version}/${maven-ant-tasks.jar.file}"
dest="${ivy.jar.dir}/${maven-ant-tasks.jar.file}"
usetimestamp="true"/>
<taskdef resource="org/apache/maven/artifact/ant/antlib.xml"
uri="antlib:antlib:org.apache.maven.artifact.ant"
classpath="${ivy.jar.dir}/${maven-ant-tasks.jar.file}"/>
<!-- End network lines -->
@ -1031,6 +1030,14 @@
<delete dir="${scaladoc.dir}" />
</target>
<target name="-check.clover">
<available property="clover.installed" classname="com.cenqua.clover.CloverInstr" />
</target>
<target name="clean.clover" depends="-check.clover" if="clover.installed">
<clover-clean/>
</target>
<target name="clean.gsalib">
<!-- Currently not cleaning out the lib during 'ant clean' -->
<exec executable="R" failonerror="false">
@ -1038,7 +1045,7 @@
</exec>
</target>
<target name="clean" description="clean up" depends="clean.javadoc,clean.scaladoc,clean.gatkdocs">
<target name="clean" description="clean up" depends="clean.javadoc,clean.scaladoc,clean.gatkdocs,clean.clover">
<delete dir="${build.dir}"/>
<delete dir="${lib.dir}"/>
<delete dir="${contract.dump.dir}"/>
@ -1135,12 +1142,35 @@
<!-- Test targets -->
<target name="clover.clean">
<clover-clean/>
</target>
<target name="clover.report">
<clover-html-report outdir="clover_html" title="GATK Clover report"/>
<clover-report coverageCacheSize="nocache">
<current outfile="clover_html" title="GATK clover report" showUniqueCoverage="false" numThreads="4">
<format type="html" filter="catch,static,property"/>
<fileset dir="public">
<patternset id="clover.excludes">
<exclude name="**/*UnitTest.java"/>
<exclude name="**/*TestProvider*.java"/>
<exclude name="**/*PerformanceTest.java"/>
<exclude name="**/*Benchmark.java"/>
<exclude name="**/*LargeScaleTest.java"/>
<exclude name="**/*IntegrationTest.java"/>
<exclude name="**/jna/**/*.java"/>
<exclude name="**/queue/extensions/**/*.java"/>
<exclude name="**/sting/utils/help/*.java"/>
<exclude name="**/sting/tools/*.java"/>
<exclude name="**/datasources/reads/utilities/*.java"/>
<exclude name="**/sting/alignment/**/*.java"/>
<exclude name="**/examples/**/*.java"/>
</patternset>
</fileset>
<fileset dir="private">
<patternset refid="clover.excludes" />
</fileset>
<fileset dir="protected">
<patternset refid="clover.excludes" />
</fileset>
</current>
</clover-report>
</target>
<target name="with.clover">
@ -1148,6 +1178,7 @@
</clover-setup>
<property name="compile.scala" value="false" /> <!-- currently doesn't work with scala -->
<property name="test.maxmemory" value="32g"/> <!-- clover requires lots of memory -->
<echo message="Clover instrument level: ${clover.instrument.level}" />
</target>
<target name="test.init.compile">
@ -1247,7 +1278,7 @@
<jvmarg value="-Djava.awt.headless=true" />
<jvmarg value="-Dpipeline.run=${pipeline.run}" />
<jvmarg value="-Djava.io.tmpdir=${java.io.tmpdir}" />
<!-- <jvmarg line="${cofoja.jvm.args}"/> -->
<jvmarg line="${cofoja.jvm.args}"/>
<jvmarg line="${debug.jvm.args}"/>
<!-- NOTE: To run tests with debugging, use -Dtest.debug=true -Dtest.debug.port=XXXX on the command line -->

View File

@ -55,36 +55,36 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest {
@Test(enabled = true)
public void testSNP_ACS_Pools() {
PC_LSV_Test_short(" -maxAltAlleles 1 -ploidy 6 -out_mode EMIT_ALL_CONFIDENT_SITES","LSV_SNP_ACS","SNP","df0e67c975ef74d593f1c704daab1705");
PC_LSV_Test_short(" -maxAltAlleles 1 -ploidy 6 -out_mode EMIT_ALL_CONFIDENT_SITES","LSV_SNP_ACS","SNP","651469eeacdb3ab9e2690cfb71f6a634");
}
@Test(enabled = true)
public void testBOTH_GGA_Pools() {
PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","7e5b28c9e21cc7e45c58c41177d8a0fc");
PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","be7dc20bdb5f200d189706bcf1aeb7ee");
}
@Test(enabled = true)
public void testINDEL_GGA_Pools() {
PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","ae6c276cc46785a794acff6f7d10ecf7");
PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","25e5ea86d87b7d7ddaad834a6ed7481d");
}
@Test(enabled = true)
public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() {
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","6987b89e04dcb604d3743bb09aa9587d");
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","cdbf268d282e57189a88fb83f0e1fd72");
}
@Test(enabled = true)
public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() {
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","d0780f70365ed1b431099fd3b4cec449");
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","2ed40925cd112c1a45470d215b7ec4b3");
}
@Test(enabled = true)
public void testMT_SNP_DISCOVERY_sp4() {
PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","3fc6f4d458313616727c60e49c0e852b");
PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","33695a998bcc906cabcc758727004387");
}
@Test(enabled = true)
public void testMT_SNP_GGA_sp10() {
PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "1bebbc0f28bff6fd64736ccca8839df8");
PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "b2725242114bf9cc9bca14679705ba40");
}
}

View File

@ -30,7 +30,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
Arrays.asList("847605f4efafef89529fe0e496315edd"));
Arrays.asList("2ba9af34d2a4d55caf152265a30ead46"));
executeTest("test MultiSample Pilot1", spec);
}
@ -38,7 +38,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testWithAllelesPassedIn1() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
Arrays.asList("5b31b811072a4df04524e13604015f9b"));
Arrays.asList("0630c35c070d7a7e0cf22b3cce797f22"));
executeTest("test MultiSample Pilot2 with alleles passed in", spec1);
}
@ -46,7 +46,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testWithAllelesPassedIn2() {
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
Arrays.asList("d9992e55381afb43742cc9b30fcd7538"));
Arrays.asList("5857dcb4e6a8422ae0813e42d433b122"));
executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
}
@ -54,7 +54,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testSingleSamplePilot2() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
Arrays.asList("fea530fdc8677e10be4cc11625fa5376"));
Arrays.asList("489deda5d3276545364a06b7385f8bd9"));
executeTest("test SingleSample Pilot2", spec);
}
@ -62,7 +62,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultipleSNPAlleles() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
Arrays.asList("b41b95aaa2c453c9b75b3b29a9c2718e"));
Arrays.asList("595ba44c75d08dab98df222b8e61ab70"));
executeTest("test Multiple SNP alleles", spec);
}
@ -70,7 +70,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testBadRead() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH -I " + privateTestDir + "badRead.test.bam -o %s -L 1:22753424-22753464", 1,
Arrays.asList("d915535c1458733f09f82670092fcab6"));
Arrays.asList("360f9795facdaa14c0cb4b05207142e4"));
executeTest("test bad read", spec);
}
@ -78,7 +78,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testReverseTrim() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1,
Arrays.asList("e14c9b1f9f34d6c16de445bfa385be89"));
Arrays.asList("4b4a62429f8eac1e2f27ba5e2edea9e5"));
executeTest("test reverse trim", spec);
}
@ -86,7 +86,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMismatchedPLs() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1,
Arrays.asList("935ee705ffe8cc6bf1d9efcceea271c8"));
Arrays.asList("cc892c91a93dbd8dbdf645803f35a0ee"));
executeTest("test mismatched PLs", spec);
}
@ -96,7 +96,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
//
// --------------------------------------------------------------------------------------------------------------
private final static String COMPRESSED_OUTPUT_MD5 = "af8187e2baf516dde1cddea787a52b8a";
private final static String COMPRESSED_OUTPUT_MD5 = "3fc7d2681ff753e2d68605d7cf8b63e3";
@Test
public void testCompressedOutput() {
@ -149,7 +149,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMinBaseQualityScore() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --min_base_quality_score 26", 1,
Arrays.asList("6ee6537e9ebc1bfc7c6cf8f04b1582ff"));
Arrays.asList("04dc83d7dfb42b8cada91647bd9f32f1"));
executeTest("test min_base_quality_score 26", spec);
}
@ -157,7 +157,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testSLOD() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b36KGReference + " --computeSLOD --no_cmdline_in_header -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
Arrays.asList("55760482335497086458b09e415ecf54"));
Arrays.asList("4429a665a1048f958db3c204297cdb9f"));
executeTest("test SLOD", spec);
}
@ -165,7 +165,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testNDA() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " --annotateNDA -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
Arrays.asList("938e888a40182878be4c3cc4859adb69"));
Arrays.asList("f063e3573c513eaa9ce7d7df22143362"));
executeTest("test NDA", spec);
}
@ -173,7 +173,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testCompTrack() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -comp:FOO " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
Arrays.asList("7dc186d420487e4e156a24ec8dea0951"));
Arrays.asList("d76e93e2676354dde832f08a508c6f88"));
executeTest("test using comp track", spec);
}
@ -187,17 +187,17 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testOutputParameterSitesOnly() {
testOutputParameters("-sites_only", "f99c7471127a6fb6f72e136bc873b2c9");
testOutputParameters("-sites_only", "1a65172b9bd7a2023d48bc758747b34a");
}
@Test
public void testOutputParameterAllConfident() {
testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "9dbc9389db39cf9697e93e0bf529314f");
testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "3f1fa34d8440f6f21654ce60c0ba8f28");
}
@Test
public void testOutputParameterAllSites() {
testOutputParameters("--output_mode EMIT_ALL_SITES", "8b26088a035e579c4afd3b46737291e4");
testOutputParameters("--output_mode EMIT_ALL_SITES", "f240434b4d3c234f6f9e349e9ec05f4e");
}
private void testOutputParameters(final String args, final String md5) {
@ -211,7 +211,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testConfidence() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
Arrays.asList("4af83a883ecc03a23b0aa6dd4b8f1ceb"));
Arrays.asList("aec378bed312b3557c6dd7ec740c8091"));
executeTest("test confidence 1", spec1);
}
@ -222,12 +222,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
// --------------------------------------------------------------------------------------------------------------
@Test
public void testHeterozyosity1() {
testHeterozosity( 0.01, "8dd37249e0a80afa86594c3f1e720760" );
testHeterozosity( 0.01, "5da6b24033a6b02f466836443d49560e" );
}
@Test
public void testHeterozyosity2() {
testHeterozosity( 1.0 / 1850, "040d169e20fda56f8de009a6015eb384" );
testHeterozosity( 1.0 / 1850, "1f284c4af967a3c26687164f9441fb16" );
}
private void testHeterozosity(final double arg, final String md5) {
@ -251,7 +251,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,100,000",
1,
Arrays.asList("0e4713e4aa44f4f8fcfea7138295a627"));
Arrays.asList("cff553c53de970f64051ed5711407038"));
executeTest(String.format("test multiple technologies"), spec);
}
@ -270,7 +270,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -L 1:10,000,000-10,100,000" +
" -baq CALCULATE_AS_NECESSARY",
1,
Arrays.asList("46ea5d1ceb8eed1d0db63c3577915d6c"));
Arrays.asList("f960a91963e614a6c8d8cda57836df24"));
executeTest(String.format("test calling with BAQ"), spec);
}
@ -289,7 +289,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("f6f8fbf733f20fbc1dd9ebaf8faefe6c"));
Arrays.asList("46a6d24c82ebb99d305462960fa09b7c"));
executeTest(String.format("test indel caller in SLX"), spec);
}
@ -304,7 +304,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -minIndelCnt 1" +
" -L 1:10,000,000-10,100,000",
1,
Arrays.asList("4438ad0f03bbdd182d9bb59b15af0fa5"));
Arrays.asList("2be25321bbc6a963dba7ecba5dd76802"));
executeTest(String.format("test indel caller in SLX with low min allele count"), spec);
}
@ -317,7 +317,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("27b4ace2ad5a83d8cccb040f97f29183"));
Arrays.asList("d6b2657cd5a4a949968cdab50efce515"));
executeTest(String.format("test indel calling, multiple technologies"), spec);
}
@ -327,7 +327,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("b8129bf754490cc3c76191d8cc4ec93f"));
Arrays.asList("9cff66a321284c362f393bc4db21f756"));
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec);
}
@ -337,7 +337,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
+ privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("591332fa0b5b22778cf820ee257049d2"));
Arrays.asList("90c8cfcf65152534c16ed81104fc3bcd"));
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec);
}
@ -345,13 +345,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSampleIndels1() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
Arrays.asList("d3d518448b01bf0f751824b3d946cd04"));
Arrays.asList("457b8f899cf1665de61e75084dbb79d0"));
List<File> result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst();
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
Arrays.asList("2ea18a3e8480718a80a415d3fea79f54"));
Arrays.asList("a13fe7aa3b9e8e091b3cf3442a056ec1"));
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
}
@ -361,7 +361,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + privateTestDir + vcf + " -I " + validationDataLocation +
"NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam -o %s -L " + validationDataLocation + vcf, 1,
Arrays.asList("d76eacc4021b78ccc0a9026162e814a7"));
Arrays.asList("d075ad318739c8c56bdce857da1e48b9"));
executeTest("test GENOTYPE_GIVEN_ALLELES with no evidence in reads", spec);
}
@ -373,7 +373,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 20:10,000,000-10,100,000",
1,
Arrays.asList("1e0d2c15546c3b0959b00ffb75488b56"));
Arrays.asList("91c632ab17a1dd89ed19ebb20324f905"));
executeTest(String.format("test UG with base indel quality scores"), spec);
}
@ -407,7 +407,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMinIndelFraction0() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
assessMinIndelFraction + " -minIndelFrac 0.0", 1,
Arrays.asList("90adefd39ed67865b0cb275ad0f07383"));
Arrays.asList("1d80e135d611fe19e1fb1882aa588a73"));
executeTest("test minIndelFraction 0.0", spec);
}
@ -415,7 +415,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMinIndelFraction25() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
assessMinIndelFraction + " -minIndelFrac 0.25", 1,
Arrays.asList("2fded43949e258f8e9f68893c61c1bdd"));
Arrays.asList("752139616752902fca13c312d8fe5e22"));
executeTest("test minIndelFraction 0.25", spec);
}
@ -423,7 +423,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMinIndelFraction100() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
assessMinIndelFraction + " -minIndelFrac 1", 1,
Arrays.asList("3f07efb768e08650a7ce333edd4f9a52"));
Arrays.asList("d66b9decf26e1704abda1a919ac149cd"));
executeTest("test minIndelFraction 1.0", spec);
}
@ -437,7 +437,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testNsInCigar() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "testWithNs.bam -o %s -L 8:141813600-141813700 -out_mode EMIT_ALL_SITES", 1,
Arrays.asList("4d36969d4f8f1094f1fb6e7e085c19f6"));
Arrays.asList("b62ba9777efc05af4c36e2d4ce3ee67c"));
executeTest("test calling on reads with Ns in CIGAR", spec);
}
@ -451,18 +451,18 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testReducedBam() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
Arrays.asList("092e42a712afb660ec79ff11c55933e2"));
Arrays.asList("f72ecd00b2913f63788faa7dabb1d102"));
executeTest("test calling on a ReducedRead BAM", spec);
}
@Test
public void testReducedBamSNPs() {
testReducedCalling("SNP", "c0de74ab8f4f14eb3a2c5d55c200ac5f");
testReducedCalling("SNP", "f059743858004ceee325f2a7761a2362");
}
@Test
public void testReducedBamINDELs() {
testReducedCalling("INDEL", "9d5418ddf1b227ae4d463995507f2b1c");
testReducedCalling("INDEL", "04845ba1ec7d8d8b0eab2ca6bdb9c1a6");
}
@ -483,7 +483,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testContaminationDownsampling() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --contamination_fraction_to_filter 0.20", 1,
Arrays.asList("1f9071466fc40f4c6a0f58ac8e9135fb"));
Arrays.asList("b500ad5959bce69f888a2fac024647e5"));
executeTest("test contamination_percentage_to_filter 0.20", spec);
}

View File

@ -21,19 +21,19 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSample() {
HCTest(CEUTRIO_BAM, "", "839de31b41d4186e2b12a5601525e894");
HCTest(CEUTRIO_BAM, "", "7122d4f0ef94c5274aa3047cfebe08ed");
}
@Test
public void testHaplotypeCallerSingleSample() {
HCTest(NA12878_BAM, "", "2b68faa0e0493d92491d74b8f731963a");
HCTest(NA12878_BAM, "", "6cd6e6787521c07a7bae98766fd628ab");
}
// TODO -- add more tests for GGA mode, especially with input alleles that are complex variants and/or not trimmed
@Test
public void testHaplotypeCallerMultiSampleGGA() {
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
"a2d56179cd19a41f8bfb995e225320bb");
"44df2a9da4fbd2162ae44c3f2a6ef01f");
}
private void HCTestComplexVariants(String bam, String args, String md5) {
@ -44,7 +44,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSampleComplex() {
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "fd8d2ae8db9d98e932b0a7f345631eec");
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "4a413eeb7a75cab0ab5370b4c08dcf8e");
}
private void HCTestSymbolicVariants(String bam, String args, String md5) {
@ -55,7 +55,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerSingleSampleSymbolic() {
HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "0761ff5cbf279be467833fa6708bf360");
HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "77cf5b5273828dd1605bb23a5aeafcaa");
}
private void HCTestIndelQualityScores(String bam, String args, String md5) {
@ -66,20 +66,20 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerSingleSampleIndelQualityScores() {
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "6380e25c1ec79c6ae2f891ced15bf4e1");
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "87ca97f90e74caee35c35616c065821c");
}
@Test
public void HCTestProblematicReadsModifiedInActiveRegions() {
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965";
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("3a096d6139d15dcab82f5b091d08489d"));
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("3df42d0550b51eb9b55aac61e8b3c452"));
executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec);
}
@Test
public void HCTestStructuralIndels() {
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("a518c7436544f2b5f71c9d9427ce1cce"));
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("4dbc72b72e3e2d9d812d5a398490e213"));
executeTest("HCTestStructuralIndels: ", spec);
}
@ -93,7 +93,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void HCTestReducedBam() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
Arrays.asList("8a400b0c46f41447fcc35a907e34f384"));
Arrays.asList("f8c2745bf71f2659a57494fcaa2c103b"));
executeTest("HC calling on a ReducedRead BAM", spec);
}
}

View File

@ -284,7 +284,7 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
protected boolean abortExecution() {
final boolean abort = engine.exceedsRuntimeLimit(progressMeter.getRuntimeInNanoseconds(), TimeUnit.NANOSECONDS);
if ( abort ) {
final AutoFormattingTime aft = new AutoFormattingTime(TimeUnit.SECONDS.convert(engine.getRuntimeLimitInNanoseconds(), TimeUnit.NANOSECONDS), 1, 4);
final AutoFormattingTime aft = new AutoFormattingTime(engine.getRuntimeLimitInNanoseconds(), -1, 4);
logger.info("Aborting execution (cleanly) because the runtime has exceeded the requested maximum " + aft);
}
return abort;

View File

@ -13,6 +13,7 @@ import org.broadinstitute.variant.vcf.VCFInfoHeaderLine;
import org.broadinstitute.variant.variantcontext.Genotype;
import org.broadinstitute.variant.variantcontext.GenotypesContext;
import org.broadinstitute.variant.variantcontext.VariantContext;
import org.broadinstitute.variant.variantcontext.Allele;
import java.util.Arrays;
import java.util.HashMap;
@ -68,16 +69,49 @@ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotati
return null;
double QD = -10.0 * vc.getLog10PError() / (double)depth;
Map<String, Object> map = new HashMap<String, Object>();
if ( ! vc.isSNP() && ! vc.isSymbolic() ) {
// adjust for the event length
int averageLengthNum = 0;
int averageLengthDenom = 0;
int refLength = vc.getReference().length();
for ( Allele a : vc.getAlternateAlleles() ) {
int numAllele = vc.getCalledChrCount(a);
int alleleSize;
if ( a.length() == refLength ) {
// SNP or MNP
byte[] a_bases = a.getBases();
byte[] ref_bases = vc.getReference().getBases();
int n_mismatch = 0;
for ( int idx = 0; idx < a_bases.length; idx++ ) {
if ( a_bases[idx] != ref_bases[idx] )
n_mismatch++;
}
alleleSize = n_mismatch;
}
else if ( a.isSymbolic() ) {
alleleSize = 1;
} else {
alleleSize = Math.abs(refLength-a.length());
}
averageLengthNum += alleleSize*numAllele;
averageLengthDenom += numAllele;
}
double averageLength = ( (double) averageLengthNum )/averageLengthDenom;
QD /= averageLength;
map.put(getKeyNames().get(1),String.format("%.2f",averageLength));
}
map.put(getKeyNames().get(0), String.format("%.2f", QD));
return map;
}
public List<String> getKeyNames() { return Arrays.asList("QD"); }
public List<String> getKeyNames() { return Arrays.asList("QD","AAL"); }
public List<VCFInfoHeaderLine> getDescriptions() {
return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Variant Confidence/Quality by Depth"));
return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Variant Confidence/Quality by Depth"),
new VCFInfoHeaderLine(getKeyNames().get(1), 1, VCFHeaderLineType.Float, "Average Allele Length"));
}

View File

@ -50,7 +50,7 @@ public class ExactCallLogger implements Cloneable {
return String.format("ExactCall %s:%d alleles=%s nSamples=%s orig.pNonRef=%.2f orig.runtime=%s",
vc.getChr(), vc.getStart(), vc.getAlleles(), vc.getNSamples(),
originalCall.getLog10PosteriorOfAFGT0(),
new AutoFormattingTime(runtime / 1e9).toString());
new AutoFormattingTime(runtime).toString());
}
}

View File

@ -1,60 +0,0 @@
/*
* The MIT License
*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.indels;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMRecordCoordinateComparator;
/**
* Extends Picard's Comparator for sorting SAMRecords by coordinate. This one actually deals with unmapped reads
* (among other things) sitting at the same position as their mates (so that they both can be put into the same set).
*/
public class SAMRecordCoordinateComparatorWithUnmappedReads extends SAMRecordCoordinateComparator {
public int compare(final SAMRecord samRecord1, final SAMRecord samRecord2) {
int cmp = fileOrderCompare(samRecord1, samRecord2);
if ( cmp != 0 )
return cmp;
// deal with unmapped reads
if ( samRecord1.getReadUnmappedFlag() != samRecord2.getReadUnmappedFlag() )
return (samRecord1.getReadUnmappedFlag()? 1: -1);
if ( samRecord1.getReadNegativeStrandFlag() != samRecord2.getReadNegativeStrandFlag() )
return (samRecord1.getReadNegativeStrandFlag()? 1: -1);
// even the names can be the same
cmp = samRecord1.getReadName().compareTo(samRecord2.getReadName());
if ( cmp != 0 )
return cmp;
if ( samRecord1.getDuplicateReadFlag() != samRecord2.getDuplicateReadFlag() )
return (samRecord1.getDuplicateReadFlag()? -1: 1);
if ( samRecord1.getReadPairedFlag() && samRecord2.getReadPairedFlag() && samRecord1.getFirstOfPairFlag() != samRecord2.getFirstOfPairFlag() )
return (samRecord1.getFirstOfPairFlag()? -1: 1);
// such a case was actually observed
return samRecord1.getMappingQuality() - samRecord2.getMappingQuality();
}
}

View File

@ -80,6 +80,9 @@ public class VariantsToBinaryPed extends RodWalker<Integer,Integer> {
@Argument(fullName="majorAlleleFirst",required=false,doc="Sets the major allele to be 'reference' for the bim file, rather than the ref allele")
boolean majorAlleleFirst = false;
@Argument(fullName="checkAlternateAlleles",required=false,doc="Checks that alternate alleles actually appear in samples, erroring out if they do not")
boolean checkAlternateAlleles = false;
enum OutputMode { INDIVIDUAL_MAJOR,SNP_MAJOR }
private static double APPROX_CM_PER_BP = 1000000.0/750000.0;
@ -473,7 +476,8 @@ public class VariantsToBinaryPed extends RodWalker<Integer,Integer> {
System.arraycopy(ref.getBases(), 0, observedRefBases, 0, refLength);
final Allele observedRefAllele = Allele.create(observedRefBases);
vc.validateReferenceBases(reportedRefAllele, observedRefAllele);
vc.validateAlternateAlleles();
if ( checkAlternateAlleles )
vc.validateAlternateAlleles();
}
private String getReferenceAllele(VariantContext vc) {

View File

@ -1,93 +0,0 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils;
/**
* Represents a single amino acid.
*/
public class AminoAcid {
private String name;
private String threeLetterCode;
private String letter;
/**
* Constructor.
*
* @param letter The 1 letter code. (eg. I). This is '*' for the stop codon.
* @param name The full name of the AA (eg. Isoleucine).
* @param code The 3 letter code. (eg. Ile).
*/
public AminoAcid( String letter, String name, String code) {
this.name = name;
this.threeLetterCode = code;
this.letter = letter;
}
/** Equality based on the amino acid code. */
public boolean equals(Object o) {
if (this == o) { return true; }
if (o == null || !(o instanceof AminoAcid)) { return false; }
final AminoAcid aminoAcid = (AminoAcid) o;
return !(getCode() != null ? !getCode().equals(aminoAcid.getCode()) : aminoAcid.getCode() != null);
}
/** Hashes the three letter code. */
public int hashCode() {
return (getCode() != null ? getCode().hashCode() : 0);
}
/**
* Returns the full name of this AA.
*/
public String getName() { return name; }
/**
* Returns the 1 letter code for this AA.
*/
public String getLetter() { return letter; }
/**
* Returns the 3 letter code.
*/
public String getCode() { return threeLetterCode; }
/** Returns true if the amino acid is really just a stop codon. */
public boolean isStop() {
return "*".equals(getLetter());
}
/** Returns true if the amino acid is really just a stop codon. */
public boolean isUnknown() {
return "X".equals(getLetter());
}
public String toString() {
return name;
}
}

View File

@ -1,214 +0,0 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils;
import java.util.HashMap;
/**
* A simple {codon -> amino acid name} lookup table.
* Handles differences between mitochondrial and nuclear genes.
*/
public class AminoAcidTable {
protected static final AminoAcid UNKNOWN = new AminoAcid("X" , "Unknown", "Unk");
protected static final AminoAcid ISOLEUCINE = new AminoAcid("I" , "Isoleucine", "Ile");
protected static final AminoAcid LEUCINE = new AminoAcid("L" , "Leucine", "Leu");
protected static final AminoAcid VALINE = new AminoAcid("V" , "Valine", "Val");
protected static final AminoAcid PHENYLALANINE = new AminoAcid("F" , "Phenylalanine", "Phe");
protected static final AminoAcid METHIONINE = new AminoAcid("M" , "Methionine", "Met");
protected static final AminoAcid CYSTEINE = new AminoAcid("C" , "Cysteine", "Cys");
protected static final AminoAcid ALANINE = new AminoAcid("A" , "Alanine", "Ala");
protected static final AminoAcid STOP_CODON = new AminoAcid("*" , "Stop Codon", "Stop");
protected static final AminoAcid GLYCINE = new AminoAcid("G" , "Glycine", "Gly");
protected static final AminoAcid PROLINE = new AminoAcid("P" , "Proline", "Pro");
protected static final AminoAcid THEONINE = new AminoAcid("T" , "Threonine", "Thr");
protected static final AminoAcid SERINE = new AminoAcid("S" , "Serine", "Ser");
protected static final AminoAcid TYROSINE = new AminoAcid("Y" , "Tyrosine", "Tyr");
protected static final AminoAcid TRYPTOPHAN = new AminoAcid("W" , "Tryptophan", "Trp");
protected static final AminoAcid GLUTAMINE = new AminoAcid("Q" , "Glutamine", "Gln");
protected static final AminoAcid ASPARAGINE = new AminoAcid("N" , "Asparagine", "Asn");
protected static final AminoAcid HISTIDINE = new AminoAcid("H" , "Histidine", "His");
protected static final AminoAcid GLUTAMIC_ACID = new AminoAcid("E" , "Glutamic acid", "Glu");
protected static final AminoAcid ASPARTIC_ACID = new AminoAcid("D" , "Aspartic acid", "Asp");
protected static final AminoAcid LYSINE = new AminoAcid("K" , "Lysine", "Lys");
protected static final AminoAcid ARGININE = new AminoAcid("R" , "Arginine", "Arg");
protected static HashMap<String, AminoAcid> aminoAcidTable = new HashMap<String, AminoAcid>();
protected static HashMap<String, AminoAcid> mitochondrialAminoAcidTable = new HashMap<String, AminoAcid>();
static {
//populate the tables
aminoAcidTable.put("ATT", ISOLEUCINE);
aminoAcidTable.put("ATC", ISOLEUCINE);
aminoAcidTable.put("ATA", ISOLEUCINE);
aminoAcidTable.put("CTT", LEUCINE);
aminoAcidTable.put("CTC", LEUCINE);
aminoAcidTable.put("CTA", LEUCINE);
aminoAcidTable.put("CTG", LEUCINE);
aminoAcidTable.put("TTA", LEUCINE);
aminoAcidTable.put("TTG", LEUCINE);
aminoAcidTable.put("GTT", VALINE);
aminoAcidTable.put("GTC", VALINE);
aminoAcidTable.put("GTA", VALINE);
aminoAcidTable.put("GTG", VALINE);
aminoAcidTable.put("TTT", PHENYLALANINE);
aminoAcidTable.put("TTC", PHENYLALANINE);
aminoAcidTable.put("ATG", METHIONINE);
aminoAcidTable.put("TGT", CYSTEINE);
aminoAcidTable.put("TGC", CYSTEINE);
aminoAcidTable.put("GCT", ALANINE);
aminoAcidTable.put("GCC", ALANINE);
aminoAcidTable.put("GCA", ALANINE);
aminoAcidTable.put("GCG", ALANINE);
aminoAcidTable.put("GGT", GLYCINE);
aminoAcidTable.put("GGC", GLYCINE);
aminoAcidTable.put("GGA", GLYCINE);
aminoAcidTable.put("GGG", GLYCINE);
aminoAcidTable.put("CCT", PROLINE);
aminoAcidTable.put("CCC", PROLINE);
aminoAcidTable.put("CCA", PROLINE);
aminoAcidTable.put("CCG", PROLINE);
aminoAcidTable.put("ACT", THEONINE);
aminoAcidTable.put("ACC", THEONINE);
aminoAcidTable.put("ACA", THEONINE);
aminoAcidTable.put("ACG", THEONINE);
aminoAcidTable.put("TCT", SERINE);
aminoAcidTable.put("TCC", SERINE);
aminoAcidTable.put("TCA", SERINE);
aminoAcidTable.put("TCG", SERINE);
aminoAcidTable.put("AGT", SERINE);
aminoAcidTable.put("AGC", SERINE);
aminoAcidTable.put("TAT", TYROSINE);
aminoAcidTable.put("TAC", TYROSINE);
aminoAcidTable.put("TGG", TRYPTOPHAN);
aminoAcidTable.put("CAA", GLUTAMINE);
aminoAcidTable.put("CAG", GLUTAMINE);
aminoAcidTable.put("AAT", ASPARAGINE);
aminoAcidTable.put("AAC", ASPARAGINE);
aminoAcidTable.put("CAT", HISTIDINE);
aminoAcidTable.put("CAC", HISTIDINE);
aminoAcidTable.put("GAA", GLUTAMIC_ACID);
aminoAcidTable.put("GAG", GLUTAMIC_ACID);
aminoAcidTable.put("GAT", ASPARTIC_ACID);
aminoAcidTable.put("GAC", ASPARTIC_ACID);
aminoAcidTable.put("AAA", LYSINE);
aminoAcidTable.put("AAG", LYSINE);
aminoAcidTable.put("CGT", ARGININE);
aminoAcidTable.put("CGC", ARGININE);
aminoAcidTable.put("CGA", ARGININE);
aminoAcidTable.put("CGG", ARGININE);
aminoAcidTable.put("AGA", ARGININE);
aminoAcidTable.put("AGG", ARGININE);
aminoAcidTable.put("TAA", STOP_CODON );
aminoAcidTable.put("TAG", STOP_CODON);
aminoAcidTable.put("TGA", STOP_CODON);
//populate the mitochondrial AA table
mitochondrialAminoAcidTable.putAll(aminoAcidTable);
mitochondrialAminoAcidTable.put("AGA", STOP_CODON);
mitochondrialAminoAcidTable.put("AGG", STOP_CODON);
mitochondrialAminoAcidTable.put("ATA", METHIONINE);
mitochondrialAminoAcidTable.put("TGA", TRYPTOPHAN);
}
/**
* Returns the amino acid encoded by the given codon in a eukaryotic genome.
*
* @param codon The 3-letter mRNA nucleotide codon 5' to 3'. Expects T's instead of U's. Not case sensitive.
*
* @return The amino acid matching the given codon, or the UNKNOWN amino acid if the codon string doesn't match anything
*/
public static AminoAcid getEukaryoticAA(String codon) {
codon = codon.toUpperCase();
final AminoAcid aa = aminoAcidTable.get(codon);
return aa == null ? UNKNOWN : aa;
}
/**
* Returns the amino acid encoded by the given codon in a mitochondrial genome.
*
* @param codon The 3-letter mRNA nucleotide codon 5' to 3'. Expects T's instead of U's. Not case sensitive.
* @param isFirstCodon If this is the 1st codon in the gene, then "ATT" encodes Methyonine
*
* @return The amino acid matching the given codon in mitochondrial genes, or the UNKNOWN amino acid if the codon string doesn't match anything
*/
public static AminoAcid getMitochondrialAA(String codon, boolean isFirstCodon) {
codon = codon.toUpperCase();
final AminoAcid aa = mitochondrialAminoAcidTable.get(codon);
if(aa == null) {
return UNKNOWN;
} else if(isFirstCodon && codon.equals("ATT")) {
return METHIONINE; //special case - 'ATT' in the first codon of a mitochondrial gene codes for methionine instead of isoleucine
} else {
return aa;
}
}
}

View File

@ -1,32 +1,104 @@
package org.broadinstitute.sting.utils;
import java.util.concurrent.TimeUnit;
/**
* Simple utility class that makes it convenient to print unit adjusted times
* Conveniently print a time with an automatically determined time unit
*
* For example, if the amount of time is 10^6 seconds, instead of printing
* out 10^6 seconds, prints out 11.57 days instead.
*
* Dynamically uses time units:
*
* - seconds: s
* - minutes: m
* - hours : h
* - days : d
* - weeks : w
*
* @author depristo
* @since 2009
*/
public class AutoFormattingTime {
private static final double NANOSECONDS_PER_SECOND = 1e9;
/**
* Width a la format's %WIDTH.PERCISIONf
*/
private final int width; // for format
/**
* Precision a la format's %WIDTH.PERCISIONf
*/
private final int precision; // for format
double timeInSeconds; // in Seconds
private final String formatString;
/**
* The elapsed time in nanoseconds
*/
private final long nanoTime;
/**
* Create a new autoformatting time with elapsed time nanoTime in nanoseconds
* @param nanoTime the elapsed time in nanoseconds
* @param width the width >= 0 (a la format's %WIDTH.PERCISIONf) to use to display the format, or -1 if none is required
* @param precision the precision to display the time at. Must be >= 0;
*/
public AutoFormattingTime(final long nanoTime, final int width, int precision) {
if ( width < -1 ) throw new IllegalArgumentException("Width " + width + " must be >= -1");
if ( precision < 0 ) throw new IllegalArgumentException("Precision " + precision + " must be >= 0");
public AutoFormattingTime(double timeInSeconds, final int width, int precision) {
this.width = width;
this.timeInSeconds = timeInSeconds;
this.nanoTime = nanoTime;
this.precision = precision;
this.formatString = "%" + width + "." + precision + "f %s";
}
public AutoFormattingTime(double timeInSeconds, int precision) {
this(timeInSeconds, 6, precision);
/**
* @see #AutoFormattingTime(long, int, int) but with default width and precision
* @param nanoTime
*/
public AutoFormattingTime(final long nanoTime) {
this(nanoTime, 6, 1);
}
/**
* @see #AutoFormattingTime(long, int, int) but with time specificied as a double in seconds
*/
public AutoFormattingTime(final double timeInSeconds, final int width, final int precision) {
this(secondsToNano(timeInSeconds), width, precision);
}
/**
* @see #AutoFormattingTime(long) but with time specificied as a double in seconds
*/
public AutoFormattingTime(double timeInSeconds) {
this(timeInSeconds, 1);
this(timeInSeconds, 6, 1);
}
/**
* Precomputed format string suitable for string.format with the required width and precision
*/
private String getFormatString() {
final StringBuilder b = new StringBuilder("%");
if ( width != -1 )
b.append(width);
b.append(".").append(precision).append("f %s");
return b.toString();
}
/**
* Get the time associated with this object in nanoseconds
* @return the time in nanoseconds
*/
public long getTimeInNanoSeconds() {
return nanoTime;
}
/**
* Get the time associated with this object in seconds, as a double
* @return time in seconds as a double
*/
public double getTimeInSeconds() {
return timeInSeconds;
return TimeUnit.NANOSECONDS.toSeconds(getTimeInNanoSeconds());
}
/**
@ -44,15 +116,16 @@ public class AutoFormattingTime {
}
/**
* Instead of 10000 s, returns 2.8 hours
* @return
* Get a string representation of this time, automatically converting the time
* to a human readable unit with width and precision provided during construction
* @return a non-null string
*/
public String toString() {
double unitTime = timeInSeconds;
double unitTime = getTimeInSeconds();
String unit = "s";
if ( timeInSeconds > 120 ) {
unitTime = timeInSeconds / 60; // minutes
if ( unitTime > 120 ) {
unitTime /= 60; // minutes
unit = "m";
if ( unitTime > 120 ) {
@ -64,13 +137,24 @@ public class AutoFormattingTime {
unit = "d";
if ( unitTime > 20 ) {
unitTime /= 7; // days
unitTime /= 7; // weeks
unit = "w";
}
}
}
}
return String.format(formatString, unitTime, unit);
return String.format(getFormatString(), unitTime, unit);
}
/**
* Convert a time in seconds as a double into nanoseconds as a long
* @param timeInSeconds an elapsed time in seconds, as a double
* @return an equivalent value in nanoseconds as a long
*/
private static long secondsToNano(final double timeInSeconds) {
return (long)(NANOSECONDS_PER_SECOND * timeInSeconds);
}
}

View File

@ -287,6 +287,11 @@ public final class GenomeLocParser {
return new GenomeLoc(contig, index, start, stop);
}
public GenomeLoc createGenomeLocOnContig(final String contig, final int start, final int stop) {
GenomeLoc contigLoc = createOverEntireContig(contig);
return new GenomeLoc(contig, getContigIndex(contig), start, stop).intersect(contigLoc);
}
/**
* validate a position or interval on the genome as valid
*

View File

@ -14,7 +14,8 @@ public class QualityUtils {
public final static double ERROR_RATE_OF_MAX_QUAL_SCORE = qualToErrorProbRaw(MAX_QUAL_SCORE);
public final static double MIN_REASONABLE_ERROR = 0.0001;
public final static byte MAX_REASONABLE_Q_SCORE = 60; // quals above this value are extremely suspicious
public final static byte MAX_REASONABLE_Q_SCORE = 60; // bams containing quals above this value are extremely suspicious and we should warn the user
public final static byte MAX_GATK_USABLE_Q_SCORE = 40; // quals above this value should be capped down to this value (because they are too high)
public final static byte MIN_USABLE_Q_SCORE = 6;
public final static int MAPPING_QUALITY_UNAVAILABLE = 255;
@ -73,7 +74,7 @@ public class QualityUtils {
}
public static double qualToErrorProb(final double qual) {
return Math.pow(10.0, ((double) qual)/-10.0);
return Math.pow(10.0, qual/-10.0);
}

View File

@ -31,7 +31,7 @@ public class ActiveRegion implements HasGenomeLocation {
this.isActive = isActive;
this.genomeLocParser = genomeLocParser;
this.extension = extension;
extendedLoc = genomeLocParser.createGenomeLoc(activeRegionLoc.getContig(), activeRegionLoc.getStart() - extension, activeRegionLoc.getStop() + extension);
extendedLoc = genomeLocParser.createGenomeLocOnContig(activeRegionLoc.getContig(), activeRegionLoc.getStart() - extension, activeRegionLoc.getStop() + extension);
fullExtentReferenceLoc = extendedLoc;
}

View File

@ -1,231 +0,0 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.collections;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
/** This class, closely resembling a deque (except that it is not dynamically grown),
* provides an object with array-like interface and efficient
* implementation of shift operation. Use this class when some kind of sliding window is required:
* e.g. an array (window) is populated from some stream of data, and then the window is shifted.
* If most of the data in the window remains the same so that only a few old elements sjould be popped from
* and a few new elements pushed onto the array, both re-populating the whole array from the data and
* shifting a regular array would be grossly inefficient. Instead, shiftData(int N) method of circular array
* efficiently pops out N first elements and makes last N elements available.
*
* Consider an example of reading a character stream A,B,C,D,....,Z into an array with requirement of keeping
* last 5 letters. First, we would read first 5 letters same way as we would with a regular array:<br><br>
*
* <code>
* CircularArray a(5);<br>
* for ( int i = 0; i < 5; i++ ) a.set(i, readChar());<br>
* </code>
* <br>
* and then on the arrival of each next character we shift the array:<br><br>
*
* <code>
* a.shiftData(1); a.set(4, readChar() );<br>
* </code>
* <br>
* After the lines from the above example are executed, the array will <i>logically</i> look as:<br>
*
* B,C,D,E,F,<br><br>
*
* e.g. as if we had a regular array, shifted it one element down and added new element on the top.
*
*
* @author asivache
*
*/
public class CircularArray <T> {
private Object[] data ;
private int offset;
/** Creates an array of fixed length */
public CircularArray(int length) {
if ( length <= 0 ) throw new ReviewedStingException("CircularArray length must be positive. Passed: "+length);
data = new Object[length];
offset = 0;
}
/** Returns length of the array */
public int length() {
return data.length;
}
/** Gets i-th element of the array
*
* @throws IndexOutOfBoundsException if value of i is illegal
*/
@SuppressWarnings("unchecked")
public T get(int i) {
if ( i < 0 || i >= data.length )
throw new IndexOutOfBoundsException("Length of CircularArray: "+data.length+"; element requested: "+i);
return (T)(data [ ( offset + i ) % data.length ]);
}
/** Sets i-th element of the array to the specified value.
*
* @throws IndexOutOfBoundsException if value of i is illegal
*/
public void set(int i, T value) {
if ( i < 0 || i >= data.length )
throw new IndexOutOfBoundsException("Length of CircularArray: "+data.length+"; set element request at: "+i);
data [ ( offset + i ) % data.length ] = value;
}
/** Set all elements to null.
*
*/
public void clear() {
for ( int i = 0 ; i < data.length ; i++ ) data[i] = null;
offset = 0;
}
/** Efficient shift-down of the array data. After this operation, array.get(0), array.get(1), etc will
* be returning what array.get(shift), array.get(shift+1),... were returning before the shift was performed,
* and last shift elements of the array will be reset to 0.
* @param shift
*/
public void shiftData(int shift) {
if ( shift >= data.length ) {
// if we shift by more than the length of stored data, we lose
// all that data completely, so we just re-initialize the array.
// This is not the operating mode CircularArray is intended for
// but we can handle it, just in case.
for ( int i = 0 ; i < data.length ; i++ ) data[i] = null;
offset = 0;
return;
}
// shift < data.length, so at least some data should be preserved
final int newOffset = ( offset+shift ) % data.length;
if ( newOffset < offset ) {
// wrap-around!
for ( int i = offset ; i < data.length ; i++ ) data[i] = null;
for ( int i = 0; i < newOffset ; i++ ) data[i] = null;
} else {
for ( int i = offset ; i < newOffset ; i++ ) data[i] = null;
}
offset = newOffset;
}
/** Implements primitive int type-based circular array. See CircularArray for details.
*
* @author asivache
*
*/
public static class Int {
private int [] data ;
private int offset;
/** Creates an array of fixed length */
public Int(int length) {
if ( length <= 0 ) throw new ReviewedStingException("CircularArray length must be positive. Passed: "+length);
data = new int[length]; // automaticaly initialized to zeros
offset = 0;
}
/** Returns length of the array */
public int length() {
return data.length;
}
/** Gets i-th element of the array
*
* @throws IndexOutOfBoundsException if value of i is illegal
*/
public int get(int i) {
if ( i < 0 || i >= data.length )
throw new IndexOutOfBoundsException("Length of CircularArray: "+data.length+"; element requested: "+i);
return data [ ( offset + i ) % data.length ];
}
/** Sets i-th element of the array to the specified value.
*
* @throws IndexOutOfBoundsException if value of i is illegal
*/
public void set(int i, int value) {
if ( i < 0 || i >= data.length )
throw new IndexOutOfBoundsException("Length of CircularArray: "+data.length+"; set element request at: "+i);
data [ ( offset + i ) % data.length ] = value;
}
/** Increments i-th element of the array by the specified value (value can be negative).
*
* @throws IndexOutOfBoundsException if i is illegal
*/
public void increment(int i, int value) {
if ( i < 0 || i >= data.length )
throw new IndexOutOfBoundsException("Length of CircularArray: "+data.length+"; increment element request at: "+i);
data [ ( offset + i ) % data.length ] += value;
}
/** Set all elements to 0.
*
*/
public void clear() {
for ( int i = 0 ; i < data.length ; i++ ) data[i] = 0;
offset = 0;
}
/** Efficient shift-down of the array data. After this operation, array.get(0), array.get(1), etc will
* be returning what array.get(shift), array.get(shift+1),... were returning before the shift was performed,
* and last shift elements of the array will be reset to 0.
* @param shift
*/
public void shiftData(int shift) {
if ( shift >= data.length ) {
// if we shift by more than the length of stored data, we lose
// all that data completely, so we just re-initialize the array.
// This is not the operating mode CircularArray is intended for
// but we can handle it, just in case.
for ( int i = 0 ; i < data.length ; i++ ) data[i] = 0;
offset = 0;
return;
}
// shift < data.length, so at least some data should be preserved
final int newOffset = ( offset+shift ) % data.length;
if ( newOffset < offset ) {
// wrap-around!
for ( int i = offset ; i < data.length ; i++ ) data[i] = 0;
for ( int i = 0; i < newOffset ; i++ ) data[i] = 0;
} else {
for ( int i = offset ; i < newOffset ; i++ ) data[i] = 0;
}
offset = newOffset;
}
}
}

View File

@ -1,195 +0,0 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.interval;
import org.broadinstitute.sting.gatk.iterators.PushbackIterator;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.Iterator;
/**
* Created by IntelliJ IDEA.
* User: asivache
* Date: Oct 7, 2010
* Time: 2:40:02 PM
* To change this template use File | Settings | File Templates.
*/
/** This class provides an adapter to Iterator<GenomeLoc> that returns only (parts of) underlying iterator's
* intervals overlapping with specified "master set" of bounding intervals. The underlying iterator must return
* NON-overlapping intervals in coordinate-sorted order, otherwise the behavior is unspecified. If the master set is represented by
* another interval iterator, it should return sorted and NON-overlapping intervals.
*
*/
public class OverlappingIntervalIterator implements Iterator<GenomeLoc> {
PushbackIterator<GenomeLoc> iter = null;
PushbackIterator<GenomeLoc> boundBy = null;
GenomeLoc prefetchedOverlap = null;
GenomeLoc currentBound = null;
GenomeLoc currentInterval = null;
/** Creates new overlapping iterator that will internally traverse <code>intervals</code> and return only
* overlaps of those with set of intervals returned by <code>boundBy</code>.
* @param intervals
* @param boundBy
*/
public OverlappingIntervalIterator(Iterator<GenomeLoc> intervals, Iterator<GenomeLoc> boundBy) {
this.iter = new PushbackIterator<GenomeLoc>(intervals);
this.boundBy = new PushbackIterator<GenomeLoc>(boundBy);
if ( iter.hasNext() && boundBy.hasNext() ) {
currentInterval = iter.next(); // load first interval
currentBound = boundBy.next(); // load first bounding interval
fetchNextOverlap();
}
}
/** Traverses both iterators in sync, until the first overlap between the two is reached. If no overlap is found
* until the end of the either of the two streams, leaves prefetchedOverlap set to null
*/
private void fetchNextOverlap() {
prefetchedOverlap = null;
// System.out.println("Fetching... (interval="+currentInterval+"; bound="+currentBound+")");
while ( currentInterval != null && currentBound != null ) {
if ( currentInterval.isBefore(currentBound) ) {
// System.out.println(currentInterval +" is before "+currentBound );
if ( ! iter.hasNext() ) currentInterval = null;
else currentInterval = iter.next();
continue;
}
if ( currentInterval.isPast(currentBound) ) {
// System.out.println(currentInterval +" is past "+currentBound );
if ( ! boundBy.hasNext() ) currentBound = null;
else currentBound = boundBy.next();
continue;
}
// we are at this point only if currentInterval overlaps with currentBound
prefetchedOverlap = currentInterval.intersect(currentBound);
// System.out.println("Fetched next overlap: "+prefetchedOverlap);
// now we need to advance at least one of the iterators, so that we would not
// call the same overlap again
// however we still do not know if we are done with either current interval or current bound, because
// two special situations are possible:
//
// 1) next interval overlaps with 2) current interval also overlaps with
// the same bounding interval; next bounding interval; note that
// note that in this case next in this case next bound necessarily
// interval necessarily starts before starts before the next interval
// the next bound
//
// curr. int next int. curr. int
// ----- ------ --------------------------
// ------------------- --------- -------------
// curr. bound curr. bound next bound
// To solve this issue we update either only currentInterval or only currentBound to their next value,
// whichever of those next values (intervals) comes first on the reference genome;
// the rest of the traversal to the next overlap will be performed on the next invocation of
// fetchNextOverlap().
advanceToNearest();
break; // now that we computed the overlap and advanced (at least one of) the intervals/bounds to
// the next location, we are done - bail out from the loop.
}
}
private void advanceToNearest() {
if ( ! iter.hasNext() ) {
currentBound = boundBy.hasNext() ? boundBy.next() : null;
} else {
if ( ! boundBy.hasNext() ) currentInterval = iter.hasNext() ? iter.next() : null;
else {
// both intervals and bounds have next value available; let's check which comes first:
GenomeLoc nextInterval = iter.next();
GenomeLoc nextBound = boundBy.next();
if ( nextInterval.compareTo(nextBound) < 0 ) {
currentInterval = nextInterval;
boundBy.pushback(nextBound);
} else {
currentBound = nextBound;
iter.pushback(nextInterval);
}
}
}
}
/**
* Returns <tt>true</tt> if the iteration has more elements. (In other
* words, returns <tt>true</tt> if <tt>next</tt> would return an element
* rather than throwing an exception.)
*
* @return <tt>true</tt> if the iterator has more elements.
*/
public boolean hasNext() {
return prefetchedOverlap != null;
}
/**
* Returns the next element in the iteration.
*
* @return the next element in the iteration.
* @throws java.util.NoSuchElementException
* iteration has no more elements.
*/
public GenomeLoc next() {
if ( prefetchedOverlap == null )
throw new java.util.NoSuchElementException("Illegal call to next(): Overlapping iterator has no more overlaps");
GenomeLoc ret = prefetchedOverlap; // cache current prefetched overlap
fetchNextOverlap(); // prefetch next overlap
return ret ;
}
/**
* Removes from the underlying collection the last element returned by the
* iterator (optional operation). This method can be called only once per
* call to <tt>next</tt>. The behavior of an iterator is unspecified if
* the underlying collection is modified while the iteration is in
* progress in any way other than by calling this method.
*
* @throws UnsupportedOperationException if the <tt>remove</tt>
* operation is not supported by this Iterator.
* @throws IllegalStateException if the <tt>next</tt> method has not
* yet been called, or the <tt>remove</tt> method has already
* been called after the last call to the <tt>next</tt>
* method.
*/
public void remove() {
throw new UnsupportedOperationException("remove() method is not supported by OverlappingIntervalIterator");
//To change body of implemented methods use File | Settings | File Templates.
}
}

View File

@ -76,7 +76,7 @@ public class BaseRecalibration {
quantizationInfo = recalibrationReport.getQuantizationInfo();
if (quantizationLevels == 0) // quantizationLevels == 0 means no quantization, preserve the quality scores
quantizationInfo.noQuantization();
else if (quantizationLevels > 0 && quantizationLevels != quantizationInfo.getQuantizationLevels()) // any other positive value means, we want a different quantization than the one pre-calculated in the recalibration report. Negative values mean the user did not provide a quantization argument, and just wnats to use what's in the report.
else if (quantizationLevels > 0 && quantizationLevels != quantizationInfo.getQuantizationLevels()) // any other positive value means, we want a different quantization than the one pre-calculated in the recalibration report. Negative values mean the user did not provide a quantization argument, and just wants to use what's in the report.
quantizationInfo.quantizeQualityScores(quantizationLevels);
this.disableIndelQuals = disableIndelQuals;
@ -233,9 +233,9 @@ public class BaseRecalibration {
* Note that this calculation is a constant for each rgKey and errorModel. We need only
* compute this value once for all data.
*
* @param rgKey
* @param errorModel
* @return
* @param rgKey read group key
* @param errorModel the event type
* @return global delta Q
*/
private double calculateGlobalDeltaQ(final int rgKey, final EventType errorModel) {
double result = 0.0;

View File

@ -175,8 +175,7 @@ public class QualQuantizer {
}
/**
* Human readable name of this interval: e.g., 10-12
* @return
* @return Human readable name of this interval: e.g., 10-12
*/
public String getName() {
return qStart + "-" + qEnd;
@ -188,8 +187,7 @@ public class QualQuantizer {
}
/**
* Returns the error rate (in real space) of this interval, or 0 if there are no obserations
* @return
* @return the error rate (in real space) of this interval, or 0 if there are no observations
*/
@Ensures("result >= 0.0")
public double getErrorRate() {
@ -202,9 +200,7 @@ public class QualQuantizer {
}
/**
* Returns the QUAL of the error rate of this interval, or the fixed
* qual if this interval was created with a fixed qual.
* @return
* @return the QUAL of the error rate of this interval, or the fixed qual if this interval was created with a fixed qual.
*/
@Ensures("result >= 0 && result <= QualityUtils.MAX_QUAL_SCORE")
public byte getQual() {
@ -254,7 +250,7 @@ public class QualQuantizer {
final QualInterval right = this.compareTo(toMerge) < 0 ? toMerge : this;
if ( left.qEnd + 1 != right.qStart )
throw new ReviewedStingException("Attempting to merge non-continguous intervals: left = " + left + " right = " + right);
throw new ReviewedStingException("Attempting to merge non-contiguous intervals: left = " + left + " right = " + right);
final long nCombinedObs = left.nObservations + right.nObservations;
final long nCombinedErr = left.nErrors + right.nErrors;
@ -343,8 +339,7 @@ public class QualQuantizer {
}
/**
* Helper function that finds and mergest together the lowest penalty pair
* of intervals
* Helper function that finds and merges together the lowest penalty pair of intervals
* @param intervals
*/
@Requires("! intervals.isEmpty()")

View File

@ -28,9 +28,10 @@ package org.broadinstitute.sting.utils.recalibration;
import com.google.java.contract.Ensures;
import com.google.java.contract.Invariant;
import com.google.java.contract.Requires;
import org.apache.commons.math.optimization.fitting.GaussianFunction;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import java.util.Random;
/**
* An individual piece of recalibration data. Each bin counts up the number of observations and the number
@ -149,7 +150,7 @@ public class RecalDatum {
//---------------------------------------------------------------------------------------------------------------
/**
* Returns the error rate (in real space) of this interval, or 0 if there are no obserations
* Returns the error rate (in real space) of this interval, or 0 if there are no observations
* @return the empirical error rate ~= N errors / N obs
*/
@Ensures("result >= 0.0")
@ -262,6 +263,9 @@ public class RecalDatum {
@Requires("empiricalQuality == UNINITIALIZED")
@Ensures("empiricalQuality != UNINITIALIZED")
private synchronized void calcEmpiricalQuality() {
// TODO -- add code for Bayesian estimate of Qemp here
final double empiricalQual = -10 * Math.log10(getEmpiricalErrorRate());
empiricalQuality = Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE);
}
@ -276,4 +280,65 @@ public class RecalDatum {
private double calcExpectedErrors() {
return getNumObservations() * QualityUtils.qualToErrorProb(estimatedQReported);
}
}
static final boolean DEBUG = false;
static final double RESOLUTION_BINS_PER_QUAL = 1.0;
static public double bayesianEstimateOfEmpiricalQuality(final double nObservations, final double nErrors, final double QReported) {
final int numBins = (QualityUtils.MAX_REASONABLE_Q_SCORE + 1) * (int)RESOLUTION_BINS_PER_QUAL;
final double[] log10Posteriors = new double[numBins];
for ( int bin = 0; bin < numBins; bin++ ) {
final double QEmpOfBin = bin / RESOLUTION_BINS_PER_QUAL;
log10Posteriors[bin] = log10QempPrior(QEmpOfBin, QReported) + log10Likelihood(QEmpOfBin, nObservations, nErrors);
if ( DEBUG )
System.out.println(String.format("bin = %d, Qreported = %f, nObservations = %f, nErrors = %f, posteriors = %f", bin, QReported, nObservations, nErrors, log10Posteriors[bin]));
}
if ( DEBUG )
System.out.println(String.format("Qreported = %f, nObservations = %f, nErrors = %f", QReported, nObservations, nErrors));
final double[] normalizedPosteriors = MathUtils.normalizeFromLog10(log10Posteriors);
final int MLEbin = MathUtils.maxElementIndex(normalizedPosteriors);
final double Qemp = MLEbin / RESOLUTION_BINS_PER_QUAL;
return Qemp;
}
static final double[] log10QempPriorCache = new double[QualityUtils.MAX_GATK_USABLE_Q_SCORE + 1];
static {
// f(x) = a + b*exp(-((x - c)^2 / (2*d^2)))
// Note that b is the height of the curve's peak, c is the position of the center of the peak, and d controls the width of the "bell".
final double GF_a = 0.0;
final double GF_b = 0.9;
final double GF_c = 0.0;
final double GF_d = 0.5;
final GaussianFunction gaussian = new GaussianFunction(GF_a, GF_b, GF_c, GF_d);
for ( int i = 0; i <= QualityUtils.MAX_GATK_USABLE_Q_SCORE; i++ )
log10QempPriorCache[i] = Math.log10(gaussian.value((double) i));
}
static public double log10QempPrior(final double Qempirical, final double Qreported) {
final int difference = Math.min(Math.abs((int) (Qempirical - Qreported)), QualityUtils.MAX_GATK_USABLE_Q_SCORE);
if ( DEBUG )
System.out.println(String.format("Qemp = %f, log10Priors = %f", Qempirical, log10QempPriorCache[difference]));
return log10QempPriorCache[difference];
}
static public double log10Likelihood(final double Qempirical, final double nObservations, final double nErrors) {
// this is just a straight binomial PDF
double log10Prob = MathUtils.log10BinomialProbability((int)nObservations, (int)nErrors, QualityUtils.qualToErrorProbLog10((byte)(int)Qempirical));
if ( log10Prob == Double.NEGATIVE_INFINITY )
log10Prob = -Double.MAX_VALUE;
if ( DEBUG )
System.out.println(String.format("Qemp = %f, log10Likelihood = %f", Qempirical, log10Prob));
return log10Prob;
}
}

View File

@ -9,8 +9,7 @@ import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
import java.io.File;
import java.io.PrintStream;
import java.io.*;
import java.util.*;
/**
@ -199,7 +198,7 @@ public class RecalibrationReport {
private RecalDatum getRecalDatum(final GATKReportTable reportTable, final int row, final boolean hasEstimatedQReportedColumn) {
final double nObservations = asDouble(reportTable.get(row, RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME));
final double nErrors = asDouble(reportTable.get(row, RecalUtils.NUMBER_ERRORS_COLUMN_NAME));
final double empiricalQuality = (Double) reportTable.get(row, RecalUtils.EMPIRICAL_QUALITY_COLUMN_NAME);
final double empiricalQuality = asDouble(reportTable.get(row, RecalUtils.EMPIRICAL_QUALITY_COLUMN_NAME));
// the estimatedQreported column only exists in the ReadGroup table
final double estimatedQReported = hasEstimatedQReportedColumn ?

View File

@ -236,18 +236,6 @@ public class AlignmentUtils {
return n;
}
public static int getNumAlignedBases(final SAMRecord r) {
int n = 0;
final Cigar cigar = r.getCigar();
if (cigar == null) return 0;
for (final CigarElement e : cigar.getCigarElements())
if (e.getOperator() == CigarOperator.M)
n += e.getLength();
return n;
}
public static int getNumAlignedBasesCountingSoftClips(final SAMRecord r) {
int n = 0;
final Cigar cigar = r.getCigar();
@ -512,47 +500,6 @@ public class AlignmentUtils {
return true;
}
/**
* Returns true is read is mapped and mapped uniquely (Q>0).
*
* @param read
* @return
*/
public static boolean isReadUniquelyMapped(SAMRecord read) {
return (!AlignmentUtils.isReadUnmapped(read)) && read.getMappingQuality() > 0;
}
/**
* Returns the array of base qualitites in the order the bases were read on the machine (i.e. always starting from
* cycle 1). In other words, if the read is unmapped or aligned in the forward direction, the read's own base
* qualities are returned as stored in the SAM record; if the read is aligned in the reverse direction, the array
* of read's base qualitites is inverted (in this case new array is allocated and returned).
*
* @param read
* @return
*/
public static byte[] getQualsInCycleOrder(SAMRecord read) {
if (isReadUnmapped(read) || !read.getReadNegativeStrandFlag()) return read.getBaseQualities();
return Utils.reverse(read.getBaseQualities());
}
/**
* Returns the array of original base qualitites (before recalibration) in the order the bases were read on the machine (i.e. always starting from
* cycle 1). In other words, if the read is unmapped or aligned in the forward direction, the read's own base
* qualities are returned as stored in the SAM record; if the read is aligned in the reverse direction, the array
* of read's base qualitites is inverted (in this case new array is allocated and returned). If no original base qualities
* are available this method will throw a runtime exception.
*
* @param read
* @return
*/
public static byte[] getOriginalQualsInCycleOrder(SAMRecord read) {
if (isReadUnmapped(read) || !read.getReadNegativeStrandFlag()) return read.getOriginalBaseQualities();
return Utils.reverse(read.getOriginalBaseQualities());
}
/**
* Takes the alignment of the read sequence <code>readSeq</code> to the reference sequence <code>refSeq</code>
* starting at 0-based position <code>refIndex</code> on the <code>refSeq</code> and specified by its <code>cigar</code>.

View File

@ -1,68 +0,0 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.sam;
import net.sf.samtools.SAMRecord;
public class ComparableSAMRecord implements Comparable<ComparableSAMRecord> {
private SAMRecord record;
public ComparableSAMRecord(SAMRecord record) {
this.record = record;
}
public SAMRecord getRecord() {
return record;
}
public int compareTo(ComparableSAMRecord o) {
// first sort by start position -- with not coverflow because both are guaranteed to be positive.
int comparison = record.getAlignmentStart() - o.record.getAlignmentStart();
// if the reads have the same start position, we must give a non-zero comparison
// (because java Sets often require "consistency with equals")
if ( comparison == 0 )
comparison = record.getReadName().compareTo(o.getRecord().getReadName());
// if the read names are the same, use the first of the pair if appropriate
if ( comparison == 0 && record.getReadPairedFlag() )
comparison = ( record.getFirstOfPairFlag() ? -1 : 1);
return comparison;
}
public boolean equals(Object obj) {
if ( !(obj instanceof ComparableSAMRecord) )
return false;
if ( this == obj )
return true;
ComparableSAMRecord csr = (ComparableSAMRecord)obj;
if(record.getAlignmentStart() != csr.record.getAlignmentStart())
return false;
if ( !record.getReadName().equals(csr.getRecord().getReadName()) )
return false;
return ( record.getFirstOfPairFlag() == csr.record.getFirstOfPairFlag() );
}
}

View File

@ -133,7 +133,7 @@ public class ThreadEfficiencyMonitor {
*/
public synchronized void printUsageInformation(final Logger logger, final Priority priority) {
logger.debug("Number of threads monitored: " + getnThreadsAnalyzed());
logger.debug("Total runtime " + new AutoFormattingTime(TimeUnit.MILLISECONDS.toSeconds(getTotalTime())));
logger.debug("Total runtime " + new AutoFormattingTime(TimeUnit.MILLISECONDS.toNanos(getTotalTime())));
for ( final State state : State.values() ) {
logger.debug(String.format("\tPercent of time spent %s is %.2f", state.getUserFriendlyName(), getStatePercent(state)));
}

View File

@ -0,0 +1,117 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting;
import org.broadinstitute.sting.utils.AutoFormattingTime;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* UnitTests for the AutoFormatting
*
* User: depristo
* Date: 8/24/12
* Time: 11:25 AM
* To change this template use File | Settings | File Templates.
*/
public class AutoFormattingTimeUnitTest extends BaseTest {
@DataProvider(name = "AutoFormattingTimeUnitSelection")
public Object[][] makeTimeData() {
List<Object[]> tests = new ArrayList<Object[]>();
tests.add(new Object[]{TimeUnit.SECONDS.toNanos(10), "s"});
tests.add(new Object[]{TimeUnit.MINUTES.toNanos(10), "m"});
tests.add(new Object[]{TimeUnit.HOURS.toNanos(10), "h"});
tests.add(new Object[]{TimeUnit.DAYS.toNanos(10), "d"});
tests.add(new Object[]{TimeUnit.DAYS.toNanos(1000), "w"});
return tests.toArray(new Object[][]{});
}
@Test(dataProvider = "AutoFormattingTimeUnitSelection")
public void testUnitSelection(final long nano, final String expectedUnit) throws InterruptedException {
final AutoFormattingTime time = new AutoFormattingTime(nano);
testBasic(time, nano, time.getWidth(), time.getPrecision());
Assert.assertTrue(time.toString().endsWith(expectedUnit), "TimeUnit " + time.toString() + " didn't contain expected time unit " + expectedUnit);
}
@Test(dataProvider = "AutoFormattingTimeUnitSelection")
public void testSecondsAsDouble(final long nano, final String expectedUnit) throws InterruptedException {
final double inSeconds = nano * 1e-9;
final long nanoFromSeconds = (long)(inSeconds * 1e9);
final AutoFormattingTime time = new AutoFormattingTime(inSeconds);
testBasic(time, nanoFromSeconds, time.getWidth(), time.getPrecision());
}
@DataProvider(name = "AutoFormattingTimeWidthAndPrecision")
public Object[][] makeTimeWidthAndPrecision() {
List<Object[]> tests = new ArrayList<Object[]>();
for ( final int width : Arrays.asList(-1, 1, 2, 6, 20) ) {
for ( final int precision : Arrays.asList(1, 2) ) {
tests.add(new Object[]{100.123456 * 1e9, width, precision});
tests.add(new Object[]{0.123456 * 1e9, width, precision});
}
}
return tests.toArray(new Object[][]{});
}
@Test(dataProvider = "AutoFormattingTimeWidthAndPrecision")
public void testWidthAndPrecision(final double inSeconds, final int width, final int precision) throws InterruptedException {
final AutoFormattingTime time = new AutoFormattingTime(inSeconds, width, precision);
final long nanoFromSeconds = (long)(inSeconds * 1e9);
testBasic(time, nanoFromSeconds, width, precision);
final Matcher match = matchToString(time);
match.matches();
final String widthString = match.group(1);
final String precisionString = match.group(2);
if ( width != -1 ) {
final int actualWidth = widthString.length() + 1 + precisionString.length();
Assert.assertTrue(actualWidth >= width, "width string '" + widthString + "' not >= the expected width " + width);
}
Assert.assertEquals(precisionString.length(), precision, "precision string '" + precisionString + "' not the expected precision " + precision);
}
private static Matcher matchToString(final AutoFormattingTime time) {
Pattern pattern = Pattern.compile("(\\s*\\d*)\\.(\\d*) \\w");
return pattern.matcher(time.toString());
}
private static void testBasic(final AutoFormattingTime aft, final long nano, final int expectedWidth, final int expectedPrecision) {
Assert.assertEquals(aft.getTimeInNanoSeconds(), nano);
assertEqualsDoubleSmart(aft.getTimeInSeconds(), nano * 1e-9, 1e-3, "Time in seconds not within tolerance of nanoSeconds");
Assert.assertEquals(aft.getWidth(), expectedWidth);
Assert.assertEquals(aft.getPrecision(), expectedPrecision);
Assert.assertNotNull(aft.toString(), "TimeUnit toString returned null");
final Matcher match = matchToString(aft);
Assert.assertTrue(match.matches(), "toString " + aft.toString() + " doesn't match our expected format");
}
}

View File

@ -301,7 +301,11 @@ public abstract class BaseTest {
Assert.assertTrue(actualSet.equals(expectedSet), info); // note this is necessary due to testng bug for set comps
}
public static final void assertEqualsDoubleSmart(final double actual, final double expected, final double tolerance) {
public static void assertEqualsDoubleSmart(final double actual, final double expected, final double tolerance) {
assertEqualsDoubleSmart(actual, expected, tolerance, null);
}
public static void assertEqualsDoubleSmart(final double actual, final double expected, final double tolerance, final String message) {
if ( Double.isNaN(expected) ) // NaN == NaN => false unfortunately
Assert.assertTrue(Double.isNaN(actual), "expected is nan, actual is not");
else if ( Double.isInfinite(expected) ) // NaN == NaN => false unfortunately
@ -309,7 +313,9 @@ public abstract class BaseTest {
else {
final double delta = Math.abs(actual - expected);
final double ratio = Math.abs(actual / expected - 1.0);
Assert.assertTrue(delta < tolerance || ratio < tolerance, "expected = " + expected + " actual = " + actual + " not within tolerance " + tolerance);
Assert.assertTrue(delta < tolerance || ratio < tolerance, "expected = " + expected + " actual = " + actual
+ " not within tolerance " + tolerance
+ (message == null ? "" : "message: " + message));
}
}
}

View File

@ -45,7 +45,7 @@ import java.util.*;
* Test the Active Region Traversal Contract
* http://iwww.broadinstitute.org/gsa/wiki/index.php/Active_Region_Traversal_Contract
*/
public class TraverseActiveRegionsTest extends BaseTest {
public class TraverseActiveRegionsUnitTest extends BaseTest {
private class DummyActiveRegionWalker extends ActiveRegionWalker<Integer, Integer> {
private final double prob;
@ -103,7 +103,8 @@ public class TraverseActiveRegionsTest extends BaseTest {
private List<GenomeLoc> intervals;
private static final String testBAM = "TraverseActiveRegionsTest.bam";
private static final String testBAM = "TraverseActiveRegionsUnitTest.bam";
private static final String testBAI = "TraverseActiveRegionsUnitTest.bai";
@BeforeClass
private void init() throws FileNotFoundException {
@ -117,7 +118,8 @@ public class TraverseActiveRegionsTest extends BaseTest {
// TODO: reads which are partially between intervals (in/outside extension)
// TODO: duplicate reads
// TODO: should we assign reads which are completely outside intervals but within extension?
// TODO: reads which are completely outside intervals but within extension
// TODO: test the extension itself
intervals = new ArrayList<GenomeLoc>();
@ -148,6 +150,8 @@ public class TraverseActiveRegionsTest extends BaseTest {
private void createBAM(List<GATKSAMRecord> reads) {
File outFile = new File(testBAM);
outFile.deleteOnExit();
File indexFile = new File(testBAI);
indexFile.deleteOnExit();
SAMFileWriter out = new SAMFileWriterFactory().makeBAMWriter(reads.get(0).getHeader(), true, outFile);
for (GATKSAMRecord read : ReadUtils.sortReadsByCoordinate(reads)) {
@ -237,6 +241,21 @@ public class TraverseActiveRegionsTest extends BaseTest {
Assert.assertEquals(intervalStops.size(), 0, "Interval stop location does not match an active region stop location");
}
@Test
public void testActiveRegionExtensionOnContig() {
DummyActiveRegionWalker walker = new DummyActiveRegionWalker();
Collection<ActiveRegion> activeRegions = getActiveRegions(walker, intervals).values();
for (ActiveRegion activeRegion : activeRegions) {
GenomeLoc loc = activeRegion.getExtendedLoc();
// Contract: active region extensions must stay on the contig
Assert.assertTrue(loc.getStart() > 0, "Active region extension begins at location " + loc.getStart() + ", past the left end of the contig");
int refLen = dictionary.getSequence(loc.getContigIndex()).getSequenceLength();
Assert.assertTrue(loc.getStop() <= refLen, "Active region extension ends at location " + loc.getStop() + ", past the right end of the contig");
}
}
@Test
public void testPrimaryReadMapping() {
DummyActiveRegionWalker walker = new DummyActiveRegionWalker();

View File

@ -32,7 +32,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("fbfbd4d13b7ba3d76e8e186902e81378"));
Arrays.asList("a127623a26bac4c17c9df491e170ed88"));
executeTest("test file has annotations, asking for annotations, #1", spec);
}
@ -40,7 +40,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("19aef8914efc497192f89a9038310ca5"));
Arrays.asList("13e24e6b9dfa241df5baa2c3f53415b9"));
executeTest("test file has annotations, asking for annotations, #2", spec);
}
@ -66,7 +66,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("4f0b8033da18e6cf6e9b8d5d36c21ba2"));
Arrays.asList("07cb4d427235878aeec0066d7d298e54"));
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
}
@ -74,7 +74,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("64ca176d587dfa2b3b9dec9f7999305c"));
Arrays.asList("e579097677d5e56a5776151251947961"));
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
}
@ -82,7 +82,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testExcludeAnnotations() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard -XA FisherStrand -XA ReadPosRankSumTest --variant " + privateTestDir + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("f33f417fad98c05d9cd08ffa22943b0f"));
Arrays.asList("348314945436ace71ce6b1a52559d9ee"));
executeTest("test exclude annotations", spec);
}
@ -90,7 +90,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testOverwritingHeader() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1,
Arrays.asList("0c810f6c4abef9d9dc5513ca872d3d22"));
Arrays.asList("ae7930e37a66c0aa4cfe0232736864fe"));
executeTest("test overwriting header", spec);
}
@ -98,7 +98,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoReads() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1,
Arrays.asList("1c423b7730b9805e7b885ece924286e0"));
Arrays.asList("a0ba056c2625033e5e859fd6bcec1256"));
executeTest("not passing it any reads", spec);
}
@ -106,7 +106,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testDBTagWithDbsnp() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --dbsnp " + b36dbSNP129 + " -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1,
Arrays.asList("54d7d5bb9404652857adf5e50d995f30"));
Arrays.asList("0be7da17340111a94e8581ee3808c88a"));
executeTest("getting DB tag with dbSNP", spec);
}
@ -114,7 +114,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testMultipleIdsWithDbsnp() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --alwaysAppendDbsnpId --dbsnp " + b36dbSNP129 + " -G Standard --variant " + privateTestDir + "vcfexample3withIDs.vcf -L " + privateTestDir + "vcfexample3withIDs.vcf", 1,
Arrays.asList("5fe63e511061ed4f91d938e72e7e3c39"));
Arrays.asList("e40e625302a496ede42eed61c2ce524b"));
executeTest("adding multiple IDs with dbSNP", spec);
}
@ -122,7 +122,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testDBTagWithHapMap() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --comp:H3 " + privateTestDir + "fakeHM3.vcf -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -L " + privateTestDir + "vcfexample3empty.vcf", 1,
Arrays.asList("cc7184263975595a6e2473d153227146"));
Arrays.asList("cb50876477d3e035b6eda5d720d7ba8d"));
executeTest("getting DB tag with HM3", spec);
}
@ -130,7 +130,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoQuals() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --variant " + privateTestDir + "noQual.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L " + privateTestDir + "noQual.vcf -A QualByDepth", 1,
Arrays.asList("aea983adc01cd059193538cc30adc17d"));
Arrays.asList("458412261d61797d39f802c1e03d63f6"));
executeTest("test file doesn't have QUALs", spec);
}
@ -138,7 +138,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testUsingExpression() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --resource:foo " + privateTestDir + "targetAnnotations.vcf -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -E foo.AF -L " + privateTestDir + "vcfexample3empty.vcf", 1,
Arrays.asList("2b0e8cdfd691779befc5ac123d1a1887"));
Arrays.asList("39defa8108dca9fa3e54b22a7da43f77"));
executeTest("using expression", spec);
}
@ -146,7 +146,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testUsingExpressionWithID() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --resource:foo " + privateTestDir + "targetAnnotations.vcf -G Standard --variant " + privateTestDir + "vcfexample3empty.vcf -E foo.ID -L " + privateTestDir + "vcfexample3empty.vcf", 1,
Arrays.asList("3de1d1998203518098ffae233f3e2352"));
Arrays.asList("a917edd58a0c235e9395bfc2d2020a8c"));
executeTest("using expression with ID", spec);
}