Merge branch 'master' of github.com:broadinstitute/gsa-unstable
This commit is contained in:
commit
18b002c99c
97
build.xml
97
build.xml
|
|
@ -185,10 +185,7 @@
|
||||||
<include name="**/*.class"/>
|
<include name="**/*.class"/>
|
||||||
</fileset>
|
</fileset>
|
||||||
|
|
||||||
<patternset id="dependency.mask" includes="*.jar">
|
<patternset id="dependency.mask" includes="*.jar" />
|
||||||
<exclude name="testng*.jar" />
|
|
||||||
<exclude name="bcel*.jar" />
|
|
||||||
</patternset>
|
|
||||||
|
|
||||||
<path id="external.dependencies">
|
<path id="external.dependencies">
|
||||||
<fileset dir="${lib.dir}" erroronmissingdir="false">
|
<fileset dir="${lib.dir}" erroronmissingdir="false">
|
||||||
|
|
@ -205,6 +202,16 @@
|
||||||
<pathelement location="${scala.classes}" />
|
<pathelement location="${scala.classes}" />
|
||||||
</path>
|
</path>
|
||||||
|
|
||||||
|
<path id="build.results">
|
||||||
|
<!-- Ensure that GenomeAnalysisTK.jar comes first in the path, as it contains overrides for certain classes in our dependencies -->
|
||||||
|
<pathelement location="${dist.dir}/GenomeAnalysisTK.jar" />
|
||||||
|
<!-- After GenomeAnalysisTK.jar we include all of the other jars in the dist directory -->
|
||||||
|
<fileset dir="${dist.dir}" erroronmissingdir="false">
|
||||||
|
<patternset refid="dependency.mask" />
|
||||||
|
<exclude name="GenomeAnalysisTK.jar" />
|
||||||
|
</fileset>
|
||||||
|
</path>
|
||||||
|
|
||||||
<fileset id="external.source.files" dir="${external.dir}" erroronmissingdir="false">
|
<fileset id="external.source.files" dir="${external.dir}" erroronmissingdir="false">
|
||||||
<include name="**/*.java" />
|
<include name="**/*.java" />
|
||||||
</fileset>
|
</fileset>
|
||||||
|
|
@ -226,20 +233,20 @@
|
||||||
<!-- the path for resources that need to go into the GATK jar;
|
<!-- the path for resources that need to go into the GATK jar;
|
||||||
any additional resources should go into this set. -->
|
any additional resources should go into this set. -->
|
||||||
<path id="gatk.resources">
|
<path id="gatk.resources">
|
||||||
<fileset dir="${basedir}">
|
<fileset dir="${java.public.source.dir}">
|
||||||
<include name="${java.public.source.dir}/**/templates/*" />
|
<include name="**/resources/*" />
|
||||||
<include name="${java.private.source.dir}/**/templates/*" if="include.private" />
|
<include name="**/templates/*" />
|
||||||
<include name="${java.protected.source.dir}/**/templates/*" if="include.protected" />
|
</fileset>
|
||||||
|
<fileset dir="${java.private.source.dir}" erroronmissingdir="false">
|
||||||
|
<include name="**/resources/*" if="include.private" />
|
||||||
|
<include name="**/templates/*" if="include.private" />
|
||||||
|
</fileset>
|
||||||
|
<fileset dir="${java.protected.source.dir}" erroronmissingdir="false">
|
||||||
|
<include name="**/resources/*" if="include.protected" />
|
||||||
|
<include name="**/templates/*" if="include.protected" />
|
||||||
</fileset>
|
</fileset>
|
||||||
</path>
|
</path>
|
||||||
|
|
||||||
<path id="build.results">
|
|
||||||
<fileset dir="${dist.dir}">
|
|
||||||
<patternset refid="dependency.mask" />
|
|
||||||
</fileset>
|
|
||||||
</path>
|
|
||||||
|
|
||||||
|
|
||||||
<!-- ******************************************************************************** -->
|
<!-- ******************************************************************************** -->
|
||||||
<!-- Ivy Retrieve -->
|
<!-- Ivy Retrieve -->
|
||||||
<!-- ******************************************************************************** -->
|
<!-- ******************************************************************************** -->
|
||||||
|
|
@ -672,6 +679,24 @@
|
||||||
</jar>
|
</jar>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
|
<target name="na12878kb.jar" depends="gatk.compile,init.jar">
|
||||||
|
<jar jarfile="${dist.dir}/na12878kb.jar">
|
||||||
|
<fileset dir="${java.classes}">
|
||||||
|
<include name="org/broadinstitute/sting/utils/GenomeLocParser*.class"/>
|
||||||
|
<include name="org/broadinstitute/sting/utils/GenomeLoc.class"/>
|
||||||
|
<include name="org/broadinstitute/sting/utils/HasGenomeLocation.class"/>
|
||||||
|
<include name="org/broadinstitute/sting/utils/BaseUtils.class"/>
|
||||||
|
<include name="org/broadinstitute/sting/utils/Utils.class"/>
|
||||||
|
<include name="org/broadinstitute/sting/utils/exceptions/**/*.class"/>
|
||||||
|
<include name="org/broadinstitute/sting/gatk/walkers/na12878kb/core/**/*.class"/>
|
||||||
|
<include name="net/sf/picard/reference/FastaSequenceFile.class"/>
|
||||||
|
</fileset>
|
||||||
|
<fileset dir="${java.private.source.dir}">
|
||||||
|
<include name="org/broadinstitute/sting/gatk/walkers/na12878kb/core/resources/**/*"/>
|
||||||
|
</fileset>
|
||||||
|
</jar>
|
||||||
|
</target>
|
||||||
|
|
||||||
<target name="gatk.jar" depends="gatk.compile, init.jar, R.script.stage" description="generate the GATK distribution">
|
<target name="gatk.jar" depends="gatk.compile, init.jar, R.script.stage" description="generate the GATK distribution">
|
||||||
<jar jarfile="${dist.dir}/GenomeAnalysisTK.jar">
|
<jar jarfile="${dist.dir}/GenomeAnalysisTK.jar">
|
||||||
<path refid="gatk.resources"/>
|
<path refid="gatk.resources"/>
|
||||||
|
|
@ -1103,15 +1128,10 @@
|
||||||
</path>
|
</path>
|
||||||
|
|
||||||
<path id="testng.default.classpath">
|
<path id="testng.default.classpath">
|
||||||
<pathelement location="${java.classes}" />
|
<path refid="build.results" />
|
||||||
<pathelement location="${scala.classes}" />
|
|
||||||
<pathelement location="${java.contracts.dir}" />
|
<pathelement location="${java.contracts.dir}" />
|
||||||
<pathelement location="${java.test.classes}" />
|
<pathelement location="${java.test.classes}" />
|
||||||
<pathelement location="${scala.test.classes}" />
|
<pathelement location="${scala.test.classes}" />
|
||||||
<pathelement location="${R.tar.dir}" />
|
|
||||||
<path refid="R.script.source.path" />
|
|
||||||
<pathelement location="${key.dir}" />
|
|
||||||
<path refid="external.dependencies" />
|
|
||||||
</path>
|
</path>
|
||||||
|
|
||||||
<!-- Test targets -->
|
<!-- Test targets -->
|
||||||
|
|
@ -1119,9 +1139,6 @@
|
||||||
<target name="test.init.compile">
|
<target name="test.init.compile">
|
||||||
<mkdir dir="${java.test.classes}"/>
|
<mkdir dir="${java.test.classes}"/>
|
||||||
<mkdir dir="${scala.test.classes}"/>
|
<mkdir dir="${scala.test.classes}"/>
|
||||||
<antcall target="resolve">
|
|
||||||
<param name="ivy.conf" value="test"/>
|
|
||||||
</antcall>
|
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
<target name="test.java.internal.compile" depends="dist,test.init.compile">
|
<target name="test.java.internal.compile" depends="dist,test.init.compile">
|
||||||
|
|
@ -1129,10 +1146,8 @@
|
||||||
<javac fork="true" memoryMaximumSize="512m" destdir="${java.test.classes}" debug="true" optimize="on" tempdir="${java.io.tmpdir}">
|
<javac fork="true" memoryMaximumSize="512m" destdir="${java.test.classes}" debug="true" optimize="on" tempdir="${java.io.tmpdir}">
|
||||||
<src refid="java.test.source.path" />
|
<src refid="java.test.source.path" />
|
||||||
<classpath>
|
<classpath>
|
||||||
<path refid="external.dependencies" />
|
<path refid="build.results" />
|
||||||
<pathelement location="${java.classes}"/>
|
|
||||||
<pathelement location="${java.contracts.dir}"/>
|
<pathelement location="${java.contracts.dir}"/>
|
||||||
<pathelement location="${testng.jar}"/>
|
|
||||||
</classpath>
|
</classpath>
|
||||||
<compilerarg value="-proc:none"/>
|
<compilerarg value="-proc:none"/>
|
||||||
</javac>
|
</javac>
|
||||||
|
|
@ -1143,11 +1158,9 @@
|
||||||
<javac fork="true" memoryMaximumSize="512m" destdir="${java.test.classes}" debug="true" optimize="on" tempdir="${java.io.tmpdir}" srcdir="${external.dir}">
|
<javac fork="true" memoryMaximumSize="512m" destdir="${java.test.classes}" debug="true" optimize="on" tempdir="${java.io.tmpdir}" srcdir="${external.dir}">
|
||||||
<include name="*/test/**/*.java"/>
|
<include name="*/test/**/*.java"/>
|
||||||
<classpath>
|
<classpath>
|
||||||
<path refid="external.dependencies" />
|
<path refid="build.results" />
|
||||||
<pathelement location="${java.test.classes}"/>
|
<pathelement location="${java.test.classes}"/>
|
||||||
<pathelement location="${java.classes}"/>
|
|
||||||
<pathelement location="${java.contracts.dir}"/>
|
<pathelement location="${java.contracts.dir}"/>
|
||||||
<pathelement location="${testng.jar}"/>
|
|
||||||
</classpath>
|
</classpath>
|
||||||
<compilerarg value="-proc:none"/>
|
<compilerarg value="-proc:none"/>
|
||||||
</javac>
|
</javac>
|
||||||
|
|
@ -1160,9 +1173,8 @@
|
||||||
<scalac fork="true" jvmargs="-Xmx512m" destdir="${scala.test.classes}" deprecation="yes" unchecked="yes">
|
<scalac fork="true" jvmargs="-Xmx512m" destdir="${scala.test.classes}" deprecation="yes" unchecked="yes">
|
||||||
<src refid="scala.test.source.path" />
|
<src refid="scala.test.source.path" />
|
||||||
<classpath>
|
<classpath>
|
||||||
<path refid="scala.dependencies"/>
|
<path refid="build.results"/>
|
||||||
<pathelement location="${java.test.classes}"/>
|
<pathelement location="${java.test.classes}"/>
|
||||||
<pathelement location="${testng.jar}"/>
|
|
||||||
</classpath>
|
</classpath>
|
||||||
</scalac>
|
</scalac>
|
||||||
</target>
|
</target>
|
||||||
|
|
@ -1369,14 +1381,13 @@
|
||||||
|
|
||||||
<!-- Fast test target that cuts major corners for speed. Requires that a full build has been done first. Java-only, single test class only -->
|
<!-- Fast test target that cuts major corners for speed. Requires that a full build has been done first. Java-only, single test class only -->
|
||||||
<!-- Usage: ant fasttest -Dsingle=TestClass -->
|
<!-- Usage: ant fasttest -Dsingle=TestClass -->
|
||||||
<target name="fasttest" depends="init.javaonly,init,test.init">
|
<target name="fasttest" depends="init.javaonly,init">
|
||||||
<condition property="not.clean">
|
<condition property="not.clean">
|
||||||
<and>
|
<and>
|
||||||
<available file="${build.dir}" />
|
<available file="${build.dir}" />
|
||||||
<available file="${lib.dir}" />
|
<available file="${lib.dir}" />
|
||||||
<available file="${dist.dir}" />
|
<available file="${dist.dir}" />
|
||||||
<available file="${java.test.classes}" />
|
<available file="${java.test.classes}" />
|
||||||
<available file="${testng.jar}" />
|
|
||||||
</and>
|
</and>
|
||||||
</condition>
|
</condition>
|
||||||
<fail message="fasttest requires a NON-CLEAN working directory (INCLUDING test classes). Do a full test build using ant test.compile first." unless="not.clean" />
|
<fail message="fasttest requires a NON-CLEAN working directory (INCLUDING test classes). Do a full test build using ant test.compile first." unless="not.clean" />
|
||||||
|
|
@ -1394,13 +1405,27 @@
|
||||||
<javac fork="true" memoryMaximumSize="512m" destdir="${java.test.classes}" debug="true" optimize="on" tempdir="${java.io.tmpdir}">
|
<javac fork="true" memoryMaximumSize="512m" destdir="${java.test.classes}" debug="true" optimize="on" tempdir="${java.io.tmpdir}">
|
||||||
<src refid="java.test.source.path" />
|
<src refid="java.test.source.path" />
|
||||||
<classpath>
|
<classpath>
|
||||||
<path refid="external.dependencies" />
|
|
||||||
<pathelement location="${java.classes}"/>
|
<pathelement location="${java.classes}"/>
|
||||||
<pathelement location="${testng.jar}"/>
|
<path refid="external.dependencies" />
|
||||||
</classpath>
|
</classpath>
|
||||||
<compilerarg value="-proc:none"/>
|
<compilerarg value="-proc:none"/>
|
||||||
</javac>
|
</javac>
|
||||||
|
|
||||||
|
<!-- fasttest uses the unpackaged class files in its test classpath to avoid having to rebuild the jars in dist/ -->
|
||||||
|
<path id="testng.fasttest.classpath">
|
||||||
|
<pathelement location="${java.classes}" />
|
||||||
|
<pathelement location="${scala.classes}" />
|
||||||
|
<pathelement location="${java.contracts.dir}" />
|
||||||
|
<pathelement location="${java.test.classes}" />
|
||||||
|
<pathelement location="${scala.test.classes}" />
|
||||||
|
<pathelement location="${R.tar.dir}" />
|
||||||
|
<path refid="R.script.source.path" />
|
||||||
|
<pathelement location="${key.dir}" />
|
||||||
|
<path refid="external.dependencies" />
|
||||||
|
<path refid="java.source.path" /> <!-- Terrible hack to allow fasttest to see resource files stored in the source tree -->
|
||||||
|
</path>
|
||||||
|
<property name="testng.classpath" value="testng.fasttest.classpath" />
|
||||||
|
|
||||||
<run-test testtype="${single}" outputdir="${report}/${single}" runfailed="false"/>
|
<run-test testtype="${single}" outputdir="${report}/${single}" runfailed="false"/>
|
||||||
</target>
|
</target>
|
||||||
</project>
|
</project>
|
||||||
|
|
|
||||||
11
ivy.xml
11
ivy.xml
|
|
@ -24,11 +24,8 @@
|
||||||
|
|
||||||
<ivy-module version="1.0">
|
<ivy-module version="1.0">
|
||||||
<info organisation="org.broadinstitute" module="Sting"/>
|
<info organisation="org.broadinstitute" module="Sting"/>
|
||||||
<configurations defaultconfmapping="test->default">
|
<configurations>
|
||||||
<conf name="default" description="the core dependencies for the GATK"/>
|
<conf name="default" description="the core dependencies for the GATK"/>
|
||||||
<conf name="test" extends="default" description="external dependencies used for testing and metrics"/>
|
|
||||||
<conf name="scala" extends="default" description="the dependencies for scala"/>
|
|
||||||
<conf name="queue" extends="scala" description="the dependencies for Queue"/>
|
|
||||||
</configurations>
|
</configurations>
|
||||||
<dependencies defaultconf="default">
|
<dependencies defaultconf="default">
|
||||||
<dependency org="net.sf" name="sam" rev="latest.integration"/>
|
<dependency org="net.sf" name="sam" rev="latest.integration"/>
|
||||||
|
|
@ -83,9 +80,9 @@
|
||||||
<dependency org="org.scala-lang" name="scala-library" rev="2.9.2"/>
|
<dependency org="org.scala-lang" name="scala-library" rev="2.9.2"/>
|
||||||
|
|
||||||
<!-- testing and evaluation dependencies -->
|
<!-- testing and evaluation dependencies -->
|
||||||
<dependency org="org.testng" name="testng" rev="5.14.1" conf="test"/>
|
<dependency org="org.testng" name="testng" rev="5.14.1"/>
|
||||||
<dependency org="org.uncommons" name="reportng" rev="1.1.2" conf="test"/>
|
<dependency org="org.uncommons" name="reportng" rev="1.1.2"/>
|
||||||
<dependency org="com.google.code.caliper" name="caliper" rev="1.0-SNAPSHOT" conf="test"/>
|
<dependency org="com.google.code.caliper" name="caliper" rev="1.0-SNAPSHOT"/>
|
||||||
|
|
||||||
<!-- Contracts for Java and dependencies -->
|
<!-- Contracts for Java and dependencies -->
|
||||||
<dependency org="com.google.code.cofoja" name="cofoja" rev="1.0-r139"/>
|
<dependency org="com.google.code.cofoja" name="cofoja" rev="1.0-r139"/>
|
||||||
|
|
|
||||||
|
|
@ -45,6 +45,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
|
import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
|
import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState;
|
||||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
|
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
|
||||||
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||||
|
|
@ -292,9 +293,15 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
||||||
@Override
|
@Override
|
||||||
public boolean includeReadsWithDeletionAtLoci() { return true; }
|
public boolean includeReadsWithDeletionAtLoci() { return true; }
|
||||||
|
|
||||||
// enable non primary reads in the active region
|
// enable non primary and extended reads in the active region
|
||||||
@Override
|
@Override
|
||||||
public boolean wantsNonPrimaryReads() { return true; }
|
public EnumSet<ActiveRegionReadState> desiredReadStates() {
|
||||||
|
return EnumSet.of(
|
||||||
|
ActiveRegionReadState.PRIMARY,
|
||||||
|
ActiveRegionReadState.NONPRIMARY,
|
||||||
|
ActiveRegionReadState.EXTENDED
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@Ensures({"result.isActiveProb >= 0.0", "result.isActiveProb <= 1.0"})
|
@Ensures({"result.isActiveProb >= 0.0", "result.isActiveProb <= 1.0"})
|
||||||
|
|
|
||||||
|
|
@ -62,7 +62,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
public void testMultipleSNPAlleles() {
|
public void testMultipleSNPAlleles() {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
|
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
|
||||||
Arrays.asList("704888987baacff8c7b273b8ab9938d0"));
|
Arrays.asList("d20c7a143b899f0239bf64b652ad3edb"));
|
||||||
executeTest("test Multiple SNP alleles", spec);
|
executeTest("test Multiple SNP alleles", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -197,7 +197,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testOutputParameterAllSites() {
|
public void testOutputParameterAllSites() {
|
||||||
testOutputParameters("--output_mode EMIT_ALL_SITES", "81fff490c0f59890f1e75dc290833434");
|
testOutputParameters("--output_mode EMIT_ALL_SITES", "8b26088a035e579c4afd3b46737291e4");
|
||||||
}
|
}
|
||||||
|
|
||||||
private void testOutputParameters(final String args, final String md5) {
|
private void testOutputParameters(final String args, final String md5) {
|
||||||
|
|
@ -345,13 +345,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
public void testMultiSampleIndels1() {
|
public void testMultiSampleIndels1() {
|
||||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||||
baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
|
baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
|
||||||
Arrays.asList("a4761d7f25e7a62f34494801c98a0da7"));
|
Arrays.asList("69df7a00f800204564ca3726e1871132"));
|
||||||
List<File> result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst();
|
List<File> result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst();
|
||||||
|
|
||||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
|
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
|
||||||
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
|
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
|
||||||
Arrays.asList("c526c234947482d1cd2ffc5102083a08"));
|
Arrays.asList("1256a7eceff2c2374c231ff981df486d"));
|
||||||
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
|
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2,16 +2,14 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||||
|
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
import org.testng.annotations.DataProvider;
|
import org.testng.annotations.DataProvider;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.*;
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
public class AFCalcResultUnitTest extends BaseTest {
|
public class AFCalcResultUnitTest extends BaseTest {
|
||||||
private static class MyTest {
|
private static class MyTest {
|
||||||
|
|
@ -79,4 +77,54 @@ public class AFCalcResultUnitTest extends BaseTest {
|
||||||
final double[] actualPosteriors = new double[]{result.getLog10PosteriorOfAFEq0(), result.getLog10PosteriorOfAFGT0()};
|
final double[] actualPosteriors = new double[]{result.getLog10PosteriorOfAFEq0(), result.getLog10PosteriorOfAFGT0()};
|
||||||
Assert.assertEquals(MathUtils.sumLog10(actualPosteriors), 1.0, 1e-3, "Posteriors don't sum to 1 with 1e-3 precision");
|
Assert.assertEquals(MathUtils.sumLog10(actualPosteriors), 1.0, 1e-3, "Posteriors don't sum to 1 with 1e-3 precision");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@DataProvider(name = "TestIsPolymorphic")
|
||||||
|
public Object[][] makeTestIsPolymorphic() {
|
||||||
|
List<Object[]> tests = new ArrayList<Object[]>();
|
||||||
|
|
||||||
|
final List<Double> pValues = new LinkedList<Double>();
|
||||||
|
for ( final double p : Arrays.asList(0.01, 0.1, 0.9, 0.99, 0.999) )
|
||||||
|
for ( final double espilon : Arrays.asList(-1e-5, 0.0, 1e-5) )
|
||||||
|
pValues.add(p + espilon);
|
||||||
|
|
||||||
|
for ( final double pNonRef : pValues ) {
|
||||||
|
for ( final double pThreshold : pValues ) {
|
||||||
|
final boolean shouldBePoly = pNonRef >= pThreshold;
|
||||||
|
if ( pNonRef != pThreshold)
|
||||||
|
// let's not deal with numerical instability
|
||||||
|
tests.add(new Object[]{ pNonRef, pThreshold, shouldBePoly });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return tests.toArray(new Object[][]{});
|
||||||
|
}
|
||||||
|
|
||||||
|
private AFCalcResult makePolymorphicTestData(final double pNonRef) {
|
||||||
|
return new AFCalcResult(
|
||||||
|
new int[]{0},
|
||||||
|
1,
|
||||||
|
alleles,
|
||||||
|
MathUtils.normalizeFromLog10(new double[]{1 - pNonRef, pNonRef}, true, false),
|
||||||
|
log10Even,
|
||||||
|
Collections.singletonMap(C, Math.log10(pNonRef)));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(enabled = true, dataProvider = "TestIsPolymorphic")
|
||||||
|
private void testIsPolymorphic(final double pNonRef, final double pThreshold, final boolean shouldBePoly) {
|
||||||
|
final AFCalcResult result = makePolymorphicTestData(pNonRef);
|
||||||
|
final boolean actualIsPoly = result.isPolymorphic(C, Math.log10(pThreshold));
|
||||||
|
Assert.assertEquals(actualIsPoly, shouldBePoly,
|
||||||
|
"isPolymorphic with pNonRef " + pNonRef + " and threshold " + pThreshold + " returned "
|
||||||
|
+ actualIsPoly + " but the expected result is " + shouldBePoly);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(enabled = true, dataProvider = "TestIsPolymorphic")
|
||||||
|
private void testIsPolymorphicQual(final double pNonRef, final double pThreshold, final boolean shouldBePoly) {
|
||||||
|
final AFCalcResult result = makePolymorphicTestData(pNonRef);
|
||||||
|
final double qual = QualityUtils.phredScaleCorrectRate(pThreshold);
|
||||||
|
final boolean actualIsPoly = result.isPolymorphicPhredScaledQual(C, qual);
|
||||||
|
Assert.assertEquals(actualIsPoly, shouldBePoly,
|
||||||
|
"isPolymorphic with pNonRef " + pNonRef + " and threshold " + pThreshold + " returned "
|
||||||
|
+ actualIsPoly + " but the expected result is " + shouldBePoly);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.help.ApplicationDetails;
|
import org.broadinstitute.sting.utils.help.ApplicationDetails;
|
||||||
import org.broadinstitute.sting.utils.help.HelpFormatter;
|
import org.broadinstitute.sting.utils.help.HelpFormatter;
|
||||||
|
import org.broadinstitute.sting.utils.help.HelpUtils;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
@ -288,7 +289,7 @@ public abstract class CommandLineProgram {
|
||||||
*/
|
*/
|
||||||
private static void printDocumentationReference() {
|
private static void printDocumentationReference() {
|
||||||
errorPrintf("Visit our website and forum for extensive documentation and answers to %n");
|
errorPrintf("Visit our website and forum for extensive documentation and answers to %n");
|
||||||
errorPrintf("commonly asked questions http://www.broadinstitute.org/gatk%n");
|
errorPrintf("commonly asked questions " + HelpUtils.BASE_GATK_URL + "%n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -39,6 +39,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.help.ApplicationDetails;
|
import org.broadinstitute.sting.utils.help.ApplicationDetails;
|
||||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||||
import org.broadinstitute.sting.utils.help.GATKDocUtils;
|
import org.broadinstitute.sting.utils.help.GATKDocUtils;
|
||||||
|
import org.broadinstitute.sting.utils.help.HelpUtils;
|
||||||
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
|
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
@ -160,7 +161,7 @@ public class CommandLineGATK extends CommandLineExecutable {
|
||||||
List<String> header = new ArrayList<String>();
|
List<String> header = new ArrayList<String>();
|
||||||
header.add(String.format("The Genome Analysis Toolkit (GATK) v%s, Compiled %s",getVersionNumber(), getBuildTime()));
|
header.add(String.format("The Genome Analysis Toolkit (GATK) v%s, Compiled %s",getVersionNumber(), getBuildTime()));
|
||||||
header.add("Copyright (c) 2010 The Broad Institute");
|
header.add("Copyright (c) 2010 The Broad Institute");
|
||||||
header.add("For support and documentation go to http://www.broadinstitute.org/gatk");
|
header.add("For support and documentation go to " + HelpUtils.BASE_GATK_URL);
|
||||||
return header;
|
return header;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -30,12 +30,10 @@ import net.sf.samtools.*;
|
||||||
import net.sf.samtools.util.CloseableIterator;
|
import net.sf.samtools.util.CloseableIterator;
|
||||||
import net.sf.samtools.util.RuntimeIOException;
|
import net.sf.samtools.util.RuntimeIOException;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.gatk.downsampling.*;
|
|
||||||
import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
|
|
||||||
import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
|
|
||||||
import org.broadinstitute.sting.gatk.ReadMetrics;
|
import org.broadinstitute.sting.gatk.ReadMetrics;
|
||||||
import org.broadinstitute.sting.gatk.ReadProperties;
|
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||||
|
import org.broadinstitute.sting.gatk.downsampling.*;
|
||||||
import org.broadinstitute.sting.gatk.filters.CountingFilteringIterator;
|
import org.broadinstitute.sting.gatk.filters.CountingFilteringIterator;
|
||||||
import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
||||||
import org.broadinstitute.sting.gatk.iterators.*;
|
import org.broadinstitute.sting.gatk.iterators.*;
|
||||||
|
|
@ -567,7 +565,7 @@ public class SAMDataSource {
|
||||||
*
|
*
|
||||||
* @return the start positions of the first chunk of reads for all BAM files
|
* @return the start positions of the first chunk of reads for all BAM files
|
||||||
*/
|
*/
|
||||||
public Map<SAMReaderID, GATKBAMFileSpan> getInitialReaderPositions() {
|
protected Map<SAMReaderID, GATKBAMFileSpan> getInitialReaderPositions() {
|
||||||
Map<SAMReaderID, GATKBAMFileSpan> initialPositions = new HashMap<SAMReaderID, GATKBAMFileSpan>();
|
Map<SAMReaderID, GATKBAMFileSpan> initialPositions = new HashMap<SAMReaderID, GATKBAMFileSpan>();
|
||||||
SAMReaders readers = resourcePool.getAvailableReaders();
|
SAMReaders readers = resourcePool.getAvailableReaders();
|
||||||
|
|
||||||
|
|
@ -585,7 +583,7 @@ public class SAMDataSource {
|
||||||
* @param shard The shard specifying the data limits.
|
* @param shard The shard specifying the data limits.
|
||||||
* @return An iterator over the selected data.
|
* @return An iterator over the selected data.
|
||||||
*/
|
*/
|
||||||
public StingSAMIterator getIterator( Shard shard ) {
|
protected StingSAMIterator getIterator( Shard shard ) {
|
||||||
return getIterator(resourcePool.getAvailableReaders(), shard, shard instanceof ReadShard);
|
return getIterator(resourcePool.getAvailableReaders(), shard, shard instanceof ReadShard);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -25,11 +25,9 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.filters;
|
package org.broadinstitute.sting.gatk.filters;
|
||||||
|
|
||||||
import com.google.common.base.Function;
|
|
||||||
import com.google.common.collect.Collections2;
|
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
|
||||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||||
import org.broadinstitute.sting.utils.help.GATKDocUtils;
|
import org.broadinstitute.sting.utils.help.GATKDocUtils;
|
||||||
|
import org.broadinstitute.sting.utils.help.HelpUtils;
|
||||||
|
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
@ -73,7 +71,7 @@ public class FilterManager extends PluginManager<ReadFilter> {
|
||||||
|
|
||||||
return String.format("Read filter %s not found. Available read filters:%n%n%s%n%n%s",pluginName,
|
return String.format("Read filter %s not found. Available read filters:%n%n%s%n%n%s",pluginName,
|
||||||
userFriendlyListofReadFilters(availableFilters),
|
userFriendlyListofReadFilters(availableFilters),
|
||||||
"Please consult the GATK Documentation (http://www.broadinstitute.org/gatk/gatkdocs/) for more information.");
|
"Please consult the GATK Documentation (" + HelpUtils.GATK_DOCS_URL + ") for more information.");
|
||||||
}
|
}
|
||||||
|
|
||||||
private String userFriendlyListofReadFilters(List<Class<? extends ReadFilter>> filters) {
|
private String userFriendlyListofReadFilters(List<Class<? extends ReadFilter>> filters) {
|
||||||
|
|
|
||||||
|
|
@ -258,13 +258,23 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
||||||
activeRegion.add( read );
|
activeRegion.add( read );
|
||||||
}
|
}
|
||||||
for( final ActiveRegion otherRegionToTest : workQueue ) {
|
for( final ActiveRegion otherRegionToTest : workQueue ) {
|
||||||
if( !bestRegion.equals(otherRegionToTest) && otherRegionToTest.getExtendedLoc().overlapsP( readLoc ) ) {
|
if( !bestRegion.equals(otherRegionToTest) ) {
|
||||||
otherRegionToTest.add( read );
|
// check for non-primary vs. extended
|
||||||
|
if ( otherRegionToTest.getLocation().overlapsP( readLoc ) ) {
|
||||||
|
otherRegionToTest.add( read );
|
||||||
|
} else if ( walker.wantsExtendedReads() && otherRegionToTest.getExtendedLoc().overlapsP( readLoc ) ) {
|
||||||
|
otherRegionToTest.add( read );
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
placedReads.add( read );
|
placedReads.add( read );
|
||||||
} else if( activeRegion.getExtendedLoc().overlapsP( readLoc ) && walker.wantsNonPrimaryReads() ) {
|
// check for non-primary vs. extended
|
||||||
|
} else if( activeRegion.getLocation().overlapsP( readLoc ) ) {
|
||||||
|
if ( walker.wantsNonPrimaryReads() ) {
|
||||||
|
activeRegion.add( read );
|
||||||
|
}
|
||||||
|
} else if( walker.wantsExtendedReads() && activeRegion.getExtendedLoc().overlapsP( readLoc )) {
|
||||||
activeRegion.add( read );
|
activeRegion.add( read );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers;
|
package org.broadinstitute.sting.gatk.walkers;
|
||||||
|
|
||||||
|
import com.google.java.contract.Ensures;
|
||||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
import org.broad.tribble.Feature;
|
import org.broad.tribble.Feature;
|
||||||
import org.broadinstitute.sting.commandline.Input;
|
import org.broadinstitute.sting.commandline.Input;
|
||||||
|
|
@ -13,14 +14,14 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||||
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
|
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
|
||||||
|
import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState;
|
||||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
|
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
|
||||||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||||
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
|
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
|
||||||
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||||
|
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
import java.util.ArrayList;
|
import java.util.*;
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Base class for all the Active Region Walkers.
|
* Base class for all the Active Region Walkers.
|
||||||
|
|
@ -70,11 +71,24 @@ public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<Map
|
||||||
return true; // We are keeping all the reads
|
return true; // We are keeping all the reads
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean wantsNonPrimaryReads() {
|
public EnumSet<ActiveRegionReadState> desiredReadStates() {
|
||||||
return false;
|
return EnumSet.of(ActiveRegionReadState.PRIMARY);
|
||||||
|
}
|
||||||
|
|
||||||
|
public final boolean wantsNonPrimaryReads() {
|
||||||
|
return desiredReadStates().contains(ActiveRegionReadState.NONPRIMARY);
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean wantsExtendedReads() {
|
||||||
|
return desiredReadStates().contains(ActiveRegionReadState.EXTENDED);
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean wantsUnmappedReads() {
|
||||||
|
return desiredReadStates().contains(ActiveRegionReadState.UNMAPPED);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Determine probability of active status over the AlignmentContext
|
// Determine probability of active status over the AlignmentContext
|
||||||
|
@Ensures({"result.isActiveProb >= 0.0", "result.isActiveProb <= 1.0"})
|
||||||
public abstract ActivityProfileResult isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context);
|
public abstract ActivityProfileResult isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context);
|
||||||
|
|
||||||
// Map over the ActiveRegion
|
// Map over the ActiveRegion
|
||||||
|
|
|
||||||
|
|
@ -282,6 +282,7 @@ public class DiffEngine {
|
||||||
// now that we have a specific list of values we want to show, display them
|
// now that we have a specific list of values we want to show, display them
|
||||||
GATKReport report = new GATKReport();
|
GATKReport report = new GATKReport();
|
||||||
final String tableName = "differences";
|
final String tableName = "differences";
|
||||||
|
// TODO for Geraldine -- link needs to be updated below
|
||||||
report.addTable(tableName, "Summarized differences between the master and test files. See http://www.broadinstitute.org/gsa/wiki/index.php/DiffEngine for more information", 3);
|
report.addTable(tableName, "Summarized differences between the master and test files. See http://www.broadinstitute.org/gsa/wiki/index.php/DiffEngine for more information", 3);
|
||||||
final GATKReportTable table = report.getTable(tableName);
|
final GATKReportTable table = report.getTable(tableName);
|
||||||
table.addColumn("Difference");
|
table.addColumn("Difference");
|
||||||
|
|
|
||||||
|
|
@ -138,7 +138,8 @@ public class DiffObjects extends RodWalker<Integer, Integer> {
|
||||||
/**
|
/**
|
||||||
* Writes out a file of the DiffEngine format:
|
* Writes out a file of the DiffEngine format:
|
||||||
*
|
*
|
||||||
* http://www.broadinstitute.org/gsa/wiki/index.php/DiffEngine
|
* TODO for Geraldine -- link needs to be updated below (and also in SelectVariants and RefSeqCodec GATK docs)
|
||||||
|
* http://www.broadinstitute.org/gsa/wiki/index.php/DiffEngine
|
||||||
*/
|
*/
|
||||||
@Output(doc="File to which results should be written",required=true)
|
@Output(doc="File to which results should be written",required=true)
|
||||||
protected PrintStream out;
|
protected PrintStream out;
|
||||||
|
|
|
||||||
|
|
@ -382,11 +382,9 @@ public class UnifiedGenotyperEngine {
|
||||||
if ( alternateAllele.isReference() )
|
if ( alternateAllele.isReference() )
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
// we are non-ref if the probability of being non-ref > the emit confidence.
|
// Compute if the site is considered polymorphic with sufficient confidence relative to our
|
||||||
// the emit confidence is phred-scaled, say 30 => 10^-3.
|
// phred-scaled emission QUAL
|
||||||
// the posterior AF > 0 is log10: -5 => 10^-5
|
final boolean isNonRef = AFresult.isPolymorphicPhredScaledQual(alternateAllele, UAC.STANDARD_CONFIDENCE_FOR_EMITTING);
|
||||||
// we are non-ref if 10^-5 < 10^-3 => -5 < -3
|
|
||||||
final boolean isNonRef = AFresult.isPolymorphic(alternateAllele, UAC.STANDARD_CONFIDENCE_FOR_EMITTING / -10.0);
|
|
||||||
|
|
||||||
// if the most likely AC is not 0, then this is a good alternate allele to use
|
// if the most likely AC is not 0, then this is a good alternate allele to use
|
||||||
if ( isNonRef ) {
|
if ( isNonRef ) {
|
||||||
|
|
|
||||||
|
|
@ -28,6 +28,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
||||||
import com.google.java.contract.Ensures;
|
import com.google.java.contract.Ensures;
|
||||||
import com.google.java.contract.Requires;
|
import com.google.java.contract.Requires;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
|
|
||||||
|
|
@ -234,10 +235,20 @@ public class AFCalcResult {
|
||||||
*
|
*
|
||||||
* @return true if there's enough confidence (relative to log10minPNonRef) to reject AF == 0
|
* @return true if there's enough confidence (relative to log10minPNonRef) to reject AF == 0
|
||||||
*/
|
*/
|
||||||
|
@Requires("MathUtils.goodLog10Probability(log10minPNonRef)")
|
||||||
public boolean isPolymorphic(final Allele allele, final double log10minPNonRef) {
|
public boolean isPolymorphic(final Allele allele, final double log10minPNonRef) {
|
||||||
return getLog10PosteriorOfAFGt0ForAllele(allele) >= log10minPNonRef;
|
return getLog10PosteriorOfAFGt0ForAllele(allele) >= log10minPNonRef;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Same as #isPolymorphic but takes a phred-scaled quality score as input
|
||||||
|
*/
|
||||||
|
public boolean isPolymorphicPhredScaledQual(final Allele allele, final double minPNonRefPhredScaledQual) {
|
||||||
|
if ( minPNonRefPhredScaledQual < 0 ) throw new IllegalArgumentException("phredScaledQual " + minPNonRefPhredScaledQual + " < 0 ");
|
||||||
|
final double log10Threshold = Math.log10(QualityUtils.qualToProb(minPNonRefPhredScaledQual));
|
||||||
|
return isPolymorphic(allele, log10Threshold);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Are any of the alleles polymorphic w.r.t. #isPolymorphic?
|
* Are any of the alleles polymorphic w.r.t. #isPolymorphic?
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -95,7 +95,8 @@ import static org.broadinstitute.sting.utils.codecs.vcf.VCFUtils.getVCFHeadersFr
|
||||||
|
|
||||||
@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} )
|
@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} )
|
||||||
public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingStats> {
|
public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingStats> {
|
||||||
private static final boolean DEBUG = false;
|
@Argument(fullName="debug", shortName="debug", doc="If specified, print out very verbose debug information (if -l DEBUG is also specified)", required = false)
|
||||||
|
protected boolean DEBUG = false;
|
||||||
/**
|
/**
|
||||||
* The VCF file we are phasing variants from.
|
* The VCF file we are phasing variants from.
|
||||||
*
|
*
|
||||||
|
|
@ -949,7 +950,7 @@ public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingS
|
||||||
}
|
}
|
||||||
|
|
||||||
if (DEBUG) logger.debug("\nPhasing table [AFTER CALCULATION]:\n" + sampleHaps + "\n");
|
if (DEBUG) logger.debug("\nPhasing table [AFTER CALCULATION]:\n" + sampleHaps + "\n");
|
||||||
MaxHaplotypeAndQuality maxHapQual = new MaxHaplotypeAndQuality(sampleHaps, true);
|
MaxHaplotypeAndQuality maxHapQual = new MaxHaplotypeAndQuality(sampleHaps, DEBUG);
|
||||||
double posteriorProb = maxHapQual.maxEntry.getScore().getValue();
|
double posteriorProb = maxHapQual.maxEntry.getScore().getValue();
|
||||||
|
|
||||||
if (DEBUG)
|
if (DEBUG)
|
||||||
|
|
@ -971,7 +972,7 @@ public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingS
|
||||||
public MaxHaplotypeAndQuality(PhasingTable hapTable, boolean printDebug) {
|
public MaxHaplotypeAndQuality(PhasingTable hapTable, boolean printDebug) {
|
||||||
// Marginalize each haplotype to its first 2 positions:
|
// Marginalize each haplotype to its first 2 positions:
|
||||||
hapTable = HaplotypeTableCreator.marginalizeAsNewTable(hapTable);
|
hapTable = HaplotypeTableCreator.marginalizeAsNewTable(hapTable);
|
||||||
if (DEBUG && printDebug)
|
if (printDebug)
|
||||||
logger.debug("\nPhasing table [AFTER MAPPING]:\n" + hapTable + "\n");
|
logger.debug("\nPhasing table [AFTER MAPPING]:\n" + hapTable + "\n");
|
||||||
|
|
||||||
calculateMaxHapAndPhasingQuality(hapTable, printDebug);
|
calculateMaxHapAndPhasingQuality(hapTable, printDebug);
|
||||||
|
|
@ -981,7 +982,7 @@ public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingS
|
||||||
|
|
||||||
private void calculateMaxHapAndPhasingQuality(PhasingTable hapTable, boolean printDebug) {
|
private void calculateMaxHapAndPhasingQuality(PhasingTable hapTable, boolean printDebug) {
|
||||||
hapTable.normalizeScores();
|
hapTable.normalizeScores();
|
||||||
if (DEBUG && printDebug)
|
if (printDebug)
|
||||||
logger.debug("\nPhasing table [AFTER NORMALIZATION]:\n" + hapTable + "\n");
|
logger.debug("\nPhasing table [AFTER NORMALIZATION]:\n" + hapTable + "\n");
|
||||||
|
|
||||||
// Determine the phase at this position:
|
// Determine the phase at this position:
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,7 @@ import java.util.Set;
|
||||||
/**
|
/**
|
||||||
* Stratifies the eval RODs by user-supplied JEXL expressions
|
* Stratifies the eval RODs by user-supplied JEXL expressions
|
||||||
*
|
*
|
||||||
* See http://www.broadinstitute.org/gsa/wiki/index.php/Using_JEXL_expressions for more details
|
* See http://gatkforums.broadinstitute.org/discussion/1255/what-are-jexl-expressions-and-how-can-i-use-them-with-the-gatk for more details
|
||||||
*/
|
*/
|
||||||
public class JexlExpression extends VariantStratifier implements StandardStratification {
|
public class JexlExpression extends VariantStratifier implements StandardStratification {
|
||||||
// needs to know the jexl expressions
|
// needs to know the jexl expressions
|
||||||
|
|
|
||||||
|
|
@ -32,6 +32,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
import org.broadinstitute.sting.utils.help.HelpUtils;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
|
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
|
||||||
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
@ -80,7 +81,7 @@ public class VariantDataManager {
|
||||||
final double theSTD = standardDeviation(theMean, iii);
|
final double theSTD = standardDeviation(theMean, iii);
|
||||||
logger.info( annotationKeys.get(iii) + String.format(": \t mean = %.2f\t standard deviation = %.2f", theMean, theSTD) );
|
logger.info( annotationKeys.get(iii) + String.format(": \t mean = %.2f\t standard deviation = %.2f", theMean, theSTD) );
|
||||||
if( Double.isNaN(theMean) ) {
|
if( Double.isNaN(theMean) ) {
|
||||||
throw new UserException.BadInput("Values for " + annotationKeys.get(iii) + " annotation not detected for ANY training variant in the input callset. VariantAnnotator may be used to add these annotations. See http://www.broadinstitute.org/gsa/wiki/index.php/VariantAnnotator");
|
throw new UserException.BadInput("Values for " + annotationKeys.get(iii) + " annotation not detected for ANY training variant in the input callset. VariantAnnotator may be used to add these annotations. See " + HelpUtils.GATK_FORUM_URL + "discussion/49/using-variant-annotator");
|
||||||
}
|
}
|
||||||
|
|
||||||
foundZeroVarianceAnnotation = foundZeroVarianceAnnotation || (theSTD < 1E-6);
|
foundZeroVarianceAnnotation = foundZeroVarianceAnnotation || (theSTD < 1E-6);
|
||||||
|
|
|
||||||
|
|
@ -51,8 +51,7 @@ import java.util.*;
|
||||||
* The Variant Validation Assessor is a tool for vetting/assessing validation data (containing genotypes).
|
* The Variant Validation Assessor is a tool for vetting/assessing validation data (containing genotypes).
|
||||||
* The tool produces a VCF that is annotated with information pertaining to plate quality control and by
|
* The tool produces a VCF that is annotated with information pertaining to plate quality control and by
|
||||||
* default is soft-filtered by high no-call rate or low Hardy-Weinberg probability.
|
* default is soft-filtered by high no-call rate or low Hardy-Weinberg probability.
|
||||||
* If you have .ped files, please first convert them to VCF format
|
* If you have .ped files, please first convert them to VCF format.
|
||||||
* (see http://www.broadinstitute.org/gsa/wiki/index.php/Converting_ped_to_vcf).
|
|
||||||
*
|
*
|
||||||
* <h2>Input</h2>
|
* <h2>Input</h2>
|
||||||
* <p>
|
* <p>
|
||||||
|
|
|
||||||
|
|
@ -210,13 +210,23 @@ public class JnaSession implements Session {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void setAttribute(Pointer jt, String name, String value) throws DrmaaException {
|
public static void setAttribute(Pointer jt, String name, String value) throws DrmaaException {
|
||||||
checkError(LibDrmaa.drmaa_set_attribute(jt, name, value, getError(), LibDrmaa.DRMAA_ERROR_STRING_BUFFER_LEN));
|
if (getAttrNames().contains(name)) {
|
||||||
|
checkError(LibDrmaa.drmaa_set_attribute(jt, name, value, getError(), LibDrmaa.DRMAA_ERROR_STRING_BUFFER_LEN));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
throw new InvalidAttributeValueException("Attribute " + name + " is not supported by this implementation of DRMAA");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String getAttribute(Pointer jt, String name) throws DrmaaException {
|
public static String getAttribute(Pointer jt, String name) throws DrmaaException {
|
||||||
Memory attrBuffer = new Memory(LibDrmaa.DRMAA_ATTR_BUFFER);
|
if (getAttrNames().contains(name)) {
|
||||||
checkError(LibDrmaa.drmaa_get_attribute(jt, name, attrBuffer, LibDrmaa.DRMAA_ATTR_BUFFER_LEN, getError(), LibDrmaa.DRMAA_ERROR_STRING_BUFFER_LEN));
|
Memory attrBuffer = new Memory(LibDrmaa.DRMAA_ATTR_BUFFER);
|
||||||
return attrBuffer.getString(0);
|
checkError(LibDrmaa.drmaa_get_attribute(jt, name, attrBuffer, LibDrmaa.DRMAA_ATTR_BUFFER_LEN, getError(), LibDrmaa.DRMAA_ERROR_STRING_BUFFER_LEN));
|
||||||
|
return attrBuffer.getString(0);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
throw new InvalidAttributeValueException("Attribute " + name + " is not supported by this implementation of DRMAA");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void setVectorAttribute(Pointer jt, String name, Collection<String> values) throws DrmaaException {
|
public static void setVectorAttribute(Pointer jt, String name, Collection<String> values) throws DrmaaException {
|
||||||
|
|
|
||||||
|
|
@ -374,7 +374,7 @@ public final class GenomeLocParser {
|
||||||
int start = 1;
|
int start = 1;
|
||||||
int stop = -1;
|
int stop = -1;
|
||||||
|
|
||||||
final int colonIndex = str.indexOf(":");
|
final int colonIndex = str.lastIndexOf(":");
|
||||||
if(colonIndex == -1) {
|
if(colonIndex == -1) {
|
||||||
contig = str.substring(0, str.length()); // chr1
|
contig = str.substring(0, str.length()); // chr1
|
||||||
stop = Integer.MAX_VALUE;
|
stop = Integer.MAX_VALUE;
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,7 @@ import net.sf.samtools.SAMUtils;
|
||||||
* @author Kiran Garimella
|
* @author Kiran Garimella
|
||||||
*/
|
*/
|
||||||
public class QualityUtils {
|
public class QualityUtils {
|
||||||
public final static byte MAX_RECALIBRATED_Q_SCORE = 93;
|
public final static byte MAX_RECALIBRATED_Q_SCORE = SAMUtils.MAX_PHRED_SCORE;
|
||||||
public final static byte MAX_QUAL_SCORE = SAMUtils.MAX_PHRED_SCORE;
|
public final static byte MAX_QUAL_SCORE = SAMUtils.MAX_PHRED_SCORE;
|
||||||
public final static double ERROR_RATE_OF_MAX_QUAL_SCORE = qualToErrorProbRaw(MAX_QUAL_SCORE);
|
public final static double ERROR_RATE_OF_MAX_QUAL_SCORE = qualToErrorProbRaw(MAX_QUAL_SCORE);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,16 @@
|
||||||
|
package org.broadinstitute.sting.utils.activeregion;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created with IntelliJ IDEA.
|
||||||
|
* User: thibault
|
||||||
|
* Date: 11/26/12
|
||||||
|
* Time: 2:35 PM
|
||||||
|
*
|
||||||
|
* Describes how a read relates to an assigned ActiveRegion
|
||||||
|
*/
|
||||||
|
public enum ActiveRegionReadState {
|
||||||
|
PRIMARY, // This is the read's primary region
|
||||||
|
NONPRIMARY, // This region overlaps the read, but it is not primary
|
||||||
|
EXTENDED, // This region would overlap the read if it were extended
|
||||||
|
UNMAPPED // This read is not mapped
|
||||||
|
}
|
||||||
|
|
@ -406,10 +406,15 @@ public class BAQ {
|
||||||
// so BQi = Qi - BAQi + 64
|
// so BQi = Qi - BAQi + 64
|
||||||
byte[] bqTag = new byte[baq.length];
|
byte[] bqTag = new byte[baq.length];
|
||||||
for ( int i = 0; i < bqTag.length; i++) {
|
for ( int i = 0; i < bqTag.length; i++) {
|
||||||
int bq = (int)read.getBaseQualities()[i] + 64;
|
final int bq = (int)read.getBaseQualities()[i] + 64;
|
||||||
int baq_i = (int)baq[i];
|
final int baq_i = (int)baq[i];
|
||||||
int tag = bq - baq_i;
|
final int tag = bq - baq_i;
|
||||||
if ( tag < 0 ) throw new ReviewedStingException("BAQ tag calculation error. BAQ value above base quality at " + read);
|
// problem with the calculation of the correction factor; this is our problem
|
||||||
|
if ( tag < 0 )
|
||||||
|
throw new ReviewedStingException("BAQ tag calculation error. BAQ value above base quality at " + read);
|
||||||
|
// the original quality is too high, almost certainly due to using the wrong encoding in the BAM file
|
||||||
|
if ( tag > Byte.MAX_VALUE )
|
||||||
|
throw new UserException.MalformedBAM(read, "we encountered an extremely high quality score (" + (bq - 64) + ") with BAQ correction factor of " + baq_i + "; the BAM file appears to be using the wrong encoding for quality scores");
|
||||||
bqTag[i] = (byte)tag;
|
bqTag[i] = (byte)tag;
|
||||||
}
|
}
|
||||||
return new String(bqTag);
|
return new String(bqTag);
|
||||||
|
|
|
||||||
|
|
@ -30,6 +30,7 @@ import net.sf.samtools.SAMSequenceDictionary;
|
||||||
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
|
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||||
|
import org.broadinstitute.sting.utils.help.HelpUtils;
|
||||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
|
|
@ -267,7 +268,7 @@ public class UserException extends ReviewedStingException {
|
||||||
|
|
||||||
public static class ReadMissingReadGroup extends MalformedBAM {
|
public static class ReadMissingReadGroup extends MalformedBAM {
|
||||||
public ReadMissingReadGroup(SAMRecord read) {
|
public ReadMissingReadGroup(SAMRecord read) {
|
||||||
super(read, String.format("Read %s is either missing the read group or its read group is not defined in the BAM header, both of which are required by the GATK. Please use http://www.broadinstitute.org/gsa/wiki/index.php/ReplaceReadGroups to fix this problem", read.getReadName()));
|
super(read, String.format("Read %s is either missing the read group or its read group is not defined in the BAM header, both of which are required by the GATK. Please use " + HelpUtils.GATK_FORUM_URL + "discussion/59/companion-utilities-replacereadgroups to fix this problem", read.getReadName()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -343,7 +344,7 @@ public class UserException extends ReviewedStingException {
|
||||||
super(String.format("Lexicographically sorted human genome sequence detected in %s."
|
super(String.format("Lexicographically sorted human genome sequence detected in %s."
|
||||||
+ "\nFor safety's sake the GATK requires human contigs in karyotypic order: 1, 2, ..., 10, 11, ..., 20, 21, 22, X, Y with M either leading or trailing these contigs."
|
+ "\nFor safety's sake the GATK requires human contigs in karyotypic order: 1, 2, ..., 10, 11, ..., 20, 21, 22, X, Y with M either leading or trailing these contigs."
|
||||||
+ "\nThis is because all distributed GATK resources are sorted in karyotypic order, and your processing will fail when you need to use these files."
|
+ "\nThis is because all distributed GATK resources are sorted in karyotypic order, and your processing will fail when you need to use these files."
|
||||||
+ "\nYou can use the ReorderSam utility to fix this problem: http://www.broadinstitute.org/gsa/wiki/index.php/ReorderSam"
|
+ "\nYou can use the ReorderSam utility to fix this problem: " + HelpUtils.GATK_FORUM_URL + "discussion/58/companion-utilities-reordersam"
|
||||||
+ "\n %s contigs = %s",
|
+ "\n %s contigs = %s",
|
||||||
name, name, ReadUtils.prettyPrintSequenceRecords(dict)));
|
name, name, ReadUtils.prettyPrintSequenceRecords(dict)));
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -44,14 +44,13 @@ public class ForumAPIUtils {
|
||||||
/**
|
/**
|
||||||
* How we post to the forum
|
* How we post to the forum
|
||||||
*/
|
*/
|
||||||
private final static String API_URL = "https://gatkforums.broadinstitute.org/api/v1/";
|
|
||||||
final private static String ACCESS_TOKEN = "access_token=";
|
final private static String ACCESS_TOKEN = "access_token=";
|
||||||
|
|
||||||
public static List<String> getPostedTools(String forumKey) {
|
public static List<String> getPostedTools(String forumKey) {
|
||||||
Gson gson = new Gson();
|
Gson gson = new Gson();
|
||||||
List<String> output = new ArrayList<String>();
|
List<String> output = new ArrayList<String>();
|
||||||
|
|
||||||
String text = httpGet(API_URL + "categories.json?CategoryIdentifier=tool-bulletin&page=1-100000&" + ACCESS_TOKEN + forumKey);
|
String text = httpGet(HelpUtils.GATK_FORUM_API_URL + "categories.json?CategoryIdentifier=tool-bulletin&page=1-100000&" + ACCESS_TOKEN + forumKey);
|
||||||
|
|
||||||
APIQuery details = gson.fromJson(text, APIQuery.class);
|
APIQuery details = gson.fromJson(text, APIQuery.class);
|
||||||
ForumDiscussion[] discussions = details.Discussions;
|
ForumDiscussion[] discussions = details.Discussions;
|
||||||
|
|
@ -159,7 +158,7 @@ public class ForumAPIUtils {
|
||||||
Gson gson = new Gson();
|
Gson gson = new Gson();
|
||||||
|
|
||||||
String data = gson.toJson(post.getPostData());
|
String data = gson.toJson(post.getPostData());
|
||||||
httpPost(data, API_URL + "post/discussion.json?" + ACCESS_TOKEN + forumKey);
|
httpPost(data, HelpUtils.GATK_FORUM_API_URL + "post/discussion.json?" + ACCESS_TOKEN + forumKey);
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -167,8 +166,7 @@ public class ForumAPIUtils {
|
||||||
class APIQuery {
|
class APIQuery {
|
||||||
ForumDiscussion[] Discussions;
|
ForumDiscussion[] Discussions;
|
||||||
|
|
||||||
public APIQuery() {
|
public APIQuery() {}
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -28,7 +28,7 @@ public class GATKDocUtils {
|
||||||
/**
|
/**
|
||||||
* The URL root for RELEASED GATKDOC units
|
* The URL root for RELEASED GATKDOC units
|
||||||
*/
|
*/
|
||||||
public final static String URL_ROOT_FOR_RELEASE_GATKDOCS = "http://www.broadinstitute.org/gatk/gatkdocs/";
|
public final static String URL_ROOT_FOR_RELEASE_GATKDOCS = HelpUtils.GATK_DOCS_URL;
|
||||||
/**
|
/**
|
||||||
* The URL root for STABLE GATKDOC units
|
* The URL root for STABLE GATKDOC units
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
|
|
@ -32,6 +32,15 @@ import org.broadinstitute.sting.utils.classloader.JVMUtils;
|
||||||
import java.lang.reflect.Field;
|
import java.lang.reflect.Field;
|
||||||
|
|
||||||
public class HelpUtils {
|
public class HelpUtils {
|
||||||
|
|
||||||
|
public final static String BASE_GATK_URL = "http://www.broadinstitute.org/gatk";
|
||||||
|
public final static String GATK_DOCS_URL = BASE_GATK_URL + "/gatkdocs/";
|
||||||
|
public final static String GATK_FORUM_URL = "http://gatkforums.broadinstitute.org/";
|
||||||
|
public final static String GATK_FORUM_API_URL = "https://gatkforums.broadinstitute.org/api/v1/";
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
protected static boolean assignableToClass(ProgramElementDoc classDoc, Class lhsClass, boolean requireConcrete) {
|
protected static boolean assignableToClass(ProgramElementDoc classDoc, Class lhsClass, boolean requireConcrete) {
|
||||||
try {
|
try {
|
||||||
Class type = getClassForDoc(classDoc);
|
Class type = getClassForDoc(classDoc);
|
||||||
|
|
|
||||||
|
|
@ -652,23 +652,19 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
||||||
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>) pileupElementTracker;
|
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>) pileupElementTracker;
|
||||||
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
|
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
|
||||||
|
|
||||||
int current = 0;
|
|
||||||
|
|
||||||
for (final String sample : tracker.getSamples()) {
|
for (final String sample : tracker.getSamples()) {
|
||||||
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
|
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
|
||||||
|
|
||||||
List<PileupElement> filteredPileup = new ArrayList<PileupElement>();
|
int current = 0;
|
||||||
for (PileupElement p : perSampleElements) {
|
UnifiedPileupElementTracker<PE> filteredPileup = new UnifiedPileupElementTracker<PE>();
|
||||||
|
for (PE p : perSampleElements) {
|
||||||
if (positions.contains(current))
|
if (positions.contains(current))
|
||||||
filteredPileup.add(p);
|
filteredPileup.add(p);
|
||||||
}
|
current++;
|
||||||
|
|
||||||
if (!filteredPileup.isEmpty()) {
|
|
||||||
AbstractReadBackedPileup<RBP, PE> pileup = createNewPileup(loc, perSampleElements);
|
|
||||||
filteredTracker.addElements(sample, pileup.pileupElementTracker);
|
|
||||||
}
|
}
|
||||||
|
filteredTracker.addElements(sample, filteredPileup);
|
||||||
current++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return (RBP) createNewPileup(loc, filteredTracker);
|
return (RBP) createNewPileup(loc, filteredTracker);
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,64 @@
|
||||||
|
package org.broadinstitute.sting.utils.recalibration.covariates;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.TandemRepeat;
|
||||||
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
|
import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
|
||||||
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created with IntelliJ IDEA.
|
||||||
|
* User: rpoplin
|
||||||
|
* Date: 11/3/12
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class RepeatLengthCovariate implements ExperimentalCovariate {
|
||||||
|
final int MAX_REPEAT_LENGTH = 20;
|
||||||
|
|
||||||
|
// Initialize any member variables using the command-line arguments passed to the walkers
|
||||||
|
@Override
|
||||||
|
public void initialize(final RecalibrationArgumentCollection RAC) {}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void recordValues(final GATKSAMRecord read, final ReadCovariates values) {
|
||||||
|
byte[] readBytes = read.getReadBases();
|
||||||
|
for (int i = 0; i < readBytes.length; i++) {
|
||||||
|
int maxRL = 0;
|
||||||
|
for (int str = 1; str <= 8; str++) {
|
||||||
|
if (i + str <= readBytes.length) {
|
||||||
|
maxRL = Math.max(maxRL, VariantContextUtils.findNumberofRepetitions(
|
||||||
|
Arrays.copyOfRange(readBytes,i,i + str),
|
||||||
|
Arrays.copyOfRange(readBytes,i,readBytes.length)
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if(maxRL > MAX_REPEAT_LENGTH) { maxRL = MAX_REPEAT_LENGTH; }
|
||||||
|
values.addCovariate(maxRL, maxRL, maxRL, i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||||
|
@Override
|
||||||
|
public final Object getValue(final String str) {
|
||||||
|
return Byte.parseByte(str);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String formatKey(final int key) {
|
||||||
|
return String.format("%d", key);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int keyFromValue(final Object value) {
|
||||||
|
return (value instanceof String) ? Integer.parseInt((String) value) : (Integer) value;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int maximumKeyValue() {
|
||||||
|
return MAX_REPEAT_LENGTH + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -1267,7 +1267,7 @@ public class VariantContextUtils {
|
||||||
* @param testString String to test
|
* @param testString String to test
|
||||||
* @return Number of repetitions (0 if testString is not a concatenation of n repeatUnit's
|
* @return Number of repetitions (0 if testString is not a concatenation of n repeatUnit's
|
||||||
*/
|
*/
|
||||||
protected static int findNumberofRepetitions(byte[] repeatUnit, byte[] testString) {
|
public static int findNumberofRepetitions(byte[] repeatUnit, byte[] testString) {
|
||||||
int numRepeats = 0;
|
int numRepeats = 0;
|
||||||
for (int start = 0; start < testString.length; start += repeatUnit.length) {
|
for (int start = 0; start < testString.length; start += repeatUnit.length) {
|
||||||
int end = start + repeatUnit.length;
|
int end = start + repeatUnit.length;
|
||||||
|
|
|
||||||
|
|
@ -71,7 +71,6 @@ abstract class SortingVariantContextWriterBase implements VariantContextWriter {
|
||||||
this.takeOwnershipOfInner = takeOwnershipOfInner;
|
this.takeOwnershipOfInner = takeOwnershipOfInner;
|
||||||
|
|
||||||
// has to be PriorityBlockingQueue to be thread-safe
|
// has to be PriorityBlockingQueue to be thread-safe
|
||||||
// see http://getsatisfaction.com/gsa/topics/missing_loci_output_in_multi_thread_mode_when_implement_sortingvcfwriterbase?utm_content=topic_link&utm_medium=email&utm_source=new_topic
|
|
||||||
this.queue = new PriorityBlockingQueue<VCFRecord>(50, new VariantContextComparator());
|
this.queue = new PriorityBlockingQueue<VCFRecord>(50, new VariantContextComparator());
|
||||||
|
|
||||||
this.mostUpstreamWritableLoc = BEFORE_MOST_UPSTREAM_LOC;
|
this.mostUpstreamWritableLoc = BEFORE_MOST_UPSTREAM_LOC;
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,8 @@
|
||||||
package org.broadinstitute.sting.gatk.traversals;
|
package org.broadinstitute.sting.gatk.traversals;
|
||||||
|
|
||||||
|
import com.google.java.contract.PreconditionError;
|
||||||
import net.sf.samtools.*;
|
import net.sf.samtools.*;
|
||||||
|
import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState;
|
||||||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||||
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
@ -23,6 +25,7 @@ import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
|
||||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
|
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
|
||||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||||
|
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
import org.testng.annotations.BeforeClass;
|
import org.testng.annotations.BeforeClass;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
@ -45,6 +48,8 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
||||||
|
|
||||||
private class DummyActiveRegionWalker extends ActiveRegionWalker<Integer, Integer> {
|
private class DummyActiveRegionWalker extends ActiveRegionWalker<Integer, Integer> {
|
||||||
private final double prob;
|
private final double prob;
|
||||||
|
private EnumSet<ActiveRegionReadState> states = super.desiredReadStates();
|
||||||
|
|
||||||
protected List<GenomeLoc> isActiveCalls = new ArrayList<GenomeLoc>();
|
protected List<GenomeLoc> isActiveCalls = new ArrayList<GenomeLoc>();
|
||||||
protected Map<GenomeLoc, ActiveRegion> mappedActiveRegions = new HashMap<GenomeLoc, ActiveRegion>();
|
protected Map<GenomeLoc, ActiveRegion> mappedActiveRegions = new HashMap<GenomeLoc, ActiveRegion>();
|
||||||
|
|
||||||
|
|
@ -52,6 +57,20 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
||||||
this.prob = 1.0;
|
this.prob = 1.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public DummyActiveRegionWalker(double constProb) {
|
||||||
|
this.prob = constProb;
|
||||||
|
}
|
||||||
|
|
||||||
|
public DummyActiveRegionWalker(EnumSet<ActiveRegionReadState> wantStates) {
|
||||||
|
this.prob = 1.0;
|
||||||
|
this.states = wantStates;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public EnumSet<ActiveRegionReadState> desiredReadStates() {
|
||||||
|
return states;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public ActivityProfileResult isActive(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
public ActivityProfileResult isActive(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
isActiveCalls.add(ref.getLocus());
|
isActiveCalls.add(ref.getLocus());
|
||||||
|
|
@ -82,7 +101,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
||||||
private GenomeLocParser genomeLocParser;
|
private GenomeLocParser genomeLocParser;
|
||||||
|
|
||||||
private List<GenomeLoc> intervals;
|
private List<GenomeLoc> intervals;
|
||||||
private List<SAMRecord> reads;
|
private List<GATKSAMRecord> reads;
|
||||||
|
|
||||||
@BeforeClass
|
@BeforeClass
|
||||||
private void init() throws FileNotFoundException {
|
private void init() throws FileNotFoundException {
|
||||||
|
|
@ -96,21 +115,21 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
||||||
intervals.add(genomeLocParser.createGenomeLoc("1", 1000, 1999));
|
intervals.add(genomeLocParser.createGenomeLoc("1", 1000, 1999));
|
||||||
intervals.add(genomeLocParser.createGenomeLoc("1", 2000, 2999));
|
intervals.add(genomeLocParser.createGenomeLoc("1", 2000, 2999));
|
||||||
intervals.add(genomeLocParser.createGenomeLoc("1", 10000, 20000));
|
intervals.add(genomeLocParser.createGenomeLoc("1", 10000, 20000));
|
||||||
// TODO: this fails!
|
intervals.add(genomeLocParser.createGenomeLoc("20", 10000, 10100));
|
||||||
//intervals.add(genomeLocParser.createGenomeLoc("20", 10000, 20000));
|
|
||||||
intervals = IntervalUtils.sortAndMergeIntervals(genomeLocParser, intervals, IntervalMergingRule.OVERLAPPING_ONLY).toList();
|
intervals = IntervalUtils.sortAndMergeIntervals(genomeLocParser, intervals, IntervalMergingRule.OVERLAPPING_ONLY).toList();
|
||||||
|
|
||||||
reads = new ArrayList<SAMRecord>();
|
reads = new ArrayList<GATKSAMRecord>();
|
||||||
reads.add(buildSAMRecord("simple", "1", 100, 200));
|
reads.add(buildSAMRecord("simple", "1", 100, 200));
|
||||||
reads.add(buildSAMRecord("overlap_equal", "1", 10, 20));
|
reads.add(buildSAMRecord("overlap_equal", "1", 10, 20));
|
||||||
reads.add(buildSAMRecord("overlap_unequal", "1", 10, 21));
|
reads.add(buildSAMRecord("overlap_unequal", "1", 10, 21));
|
||||||
reads.add(buildSAMRecord("boundary_equal", "1", 1990, 2009));
|
reads.add(buildSAMRecord("boundary_equal", "1", 1990, 2009));
|
||||||
reads.add(buildSAMRecord("boundary_unequal", "1", 1995, 2050));
|
reads.add(buildSAMRecord("boundary_unequal", "1", 1990, 2008));
|
||||||
reads.add(buildSAMRecord("extended_only", "1", 3000, 3100));
|
|
||||||
reads.add(buildSAMRecord("extended_and_np", "1", 990, 1990));
|
reads.add(buildSAMRecord("extended_and_np", "1", 990, 1990));
|
||||||
reads.add(buildSAMRecord("outside_intervals", "1", 5000, 6000));
|
reads.add(buildSAMRecord("outside_intervals", "1", 5000, 6000));
|
||||||
// TODO
|
reads.add(buildSAMRecord("simple20", "20", 10025, 10075));
|
||||||
//reads.add(buildSAMRecord("simple20", "20", 10100, 10150));
|
|
||||||
|
// required by LocusIteratorByState, and I prefer to list them in test case order above
|
||||||
|
ReadUtils.sortReadsByCoordinate(reads);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
@ -134,6 +153,18 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
||||||
return activeIntervals;
|
return activeIntervals;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test (expectedExceptions = PreconditionError.class)
|
||||||
|
public void testIsActiveRangeLow () {
|
||||||
|
DummyActiveRegionWalker walker = new DummyActiveRegionWalker(-0.1);
|
||||||
|
getActiveRegions(walker, intervals).values();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test (expectedExceptions = PreconditionError.class)
|
||||||
|
public void testIsActiveRangeHigh () {
|
||||||
|
DummyActiveRegionWalker walker = new DummyActiveRegionWalker(1.1);
|
||||||
|
getActiveRegions(walker, intervals).values();
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testActiveRegionCoverage() {
|
public void testActiveRegionCoverage() {
|
||||||
DummyActiveRegionWalker walker = new DummyActiveRegionWalker();
|
DummyActiveRegionWalker walker = new DummyActiveRegionWalker();
|
||||||
|
|
@ -187,80 +218,210 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testReadMapping() {
|
public void testPrimaryReadMapping() {
|
||||||
DummyActiveRegionWalker walker = new DummyActiveRegionWalker();
|
DummyActiveRegionWalker walker = new DummyActiveRegionWalker();
|
||||||
|
|
||||||
// Contract: Each read has the Primary state in a single region (or none)
|
// Contract: Each read has the Primary state in a single region (or none)
|
||||||
// This is the region of maximum overlap for the read (earlier if tied)
|
// This is the region of maximum overlap for the read (earlier if tied)
|
||||||
|
|
||||||
|
// simple: Primary in 1:1-999
|
||||||
|
// overlap_equal: Primary in 1:1-999
|
||||||
|
// overlap_unequal: Primary in 1:1-999
|
||||||
|
// boundary_equal: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
|
||||||
|
// boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
|
||||||
|
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
|
||||||
|
// outside_intervals: none
|
||||||
|
// simple20: Primary in 20:10000-10100
|
||||||
|
|
||||||
|
Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
|
||||||
|
ActiveRegion region;
|
||||||
|
|
||||||
|
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1, 999));
|
||||||
|
|
||||||
|
getRead(region, "simple");
|
||||||
|
getRead(region, "overlap_equal");
|
||||||
|
getRead(region, "overlap_unequal");
|
||||||
|
verifyReadNotPlaced(region, "boundary_equal");
|
||||||
|
verifyReadNotPlaced(region, "boundary_unequal");
|
||||||
|
verifyReadNotPlaced(region, "extended_and_np");
|
||||||
|
verifyReadNotPlaced(region, "outside_intervals");
|
||||||
|
verifyReadNotPlaced(region, "simple20");
|
||||||
|
|
||||||
|
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999));
|
||||||
|
|
||||||
|
verifyReadNotPlaced(region, "simple");
|
||||||
|
verifyReadNotPlaced(region, "overlap_equal");
|
||||||
|
verifyReadNotPlaced(region, "overlap_unequal");
|
||||||
|
verifyReadNotPlaced(region, "boundary_equal");
|
||||||
|
getRead(region, "boundary_unequal");
|
||||||
|
getRead(region, "extended_and_np");
|
||||||
|
verifyReadNotPlaced(region, "outside_intervals");
|
||||||
|
verifyReadNotPlaced(region, "simple20");
|
||||||
|
|
||||||
|
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
|
||||||
|
|
||||||
|
verifyReadNotPlaced(region, "simple");
|
||||||
|
verifyReadNotPlaced(region, "overlap_equal");
|
||||||
|
verifyReadNotPlaced(region, "overlap_unequal");
|
||||||
|
getRead(region, "boundary_equal");
|
||||||
|
verifyReadNotPlaced(region, "boundary_unequal");
|
||||||
|
verifyReadNotPlaced(region, "extended_and_np");
|
||||||
|
verifyReadNotPlaced(region, "outside_intervals");
|
||||||
|
verifyReadNotPlaced(region, "simple20");
|
||||||
|
|
||||||
|
region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
|
||||||
|
|
||||||
|
verifyReadNotPlaced(region, "simple");
|
||||||
|
verifyReadNotPlaced(region, "overlap_equal");
|
||||||
|
verifyReadNotPlaced(region, "overlap_unequal");
|
||||||
|
verifyReadNotPlaced(region, "boundary_equal");
|
||||||
|
verifyReadNotPlaced(region, "boundary_unequal");
|
||||||
|
verifyReadNotPlaced(region, "extended_and_np");
|
||||||
|
verifyReadNotPlaced(region, "outside_intervals");
|
||||||
|
getRead(region, "simple20");
|
||||||
|
|
||||||
|
// TODO: more tests and edge cases
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNonPrimaryReadMapping() {
|
||||||
|
DummyActiveRegionWalker walker = new DummyActiveRegionWalker(
|
||||||
|
EnumSet.of(ActiveRegionReadState.PRIMARY, ActiveRegionReadState.NONPRIMARY));
|
||||||
|
|
||||||
|
// Contract: Each read has the Primary state in a single region (or none)
|
||||||
|
// This is the region of maximum overlap for the read (earlier if tied)
|
||||||
|
|
||||||
|
// Contract: Each read has the Non-Primary state in all other regions it overlaps
|
||||||
|
|
||||||
|
// simple: Primary in 1:1-999
|
||||||
|
// overlap_equal: Primary in 1:1-999
|
||||||
|
// overlap_unequal: Primary in 1:1-999
|
||||||
|
// boundary_equal: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
|
||||||
|
// boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
|
||||||
|
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
|
||||||
|
// outside_intervals: none
|
||||||
|
// simple20: Primary in 20:10000-10100
|
||||||
|
|
||||||
|
Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
|
||||||
|
ActiveRegion region;
|
||||||
|
|
||||||
|
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1, 999));
|
||||||
|
|
||||||
|
getRead(region, "simple");
|
||||||
|
getRead(region, "overlap_equal");
|
||||||
|
getRead(region, "overlap_unequal");
|
||||||
|
verifyReadNotPlaced(region, "boundary_equal");
|
||||||
|
verifyReadNotPlaced(region, "boundary_unequal");
|
||||||
|
getRead(region, "extended_and_np");
|
||||||
|
verifyReadNotPlaced(region, "outside_intervals");
|
||||||
|
verifyReadNotPlaced(region, "simple20");
|
||||||
|
|
||||||
|
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999));
|
||||||
|
|
||||||
|
verifyReadNotPlaced(region, "simple");
|
||||||
|
verifyReadNotPlaced(region, "overlap_equal");
|
||||||
|
verifyReadNotPlaced(region, "overlap_unequal");
|
||||||
|
getRead(region, "boundary_equal");
|
||||||
|
getRead(region, "boundary_unequal");
|
||||||
|
getRead(region, "extended_and_np");
|
||||||
|
verifyReadNotPlaced(region, "outside_intervals");
|
||||||
|
verifyReadNotPlaced(region, "simple20");
|
||||||
|
|
||||||
|
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
|
||||||
|
|
||||||
|
verifyReadNotPlaced(region, "simple");
|
||||||
|
verifyReadNotPlaced(region, "overlap_equal");
|
||||||
|
verifyReadNotPlaced(region, "overlap_unequal");
|
||||||
|
getRead(region, "boundary_equal");
|
||||||
|
getRead(region, "boundary_unequal");
|
||||||
|
verifyReadNotPlaced(region, "extended_and_np");
|
||||||
|
verifyReadNotPlaced(region, "outside_intervals");
|
||||||
|
verifyReadNotPlaced(region, "simple20");
|
||||||
|
|
||||||
|
region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
|
||||||
|
|
||||||
|
verifyReadNotPlaced(region, "simple");
|
||||||
|
verifyReadNotPlaced(region, "overlap_equal");
|
||||||
|
verifyReadNotPlaced(region, "overlap_unequal");
|
||||||
|
verifyReadNotPlaced(region, "boundary_equal");
|
||||||
|
verifyReadNotPlaced(region, "boundary_unequal");
|
||||||
|
verifyReadNotPlaced(region, "extended_and_np");
|
||||||
|
verifyReadNotPlaced(region, "outside_intervals");
|
||||||
|
getRead(region, "simple20");
|
||||||
|
|
||||||
|
// TODO: more tests and edge cases
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testExtendedReadMapping() {
|
||||||
|
DummyActiveRegionWalker walker = new DummyActiveRegionWalker(
|
||||||
|
EnumSet.of(ActiveRegionReadState.PRIMARY, ActiveRegionReadState.NONPRIMARY, ActiveRegionReadState.EXTENDED));
|
||||||
|
|
||||||
|
// Contract: Each read has the Primary state in a single region (or none)
|
||||||
|
// This is the region of maximum overlap for the read (earlier if tied)
|
||||||
|
|
||||||
// Contract: Each read has the Non-Primary state in all other regions it overlaps
|
// Contract: Each read has the Non-Primary state in all other regions it overlaps
|
||||||
// Contract: Each read has the Extended state in regions where it only overlaps if the region is extended
|
// Contract: Each read has the Extended state in regions where it only overlaps if the region is extended
|
||||||
|
|
||||||
// simple: Primary in 1:1-999
|
// simple: Primary in 1:1-999
|
||||||
// overlap_equal: Primary in 1:1-999
|
// overlap_equal: Primary in 1:1-999
|
||||||
// overlap_unequal: Primary in 1:1-999
|
// overlap_unequal: Primary in 1:1-999
|
||||||
// boundary_equal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
|
// boundary_equal: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
|
||||||
// boundary_unequal: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
|
// boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
|
||||||
// extended_only: Extended in 1:2000-2999
|
|
||||||
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
|
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
|
||||||
// outside_intervals: none
|
// outside_intervals: none
|
||||||
|
// simple20: Primary in 20:10000-10100
|
||||||
// TODO
|
|
||||||
// simple20: Primary in 20:10000-20000
|
|
||||||
|
|
||||||
Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
|
Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
|
||||||
ActiveRegion region;
|
ActiveRegion region;
|
||||||
|
|
||||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1, 999));
|
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1, 999));
|
||||||
|
|
||||||
verifyReadPrimary(region, "simple");
|
getRead(region, "simple");
|
||||||
verifyReadPrimary(region, "overlap_equal");
|
getRead(region, "overlap_equal");
|
||||||
verifyReadPrimary(region, "overlap_unequal");
|
getRead(region, "overlap_unequal");
|
||||||
verifyReadNotPlaced(region, "boundary_equal");
|
verifyReadNotPlaced(region, "boundary_equal");
|
||||||
verifyReadNotPlaced(region, "boundary_unequal");
|
verifyReadNotPlaced(region, "boundary_unequal");
|
||||||
verifyReadNotPlaced(region, "extended_only");
|
getRead(region, "extended_and_np");
|
||||||
// TODO: fail verifyReadNonPrimary(region, "extended_and_np");
|
|
||||||
verifyReadNotPlaced(region, "outside_intervals");
|
verifyReadNotPlaced(region, "outside_intervals");
|
||||||
|
verifyReadNotPlaced(region, "simple20");
|
||||||
|
|
||||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999));
|
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999));
|
||||||
|
|
||||||
verifyReadNotPlaced(region, "simple");
|
verifyReadNotPlaced(region, "simple");
|
||||||
verifyReadNotPlaced(region, "overlap_equal");
|
verifyReadNotPlaced(region, "overlap_equal");
|
||||||
verifyReadNotPlaced(region, "overlap_unequal");
|
verifyReadNotPlaced(region, "overlap_unequal");
|
||||||
// TODO: fail verifyReadPrimary(region, "boundary_equal");
|
getRead(region, "boundary_equal");
|
||||||
// TODO: fail verifyReadNonPrimary(region, "boundary_unequal");
|
getRead(region, "boundary_unequal");
|
||||||
verifyReadNotPlaced(region, "extended_only");
|
getRead(region, "extended_and_np");
|
||||||
// TODO: fail verifyReadPrimary(region, "extended_and_np");
|
|
||||||
verifyReadNotPlaced(region, "outside_intervals");
|
verifyReadNotPlaced(region, "outside_intervals");
|
||||||
|
verifyReadNotPlaced(region, "simple20");
|
||||||
|
|
||||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
|
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
|
||||||
|
|
||||||
verifyReadNotPlaced(region, "simple");
|
verifyReadNotPlaced(region, "simple");
|
||||||
verifyReadNotPlaced(region, "overlap_equal");
|
verifyReadNotPlaced(region, "overlap_equal");
|
||||||
verifyReadNotPlaced(region, "overlap_unequal");
|
verifyReadNotPlaced(region, "overlap_unequal");
|
||||||
// TODO: fail verifyReadNonPrimary(region, "boundary_equal");
|
getRead(region, "boundary_equal");
|
||||||
verifyReadPrimary(region, "boundary_unequal");
|
getRead(region, "boundary_unequal");
|
||||||
// TODO: fail verifyReadExtended(region, "extended_only");
|
getRead(region, "extended_and_np");
|
||||||
// TODO: fail verifyReadExtended(region, "extended_and_np");
|
|
||||||
verifyReadNotPlaced(region, "outside_intervals");
|
verifyReadNotPlaced(region, "outside_intervals");
|
||||||
|
verifyReadNotPlaced(region, "simple20");
|
||||||
|
|
||||||
|
region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
|
||||||
|
|
||||||
|
verifyReadNotPlaced(region, "simple");
|
||||||
|
verifyReadNotPlaced(region, "overlap_equal");
|
||||||
|
verifyReadNotPlaced(region, "overlap_unequal");
|
||||||
|
verifyReadNotPlaced(region, "boundary_equal");
|
||||||
|
verifyReadNotPlaced(region, "boundary_unequal");
|
||||||
|
verifyReadNotPlaced(region, "extended_and_np");
|
||||||
|
verifyReadNotPlaced(region, "outside_intervals");
|
||||||
|
getRead(region, "simple20");
|
||||||
|
|
||||||
// TODO: more tests and edge cases
|
// TODO: more tests and edge cases
|
||||||
}
|
}
|
||||||
|
|
||||||
private void verifyReadPrimary(ActiveRegion region, String readName) {
|
|
||||||
SAMRecord read = getRead(region, readName);
|
|
||||||
Assert.assertFalse(read.getNotPrimaryAlignmentFlag(), "Read " + read + " not primary in active region " + region);
|
|
||||||
}
|
|
||||||
|
|
||||||
private void verifyReadNonPrimary(ActiveRegion region, String readName) {
|
|
||||||
SAMRecord read = getRead(region, readName);
|
|
||||||
Assert.assertTrue(read.getNotPrimaryAlignmentFlag(), "Read " + read + " primary in active region " + region);
|
|
||||||
}
|
|
||||||
|
|
||||||
private void verifyReadExtended(ActiveRegion region, String readName) {
|
|
||||||
Assert.fail("The Extended read state has not been implemented");
|
|
||||||
}
|
|
||||||
|
|
||||||
private void verifyReadNotPlaced(ActiveRegion region, String readName) {
|
private void verifyReadNotPlaced(ActiveRegion region, String readName) {
|
||||||
for (SAMRecord read : region.getReads()) {
|
for (SAMRecord read : region.getReads()) {
|
||||||
if (read.getReadName().equals(readName))
|
if (read.getReadName().equals(readName))
|
||||||
|
|
@ -274,7 +435,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
||||||
return read;
|
return read;
|
||||||
}
|
}
|
||||||
|
|
||||||
Assert.fail("Read " + readName + " not found in active region " + region);
|
Assert.fail("Read " + readName + " not assigned to active region " + region);
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -282,6 +443,8 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
||||||
for (LocusShardDataProvider dataProvider : createDataProviders(intervals))
|
for (LocusShardDataProvider dataProvider : createDataProviders(intervals))
|
||||||
t.traverse(walker, dataProvider, 0);
|
t.traverse(walker, dataProvider, 0);
|
||||||
|
|
||||||
|
t.endTraversal(walker, 0);
|
||||||
|
|
||||||
return walker.mappedActiveRegions;
|
return walker.mappedActiveRegions;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -345,7 +508,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
||||||
engine.setGenomeLocParser(genomeLocParser);
|
engine.setGenomeLocParser(genomeLocParser);
|
||||||
t.initialize(engine);
|
t.initialize(engine);
|
||||||
|
|
||||||
StingSAMIterator iterator = ArtificialSAMUtils.createReadIterator(reads);
|
StingSAMIterator iterator = ArtificialSAMUtils.createReadIterator(new ArrayList<SAMRecord>(reads));
|
||||||
Shard shard = new MockLocusShard(genomeLocParser, intervals);
|
Shard shard = new MockLocusShard(genomeLocParser, intervals);
|
||||||
|
|
||||||
List<LocusShardDataProvider> providers = new ArrayList<LocusShardDataProvider>();
|
List<LocusShardDataProvider> providers = new ArrayList<LocusShardDataProvider>();
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,8 @@ package org.broadinstitute.sting.utils;
|
||||||
|
|
||||||
|
|
||||||
import net.sf.samtools.SAMFileHeader;
|
import net.sf.samtools.SAMFileHeader;
|
||||||
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
|
import net.sf.samtools.SAMSequenceRecord;
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
@ -74,6 +76,23 @@ public class GenomeLocParserUnitTest extends BaseTest {
|
||||||
genomeLocParser.parseGenomeLoc("Bad:0-1");
|
genomeLocParser.parseGenomeLoc("Bad:0-1");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testContigHasColon() {
|
||||||
|
SAMFileHeader header = new SAMFileHeader();
|
||||||
|
header.setSortOrder(net.sf.samtools.SAMFileHeader.SortOrder.coordinate);
|
||||||
|
SAMSequenceDictionary dict = new SAMSequenceDictionary();
|
||||||
|
SAMSequenceRecord rec = new SAMSequenceRecord("c:h:r1", 10);
|
||||||
|
rec.setSequenceLength(10);
|
||||||
|
dict.addSequence(rec);
|
||||||
|
header.setSequenceDictionary(dict);
|
||||||
|
|
||||||
|
final GenomeLocParser myGenomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
|
||||||
|
GenomeLoc loc = myGenomeLocParser.parseGenomeLoc("c:h:r1:4-5");
|
||||||
|
assertEquals(0, loc.getContigIndex());
|
||||||
|
assertEquals(loc.getStart(), 4);
|
||||||
|
assertEquals(loc.getStop(), 5);
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testParseGoodString() {
|
public void testParseGoodString() {
|
||||||
GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr1:1-10");
|
GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr1:1-10");
|
||||||
|
|
@ -81,7 +100,7 @@ public class GenomeLocParserUnitTest extends BaseTest {
|
||||||
assertEquals(loc.getStop(), 10);
|
assertEquals(loc.getStop(), 10);
|
||||||
assertEquals(loc.getStart(), 1);
|
assertEquals(loc.getStart(), 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testCreateGenomeLoc1() {
|
public void testCreateGenomeLoc1() {
|
||||||
GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 1, 100);
|
GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 1, 100);
|
||||||
|
|
|
||||||
|
|
@ -28,7 +28,6 @@ public class VCFIntegrationTest extends WalkerTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
// See https://getsatisfaction.com/gsa/topics/support_vcf_4_1_structural_variation_breakend_alleles?utm_content=topic_link&utm_medium=email&utm_source=new_topic
|
|
||||||
public void testReadingAndWritingBreakpointAlleles() {
|
public void testReadingAndWritingBreakpointAlleles() {
|
||||||
String testVCF = privateTestDir + "breakpoint-example.vcf";
|
String testVCF = privateTestDir + "breakpoint-example.vcf";
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -793,7 +793,7 @@ public class VariantContextUtilsUnitTest extends BaseTest {
|
||||||
|
|
||||||
int sampleI = 0;
|
int sampleI = 0;
|
||||||
for ( final List<Allele> alleles : Utils.makePermutations(vc.getAlleles(), 2, true) ) {
|
for ( final List<Allele> alleles : Utils.makePermutations(vc.getAlleles(), 2, true) ) {
|
||||||
genotypes.add(GenotypeBuilder.create("sample" + sampleI, alleles));
|
genotypes.add(GenotypeBuilder.create("sample" + sampleI++, alleles));
|
||||||
}
|
}
|
||||||
genotypes.add(GenotypeBuilder.createMissing("missing", 2));
|
genotypes.add(GenotypeBuilder.createMissing("missing", 2));
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -36,6 +36,9 @@
|
||||||
<dir name="org/broadinstitute/sting/utils/R" includes="**/*.tar.gz" />
|
<dir name="org/broadinstitute/sting/utils/R" includes="**/*.tar.gz" />
|
||||||
<!-- All R scripts in org.broadinstitute.sting -->
|
<!-- All R scripts in org.broadinstitute.sting -->
|
||||||
<dir name="org/broadinstitute/sting" includes="**/*.R" />
|
<dir name="org/broadinstitute/sting" includes="**/*.R" />
|
||||||
|
<!-- Resources in org.broadinstitute.sting -->
|
||||||
|
<dir name="org/broadinstitute/sting" includes="**/resources/*" />
|
||||||
|
<dir name="org/broadinstitute/sting" includes="**/templates/*" />
|
||||||
<!-- The GATK public key -->
|
<!-- The GATK public key -->
|
||||||
<file path="GATK_public.key" />
|
<file path="GATK_public.key" />
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
|
||||||
|
|
@ -72,6 +72,10 @@ class QSettings {
|
||||||
@Argument(fullName="resident_memory_request_parameter", shortName="resMemReqParam", doc="Parameter for resident memory requests. By default not requested.", required=false)
|
@Argument(fullName="resident_memory_request_parameter", shortName="resMemReqParam", doc="Parameter for resident memory requests. By default not requested.", required=false)
|
||||||
var residentRequestParameter: String = _
|
var residentRequestParameter: String = _
|
||||||
|
|
||||||
|
@Argument(fullName="job_walltime", shortName="wallTime", doc="Setting the required DRMAA walltime or LSF run limit.", required=false)
|
||||||
|
@ClassType(classOf[Long])
|
||||||
|
var jobWalltime: Option[Long] = None
|
||||||
|
|
||||||
/** The name of the parallel environment (required for SGE, for example) */
|
/** The name of the parallel environment (required for SGE, for example) */
|
||||||
@Argument(fullName="job_parallel_env", shortName="jobParaEnv", doc="An SGE style parallel environment to use for jobs requesting more than 1 core. Equivalent to submitting jobs with -pe ARG nt for jobs with nt > 1", required=false)
|
@Argument(fullName="job_parallel_env", shortName="jobParaEnv", doc="An SGE style parallel environment to use for jobs requesting more than 1 core. Equivalent to submitting jobs with -pe ARG nt for jobs with nt > 1", required=false)
|
||||||
var parallelEnvironmentName: String = "smp_pe" // Broad default
|
var parallelEnvironmentName: String = "smp_pe" // Broad default
|
||||||
|
|
|
||||||
|
|
@ -65,6 +65,9 @@ class DrmaaJobRunner(val session: Session, val function: CommandLineFunction) ex
|
||||||
drmaaJob.setJoinFiles(true)
|
drmaaJob.setJoinFiles(true)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(!function.wallTime.isEmpty)
|
||||||
|
drmaaJob.setHardWallclockTimeLimit(function.wallTime.get)
|
||||||
|
|
||||||
drmaaJob.setNativeSpecification(functionNativeSpec)
|
drmaaJob.setNativeSpecification(functionNativeSpec)
|
||||||
|
|
||||||
// Instead of running the function.commandLine, run "sh <jobScript>"
|
// Instead of running the function.commandLine, run "sh <jobScript>"
|
||||||
|
|
|
||||||
|
|
@ -151,8 +151,11 @@ class Lsf706JobRunner(val function: CommandLineFunction) extends CommandLineJobR
|
||||||
throw new QException("setOption_() returned -1 while setting esub");
|
throw new QException("setOption_() returned -1 while setting esub");
|
||||||
}
|
}
|
||||||
|
|
||||||
// LSF specific: get the max runtime for the jobQueue and pass it for this job
|
if(!function.wallTime.isEmpty)
|
||||||
request.rLimits(LibLsf.LSF_RLIMIT_RUN) = Lsf706JobRunner.getRlimitRun(function.jobQueue)
|
request.rLimits(LibLsf.LSF_RLIMIT_RUN) = function.wallTime.get.toInt
|
||||||
|
else
|
||||||
|
// LSF specific: get the max runtime for the jobQueue and pass it for this job
|
||||||
|
request.rLimits(LibLsf.LSF_RLIMIT_RUN) = Lsf706JobRunner.getRlimitRun(function.jobQueue)
|
||||||
|
|
||||||
// Run the command as sh <jobScript>
|
// Run the command as sh <jobScript>
|
||||||
request.command = "sh " + jobScript
|
request.command = "sh " + jobScript
|
||||||
|
|
|
||||||
|
|
@ -33,6 +33,9 @@ import org.broadinstitute.sting.commandline.Argument
|
||||||
trait CommandLineFunction extends QFunction with Logging {
|
trait CommandLineFunction extends QFunction with Logging {
|
||||||
def commandLine: String
|
def commandLine: String
|
||||||
|
|
||||||
|
/** Setting the wall time request for DRMAA / run limit for LSF */
|
||||||
|
var wallTime: Option[Long] = None
|
||||||
|
|
||||||
/** Upper memory limit */
|
/** Upper memory limit */
|
||||||
@Argument(doc="Memory limit", required=false)
|
@Argument(doc="Memory limit", required=false)
|
||||||
var memoryLimit: Option[Double] = None
|
var memoryLimit: Option[Double] = None
|
||||||
|
|
@ -67,6 +70,9 @@ trait CommandLineFunction extends QFunction with Logging {
|
||||||
super.copySettingsTo(function)
|
super.copySettingsTo(function)
|
||||||
function match {
|
function match {
|
||||||
case commandLineFunction: CommandLineFunction =>
|
case commandLineFunction: CommandLineFunction =>
|
||||||
|
if(commandLineFunction.wallTime.isEmpty)
|
||||||
|
commandLineFunction.wallTime = this.wallTime
|
||||||
|
|
||||||
if (commandLineFunction.memoryLimit.isEmpty)
|
if (commandLineFunction.memoryLimit.isEmpty)
|
||||||
commandLineFunction.memoryLimit = this.memoryLimit
|
commandLineFunction.memoryLimit = this.memoryLimit
|
||||||
|
|
||||||
|
|
@ -110,6 +116,10 @@ trait CommandLineFunction extends QFunction with Logging {
|
||||||
* Sets all field values.
|
* Sets all field values.
|
||||||
*/
|
*/
|
||||||
override def freezeFieldValues() {
|
override def freezeFieldValues() {
|
||||||
|
|
||||||
|
if(wallTime.isEmpty)
|
||||||
|
wallTime = qSettings.jobWalltime
|
||||||
|
|
||||||
if (jobQueue == null)
|
if (jobQueue == null)
|
||||||
jobQueue = qSettings.jobQueue
|
jobQueue = qSettings.jobQueue
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -126,4 +126,15 @@ class HelloWorldPipelineTest {
|
||||||
spec.jobRunners = Seq("GridEngine")
|
spec.jobRunners = Seq("GridEngine")
|
||||||
PipelineTest.executeTest(spec)
|
PipelineTest.executeTest(spec)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// disabled because our DRMAA implementation doesn't support wallTime
|
||||||
|
@Test(enabled=false, timeOut=36000000)
|
||||||
|
def testHelloWorldWithWalltime() {
|
||||||
|
val spec = new PipelineTestSpec
|
||||||
|
spec.name = "HelloWorldWithWalltime"
|
||||||
|
spec.args = "-S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/HelloWorld.scala" +
|
||||||
|
" -wallTime 100"
|
||||||
|
spec.jobRunners = PipelineTest.allJobRunners
|
||||||
|
PipelineTest.executeTest(spec)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue