Removed stochasticity from IndelRealigner by random sampling using a seed based on the read list.
Updated the Queue scatter/gather for read walkers to include -L unmapped on the last scatter job when intervals aren't specified, and to map it correctly when it is explicitly set. Simplified the build.xml/ivy.xml to fix a bug reported with "ant clean dist test" where the scalac target wasn't found. Now building all scala code at the same time, just like all java code is compiled at the same time. Sped up the build for everyone by uncommenting a small bit of classes so that javac/scalac will not constantly launch trying to build .class files that will never compile. Moved some source files to their expected location so that the .java/.scala -> .class is a one-to-one match, again keeping the compilers from wasting cycles. Used <uptodate> and <touch> to skip extracting the help text and generating the GATK Queue extensions when the source files haven't been modified. Fixed a couple errors when the <javadoc> task is run. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4963 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
be67161b47
commit
b34e2f733f
196
build.xml
196
build.xml
|
|
@ -10,8 +10,8 @@
|
|||
<property name="resource.file" value="StingText.properties" />
|
||||
<property name="resource.path" value="${java.classes}/StingText.properties" />
|
||||
|
||||
<property name="queue.source.dir" value="scala/src" />
|
||||
<property name="queue.classes" value="${build.dir}/scala/classes" />
|
||||
<property name="scala.source.dir" value="scala/src" />
|
||||
<property name="scala.classes" value="${build.dir}/scala/classes" />
|
||||
|
||||
<property name="queue-extensions.source.dir" value="${build.dir}/queue-extensions/src" />
|
||||
|
||||
|
|
@ -73,14 +73,14 @@
|
|||
</fileset>
|
||||
</path>
|
||||
|
||||
<!-- Path to queue dependencies. -->
|
||||
<path id="queue.dependencies">
|
||||
<!-- Path to scala dependencies. -->
|
||||
<path id="scala.dependencies">
|
||||
<path refid="external.dependencies" />
|
||||
<pathelement location="${java.classes}" />
|
||||
<!-- Need the resources as we will be running a command line program which needs the help text. -->
|
||||
<pathelement location="${resource.path}" />
|
||||
<!-- Add any previously compiled queue classes to the path. -->
|
||||
<pathelement location="${queue.classes}" />
|
||||
<!-- Add any previously compiled scala classes to the path. -->
|
||||
<pathelement location="${scala.classes}" />
|
||||
</path>
|
||||
|
||||
<target name="init.resolve" unless="init.resolve.done">
|
||||
|
|
@ -105,12 +105,7 @@
|
|||
|
||||
<target name="resolve" depends="init.resolve,init"
|
||||
description="locate and download library dependencies">
|
||||
<property name="gatk.ivy.conf" value="default" />
|
||||
<property name="scala.ivy.conf" value="default" />
|
||||
<condition property="queue.ivy.conf" value="queue" else="default">
|
||||
<isset property="queue.include" />
|
||||
</condition>
|
||||
<property name="ivy.conf" value="default, ${gatk.ivy.conf}, ${scala.ivy.conf}, ${queue.ivy.conf}"/>
|
||||
<property name="ivy.conf" value="default"/>
|
||||
<ivy:retrieve file="ivy.xml" conf="${ivy.conf}" />
|
||||
<!-- Remove old versions of ivy jars AFTER the ivy:retrieve has been class loaded. -->
|
||||
<delete file="${ivy.jar.dir}/ivy-2.0.0.jar"/>
|
||||
|
|
@ -118,9 +113,9 @@
|
|||
</target>
|
||||
|
||||
<target name="init.buildall">
|
||||
<!-- Set the properties needed to build Queue -->
|
||||
<!-- Set the properties needed to build everything -->
|
||||
<property name="gatk.target" value="oneoffs"/>
|
||||
<property name="queue.target" value="core"/>
|
||||
<property name="scala.target" value="core"/>
|
||||
</target>
|
||||
|
||||
<!-- define some key locations that might change based on how the build is run -->
|
||||
|
|
@ -144,31 +139,24 @@
|
|||
</condition>
|
||||
|
||||
<!-- Get the queue build target. Default to none. -->
|
||||
<condition property="queue.target" value="none" else="${env.QUEUE_BUILD_TYPE}">
|
||||
<condition property="scala.target" value="none" else="${env.QUEUE_BUILD_TYPE}">
|
||||
<equals arg1="${env.QUEUE_BUILD_TYPE}" arg2="$${env.QUEUE_BUILD_TYPE}" />
|
||||
</condition>
|
||||
|
||||
<!-- If the queue target is set, or if the queue-extensions needs to be built, then include all queue tasks. -->
|
||||
<condition property="queue.include">
|
||||
<!-- If the scala target is set, then include all scala tasks. -->
|
||||
<condition property="scala.include">
|
||||
<or>
|
||||
<not><equals arg1="${queue.target}" arg2="none" /></not>
|
||||
<not><equals arg1="${scala.target}" arg2="none" /></not>
|
||||
</or>
|
||||
</condition>
|
||||
|
||||
<!-- If queue is being built, then include scala tasks (init.scalatasks) -->
|
||||
<condition property="scala.include">
|
||||
<or>
|
||||
<isset property="queue.include" />
|
||||
</or>
|
||||
</condition>
|
||||
|
||||
<!-- Get the pipeline run type. Default to dry. -->
|
||||
<condition property="pipeline.run" value="dry" else="${pipeline.run}">
|
||||
<equals arg1="${pipeline.run}" arg2="$${pipeline.run}" />
|
||||
</condition>
|
||||
|
||||
<echo message="GATK build : ${gatk.target}"/>
|
||||
<echo message="Queue build : ${queue.target}"/>
|
||||
<echo message="Scala build : ${scala.target}"/>
|
||||
<echo message="source revision : ${build.version}"/>
|
||||
<echo message="build time : ${build.timestamp}" />
|
||||
|
||||
|
|
@ -202,7 +190,7 @@
|
|||
</pathconvert>
|
||||
</target>
|
||||
|
||||
<target name="init.scalatasks" depends="resolve" if="scala.include"
|
||||
<target name="init.scala.compile" depends="resolve"
|
||||
description="Initializes the scala ant tasks from scala-compiler.jar">
|
||||
<path id="scala.classpath">
|
||||
<fileset dir="lib">
|
||||
|
|
@ -232,33 +220,61 @@
|
|||
</subant>
|
||||
</target>
|
||||
|
||||
<target name="init.queue-extensions.generate" depends="gatk.compile">
|
||||
<condition property="uptodate.queue-extensions.generate">
|
||||
<or>
|
||||
<not>
|
||||
<isset property="scala.include"/>
|
||||
</not>
|
||||
<uptodate targetfile="${queue-extensions.source.dir}">
|
||||
<srcfiles refid="java.source.files"/>
|
||||
<srcfiles refid="external.source.files"/>
|
||||
</uptodate>
|
||||
</or>
|
||||
</condition>
|
||||
</target>
|
||||
|
||||
<!-- NOTE: Extracting help first to avoid "Unable to load help text. Help output will be sparse." warning message. -->
|
||||
<target name="queue-extensions.generate" depends="gatk.compile, extracthelp" if="queue.include" description="generate GATK modules for Queue">
|
||||
<target name="queue-extensions.generate" depends="extracthelp,init.queue-extensions.generate" unless="uptodate.queue-extensions.generate" description="generate GATK modules for Queue">
|
||||
<mkdir dir="${queue-extensions.source.dir}"/>
|
||||
<echo>Generating Queue GATK extensions...</echo>
|
||||
<java fork="true" failonerror="true" classname="org.broadinstitute.sting.queue.extensions.gatk.GATKExtensionsGenerator" classpathref="queue.dependencies">
|
||||
<java fork="true" failonerror="true" classname="org.broadinstitute.sting.queue.extensions.gatk.GATKExtensionsGenerator" classpathref="scala.dependencies">
|
||||
<arg value="-outDir" />
|
||||
<arg path="${queue-extensions.source.dir}" />
|
||||
<arg value="-l" />
|
||||
<arg value="WARN" />
|
||||
</java>
|
||||
<touch>
|
||||
<file file="${queue-extensions.source.dir}"/>
|
||||
</touch>
|
||||
</target>
|
||||
|
||||
<!-- Queue depends on the gatk since it contains the StingUtils (including CommandLine) -->
|
||||
<target name="queue.compile" depends="init,resolve,gatk.compile,init.scalatasks,queue-extensions.generate" if="queue.include" description="compile Queue">
|
||||
<mkdir dir="${queue.classes}"/>
|
||||
<echo>Building Queue...</echo>
|
||||
<scalac fork="true" jvmargs="-Xmx512m" destdir="${queue.classes}" classpathref="queue.dependencies" deprecation="yes" unchecked="yes">
|
||||
<src path="${queue.source.dir}" />
|
||||
<!-- Scala depends on the java compile -->
|
||||
<target name="scala.compile" depends="init,resolve,gatk.compile,queue-extensions.generate,init.scala.compile" if="scala.include" description="compile Scala">
|
||||
<mkdir dir="${scala.classes}"/>
|
||||
<echo>Building Scala...</echo>
|
||||
<scalac fork="true" jvmargs="-Xmx512m" destdir="${scala.classes}" classpathref="scala.dependencies" deprecation="yes" unchecked="yes">
|
||||
<src path="${scala.source.dir}" />
|
||||
<src path="${queue-extensions.source.dir}" />
|
||||
<include name="**/*.scala"/>
|
||||
<exclude name="*.scala"/>
|
||||
</scalac>
|
||||
</target>
|
||||
|
||||
<target name="extracthelp" depends="init,gatk.compile"
|
||||
<target name="init.extracthelp" depends="init,gatk.compile">
|
||||
<condition property="uptodate.extracthelp">
|
||||
<or>
|
||||
<isset property="disable.help"/>
|
||||
<uptodate targetfile="${basedir}/${resource.path}">
|
||||
<srcfiles refid="java.source.files"/>
|
||||
<srcfiles refid="external.source.files"/>
|
||||
</uptodate>
|
||||
</or>
|
||||
</condition>
|
||||
</target>
|
||||
|
||||
<target name="extracthelp" depends="init.extracthelp"
|
||||
description="Extract help key/value pair file from the JavaDoc tags."
|
||||
unless="disable.help">
|
||||
unless="uptodate.extracthelp">
|
||||
<path id="doclet.classpath">
|
||||
<path refid="external.dependencies" />
|
||||
<pathelement location="${java.classes}" />
|
||||
|
|
@ -273,7 +289,7 @@
|
|||
</javadoc>
|
||||
</target>
|
||||
|
||||
<target name="sting.compile" depends="gatk.compile, queue.compile" />
|
||||
<target name="sting.compile" depends="gatk.compile, scala.compile" />
|
||||
|
||||
<target name="init.jar" depends="sting.compile,extracthelp">
|
||||
<mkdir dir="${dist.dir}"/>
|
||||
|
|
@ -333,9 +349,17 @@
|
|||
</subant>
|
||||
</target>
|
||||
|
||||
<target name="queue.jar" depends="queue.compile, init.jar" if="queue.include">
|
||||
<target name="scala.jar" depends="scala.compile, init.jar" if="scala.include">
|
||||
<jar jarfile="${dist.dir}/GATKScala.jar">
|
||||
<fileset dir="${scala.classes}">
|
||||
<include name="org/broadinstitute/sting/scala/**/*.class"/>
|
||||
</fileset>
|
||||
</jar>
|
||||
</target>
|
||||
|
||||
<target name="queue.jar" depends="scala.compile, init.jar" if="scala.include">
|
||||
<jar jarfile="${dist.dir}/Queue.jar">
|
||||
<fileset dir="${queue.classes}">
|
||||
<fileset dir="${scala.classes}">
|
||||
<include name="org/broadinstitute/sting/queue/**/*.class"/>
|
||||
</fileset>
|
||||
<fileset dir="${java.classes}">
|
||||
|
|
@ -381,7 +405,7 @@
|
|||
|
||||
</target>
|
||||
|
||||
<target name="queue.manifests" depends="queue.jar, init.manifests" if="queue.include">
|
||||
<target name="queue.manifests" depends="queue.jar, init.manifests" if="scala.include">
|
||||
<jar jarfile="${dist.dir}/Queue.jar" update="true" >
|
||||
<manifest>
|
||||
<attribute name="Class-Path" value="${jar.classpath}" />
|
||||
|
|
@ -416,16 +440,20 @@
|
|||
inheritAll is false so that 'ant queue' does not accidentally import
|
||||
params if the build was called with 'ant clean oneoffs queue'.
|
||||
Instead this task resets the parameters and is just like running
|
||||
a fresh 'ant dist -Dqueue.target=core'.
|
||||
a fresh 'ant dist -Dscala.target=core'.
|
||||
-->
|
||||
<antcall target="dist" inheritAll="false">
|
||||
<param name="queue.target" value="core" />
|
||||
<param name="scala.target" value="core" />
|
||||
</antcall>
|
||||
</target>
|
||||
|
||||
<target name="test.compile" depends="init.buildall,dist">
|
||||
<echo message="Sting: Compiling test cases!"/>
|
||||
<target name="test.init.compile">
|
||||
<mkdir dir="${java.test.classes}"/>
|
||||
<mkdir dir="${scala.test.classes}"/>
|
||||
</target>
|
||||
|
||||
<target name="test.java.compile" depends="init.buildall,dist,test.init.compile">
|
||||
<echo message="Sting: Compiling test cases!"/>
|
||||
<javac fork="true" memoryMaximumSize="512m" destdir="${java.test.classes}" debug="true" optimize="on">
|
||||
<src path="${java.test.sources}"/>
|
||||
<classpath>
|
||||
|
|
@ -434,55 +462,30 @@
|
|||
<pathelement location="lib/testng-5.14.1.jar"/>
|
||||
</classpath>
|
||||
</javac>
|
||||
</target>
|
||||
|
||||
<echo message="Queue: Compiling test cases!"/>
|
||||
<mkdir dir="${queue.test.classes}"/>
|
||||
<scalac fork="true" jvmargs="-Xmx512m" srcdir="${queue.test.sources}" destdir="${queue.test.classes}" deprecation="yes" unchecked="yes">
|
||||
<include name="org/broadinstitute/sting/queue/**/*.scala"/>
|
||||
<target name="test.scala.compile" depends="test.java.compile,scala.compile" if="scala.include">
|
||||
<echo message="Scala: Compiling test cases!"/>
|
||||
<scalac fork="true" jvmargs="-Xmx512m" srcdir="${scala.test.sources}" destdir="${scala.test.classes}" deprecation="yes" unchecked="yes">
|
||||
<include name="**/*.scala"/>
|
||||
<classpath>
|
||||
<path refid="queue.dependencies"/>
|
||||
<pathelement location="${queue.classes}"/>
|
||||
<path refid="scala.dependencies"/>
|
||||
<pathelement location="${scala.classes}"/>
|
||||
<pathelement location="${java.test.classes}"/>
|
||||
<pathelement location="lib/testng-5.14.1.jar"/>
|
||||
</classpath>
|
||||
</scalac>
|
||||
</target>
|
||||
|
||||
<target name="test.compile" depends="test.java.compile,test.scala.compile">
|
||||
</target>
|
||||
|
||||
<!-- new scala target -->
|
||||
|
||||
<target name="scala" description="build the scala directory">
|
||||
<antcall target="resolve">
|
||||
<param name="ivy.conf" value="scala"/>
|
||||
</antcall>
|
||||
<antcall target="dist"/>
|
||||
<property name="scala.source.dir" value="scala/src" />
|
||||
<property name="scala.classes" value="build/scala" />
|
||||
|
||||
<path id="scala.classpath">
|
||||
<fileset dir="lib">
|
||||
<include name="scala-compiler-*.jar"/>
|
||||
<include name="scala-library-*.jar"/>
|
||||
</fileset>
|
||||
<fileset dir="${dist.dir}">
|
||||
<patternset id="scalaStuff">
|
||||
<include name="*.jar"/>
|
||||
</patternset>
|
||||
</fileset>
|
||||
</path>
|
||||
<taskdef resource="scala/tools/ant/antlib.xml">
|
||||
<classpath refid="scala.classpath"/>
|
||||
</taskdef>
|
||||
<mkdir dir="${scala.classes}"/>
|
||||
<echo>Building Scala...</echo>
|
||||
<scalac fork="true" srcdir="${scala.source.dir}" destdir="${scala.classes}" classpathref="scala.classpath" force="changed">
|
||||
<include name="*.scala"/>
|
||||
</scalac>
|
||||
|
||||
<jar jarfile="${dist.dir}/GATKScala.jar">
|
||||
<fileset dir="${scala.classes}">
|
||||
<include name="*.class"/>
|
||||
</fileset>
|
||||
</jar>
|
||||
<antcall target="scala.jar" inheritAll="false">
|
||||
<param name="scala.target" value="core" />
|
||||
</antcall>
|
||||
</target>
|
||||
|
||||
<!-- ***************************************************************************** -->
|
||||
|
|
@ -493,8 +496,8 @@
|
|||
<property name="java.test.classes" value="${build.dir}/java/testclasses"/>
|
||||
<property name="test.output" value="${dist.dir}/test"/>
|
||||
<property name="java.test.sources" value="java/test"/>
|
||||
<property name="queue.test.classes" value="${build.dir}/scala/testclasses"/>
|
||||
<property name="queue.test.sources" value="scala/test"/>
|
||||
<property name="scala.test.classes" value="${build.dir}/scala/testclasses"/>
|
||||
<property name="scala.test.sources" value="scala/test"/>
|
||||
<!-- provide a ceiling on the memory that unit/integration tests can consume. -->
|
||||
<property name="test.maxmemory" value="4g"/>
|
||||
|
||||
|
|
@ -521,13 +524,13 @@
|
|||
<classpath>
|
||||
<path refid="external.dependencies" />
|
||||
<pathelement location="${java.classes}" />
|
||||
<pathelement location="${queue.classes}" />
|
||||
<pathelement location="${scala.classes}" />
|
||||
<pathelement location="${java.test.classes}" />
|
||||
<pathelement location="${queue.test.classes}" />
|
||||
<pathelement location="${scala.test.classes}" />
|
||||
</classpath>
|
||||
|
||||
<classfileset dir="${java.test.classes}" includes="**/@{testtype}.class"/>
|
||||
<classfileset dir="${queue.test.classes}" includes="**/@{testtype}*.class" />
|
||||
<classfileset dir="${scala.test.classes}" includes="**/@{testtype}*.class" />
|
||||
</testng>
|
||||
|
||||
<!-- generate a report for Bamboo or Hudson to read in -->
|
||||
|
|
@ -599,20 +602,21 @@
|
|||
|
||||
<target name="clean.javadoc">
|
||||
<delete dir="javadoc"/>
|
||||
<delete dir="scaladoc"/>
|
||||
</target>
|
||||
|
||||
<target name="javadoc" depends="init.buildall,resolve,queue-extensions.generate" description="generates javadoc">
|
||||
<target name="javadoc" depends="init.buildall,resolve,queue-extensions.generate,init.scala.compile" description="generates javadoc">
|
||||
<mkdir dir="javadoc"/>
|
||||
<javadoc destdir="javadoc"
|
||||
classpathref="external.dependencies">
|
||||
<packageset refid="java.source.files"/>
|
||||
<fileset refid="java.source.files"/>
|
||||
<fileset refid="external.source.files"/>
|
||||
</javadoc>
|
||||
|
||||
<mkdir dir="javadoc/queue"/>
|
||||
<scaladoc srcdir="" destdir="javadoc/queue" classpathref="queue.dependencies" deprecation="yes" unchecked="yes">
|
||||
<src path="${queue.source.dir}"/>
|
||||
<mkdir dir="scaladoc"/>
|
||||
<scaladoc srcdir="" destdir="scaladoc" classpathref="scala.dependencies" deprecation="yes" unchecked="yes">
|
||||
<src path="${scala.source.dir}"/>
|
||||
<src path="${queue-extensions.source.dir}"/>
|
||||
<include name="org/broadinstitute/sting/queue/**/*.scala"/>
|
||||
<include name="**/*.scala"/>
|
||||
</scaladoc>
|
||||
</target>
|
||||
|
||||
|
|
|
|||
10
ivy.xml
10
ivy.xml
|
|
@ -33,20 +33,18 @@
|
|||
<dependency org="org.jgrapht" name="jgrapht-jdk1.5" rev="0.7.3"/>
|
||||
|
||||
<!-- Commons Dependencies -->
|
||||
<dependency org="org.apache.commons" name="commons-email" rev="1.2"/>
|
||||
<dependency org="org.apache.commons" name="commons-jexl" rev="2.0"/>
|
||||
<dependency org="commons-lang" name="commons-lang" rev="2.5"/>
|
||||
<dependency org="commons-logging" name="commons-logging" rev="1.1.1"/>
|
||||
<dependency org="commons-io" name="commons-io" rev="2.0"/>
|
||||
|
||||
<!-- Dependencies for LSF library -->
|
||||
<dependency org="net.java.dev.jna" name="jna" rev="3.2.7"/>
|
||||
|
||||
<!-- Dependencies for Queue GATK Extensions code generator living in java/src -->
|
||||
<dependency org="commons-lang" name="commons-lang" rev="2.5"/>
|
||||
|
||||
<!-- Scala dependencies -->
|
||||
<dependency org="org.scala-lang" name="scala-compiler" rev="2.8.1" conf="scala->default"/>
|
||||
<dependency org="org.scala-lang" name="scala-library" rev="2.8.1" conf="scala->default"/>
|
||||
<dependency org="org.apache.commons" name="commons-email" rev="1.2"/>
|
||||
<dependency org="org.scala-lang" name="scala-compiler" rev="2.8.1"/>
|
||||
<dependency org="org.scala-lang" name="scala-library" rev="2.8.1"/>
|
||||
|
||||
<!-- findbug dependencies -->
|
||||
<dependency org="net.sourceforge.findbugs" name="findbugs" rev="1.3.2" conf="findbugs->default"/>
|
||||
|
|
|
|||
|
|
@ -182,10 +182,6 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
private final ArrayList<SAMRecord> readsNotToClean = new ArrayList<SAMRecord>();
|
||||
private final IdentityHashMap<Object, VariantContext> knownIndelsToTry = new IdentityHashMap<Object, VariantContext>();
|
||||
|
||||
// random number generator
|
||||
private static final long RANDOM_SEED = 1252863495;
|
||||
private static final Random generator = new Random(RANDOM_SEED);
|
||||
|
||||
private static final int MAX_QUAL = 99;
|
||||
|
||||
// fraction of mismatches that need to no longer mismatch for a column to be considered cleaned
|
||||
|
|
@ -614,9 +610,9 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
// if there are reads with a single indel in them, add that indel to the list of alternate consenses
|
||||
long totalRawMismatchSum = determineReadsThatNeedCleaning(reads, refReads, altReads, altAlignmentsToTest, altConsenses, leftmostIndex, reference);
|
||||
|
||||
// use 'Smith-Waterman' to create alternate consenses from reads that mismatch the reference
|
||||
// use 'Smith-Waterman' to create alternate consenses from reads that mismatch the reference, using totalRawMismatchSum as the random seed
|
||||
if ( !USE_KNOWN_INDELS_ONLY )
|
||||
generateAlternateConsensesFromReads(altAlignmentsToTest, altConsenses, reference, leftmostIndex);
|
||||
generateAlternateConsensesFromReads(altAlignmentsToTest, altConsenses, reference, leftmostIndex, totalRawMismatchSum);
|
||||
|
||||
// if ( debugOn ) System.out.println("------\nChecking consenses...\n--------\n");
|
||||
|
||||
|
|
@ -867,7 +863,8 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
private void generateAlternateConsensesFromReads(final LinkedList<AlignedRead> altAlignmentsToTest,
|
||||
final Set<Consensus> altConsensesToPopulate,
|
||||
final byte[] reference,
|
||||
final int leftmostIndex) {
|
||||
final int leftmostIndex,
|
||||
final long randomSeed) {
|
||||
|
||||
// if we are under the limit, use all reads to generate alternate consenses
|
||||
if ( altAlignmentsToTest.size() <= MAX_READS_FOR_CONSENSUSES ) {
|
||||
|
|
@ -879,6 +876,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
// otherwise, choose reads for alternate consenses randomly
|
||||
else {
|
||||
int readsSeen = 0;
|
||||
Random generator = new Random(randomSeed);
|
||||
while ( readsSeen++ < MAX_READS_FOR_CONSENSUSES && altConsensesToPopulate.size() <= MAX_CONSENSUSES) {
|
||||
int index = generator.nextInt(altAlignmentsToTest.size());
|
||||
AlignedRead aRead = altAlignmentsToTest.remove(index);
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ public class IntervalUtils {
|
|||
return null;
|
||||
}
|
||||
// if any argument is 'unmapped', "parse" it to a null entry. A null in this case means 'all the intervals with no alignment data'.
|
||||
else if(fileOrInterval.trim().toLowerCase().equals("unmapped"))
|
||||
else if (isUnmapped(fileOrInterval))
|
||||
rawIntervals.add(GenomeLoc.UNMAPPED);
|
||||
// if it's a file, add items to raw interval list
|
||||
else if (isIntervalFile(fileOrInterval)) {
|
||||
|
|
@ -75,6 +75,15 @@ public class IntervalUtils {
|
|||
return rawIntervals;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the interval string is the "unmapped" interval
|
||||
* @param interval Interval to check
|
||||
* @return true if the interval string is the "unmapped" interval
|
||||
*/
|
||||
public static boolean isUnmapped(String interval) {
|
||||
return (interval != null && interval.trim().toLowerCase().equals("unmapped"));
|
||||
}
|
||||
|
||||
/**
|
||||
* merge two interval lists, using an interval set rule
|
||||
* @param setOne a list of genomeLocs, in order (cannot be NULL)
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
import org.broadinstitute.sting.commandline.ArgumentCollection
|
||||
import org.broadinstitute.sting.queue.extensions.gatk._
|
||||
import org.broadinstitute.sting.queue.library.ipf.ExpandIntervals
|
||||
import org.broadinstitute.sting.queue.library.ipf.intervals.ExpandIntervals
|
||||
import org.broadinstitute.sting.queue.pipeline.PipelineArgumentCollection
|
||||
import org.broadinstitute.sting.queue.QScript
|
||||
import org.broadinstitute.sting.utils.text.XReadLines
|
||||
|
|
|
|||
|
|
@ -101,10 +101,8 @@ class fullCallingPipeline extends QScript {
|
|||
//val seq = qscript.machine
|
||||
//val expKind = qscript.protocol
|
||||
|
||||
// get contigs (needed for indel cleaning parallelism)
|
||||
val contigs = IntervalUtils.distinctContigs(
|
||||
qscript.pipeline.getProject.getReferenceFile,
|
||||
List(qscript.pipeline.getProject.getIntervalList.getAbsolutePath)).toList
|
||||
// get max num contigs for indel cleaning parallelism, plus 1 for -L unmapped
|
||||
val numContigs = IntervalUtils.distinctContigs(qscript.pipeline.getProject.getReferenceFile).size + 1
|
||||
|
||||
for ( sample <- recalibratedSamples ) {
|
||||
val sampleId = sample.getId
|
||||
|
|
@ -134,13 +132,12 @@ class fullCallingPipeline extends QScript {
|
|||
realigner.targetIntervals = targetCreator.out
|
||||
realigner.intervals = Nil
|
||||
realigner.intervalsString = Nil
|
||||
realigner.scatterCount = num_cleaner_scatter_jobs min contigs.size
|
||||
realigner.scatterCount = num_cleaner_scatter_jobs min numContigs
|
||||
realigner.rodBind :+= RodBind("dbsnp", dbsnpType, qscript.pipeline.getProject.getDbsnpFile)
|
||||
realigner.rodBind :+= RodBind("indels", "VCF", swapExt(realigner.reference_sequence.getParentFile, realigner.reference_sequence, "fasta", "1kg_pilot_indels.vcf"))
|
||||
|
||||
// if scatter count is > 1, do standard scatter gather, if not, explicitly set up fix mates
|
||||
if (realigner.scatterCount > 1) {
|
||||
realigner.intervalsString = contigs
|
||||
realigner.out = cleaned_bam
|
||||
// While gathering run fix mates.
|
||||
realigner.setupScatterFunction = {
|
||||
|
|
|
|||
|
|
@ -1,5 +0,0 @@
|
|||
object HelloWorld {
|
||||
def main(args: Array[String]) {
|
||||
println("Hello, world!")
|
||||
}
|
||||
}
|
||||
|
|
@ -2,16 +2,11 @@ package org.broadinstitute.sting.queue.extensions.gatk
|
|||
|
||||
import org.broadinstitute.sting.commandline.ArgumentSource
|
||||
import org.broadinstitute.sting.utils.interval.IntervalUtils
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceDataSource
|
||||
import java.io.File
|
||||
import net.sf.picard.util.IntervalList
|
||||
import net.sf.samtools.SAMFileHeader
|
||||
import collection.JavaConversions._
|
||||
import org.broadinstitute.sting.utils.{GenomeLoc, GenomeLocSortedSet, GenomeLocParser}
|
||||
import org.broadinstitute.sting.queue.util.IOUtils
|
||||
import org.broadinstitute.sting.queue.function.scattergather.{CloneFunction, ScatterGatherableFunction, ScatterFunction}
|
||||
import org.broadinstitute.sting.queue.function.{QFunction, InProcessFunction}
|
||||
import org.broadinstitute.sting.queue.QException
|
||||
|
||||
/**
|
||||
* An interval scatter function.
|
||||
|
|
@ -19,11 +14,30 @@ import org.broadinstitute.sting.queue.QException
|
|||
class IntervalScatterFunction extends ScatterFunction with InProcessFunction {
|
||||
var splitByContig = false
|
||||
|
||||
/** The total number of clone jobs that will be created. */
|
||||
private var scatterCount: Int = _
|
||||
|
||||
/** The reference sequence for the GATK function. */
|
||||
private var referenceSequence: File = _
|
||||
|
||||
/** The runtime field to set for specifying an interval file. */
|
||||
private var intervalsField: ArgumentSource = _
|
||||
|
||||
/** The runtime field to set for specifying an interval string. */
|
||||
private var intervalsStringField: ArgumentSource = _
|
||||
|
||||
/** The list of interval files ("/path/to/interval.list") or interval strings ("chr1", "chr2") to parse into smaller parts. */
|
||||
private var intervals: List[String] = Nil
|
||||
|
||||
/** Whether the last scatter job should also include any unmapped reads. */
|
||||
private var includeUnmapped: Boolean = _
|
||||
|
||||
/**
|
||||
* Checks if the function is scatter gatherable.
|
||||
* @param originalFunction Function to check.
|
||||
* @return true if the function is a GATK function with the reference sequence set.
|
||||
* @throws IllegalArgumentException if -BTI or -BTIMR are set. QScripts should not try to scatter gather with those options set.
|
||||
*/
|
||||
def isScatterGatherable(originalFunction: ScatterGatherableFunction) = {
|
||||
if (originalFunction.isInstanceOf[CommandLineGATK]) {
|
||||
val gatk = originalFunction.asInstanceOf[CommandLineGATK]
|
||||
|
|
@ -32,18 +46,32 @@ class IntervalScatterFunction extends ScatterFunction with InProcessFunction {
|
|||
} else false
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the scatter gatherable function.
|
||||
* @param originalFunction Function to bind.
|
||||
*/
|
||||
def setScatterGatherable(originalFunction: ScatterGatherableFunction) = {
|
||||
val gatk = originalFunction.asInstanceOf[CommandLineGATK]
|
||||
this.referenceSequence = gatk.reference_sequence
|
||||
this.intervals ++= gatk.intervalsString
|
||||
this.intervals ++= gatk.intervals.map(_.toString)
|
||||
this.intervalsField = QFunction.findField(originalFunction.getClass, "intervals")
|
||||
this.intervalsStringField = QFunction.findField(originalFunction.getClass, "intervalsString")
|
||||
this.scatterCount = originalFunction.scatterCount
|
||||
this.referenceSequence = gatk.reference_sequence
|
||||
if (gatk.intervals.isEmpty && gatk.intervalsString.isEmpty) {
|
||||
this.intervals ++= IntervalUtils.distinctContigs(this.referenceSequence).toList
|
||||
this.includeUnmapped = this.splitByContig
|
||||
} else {
|
||||
this.intervals ++= gatk.intervals.map(_.toString)
|
||||
this.intervals ++= gatk.intervalsString.filterNot(interval => IntervalUtils.isUnmapped(interval))
|
||||
this.includeUnmapped = gatk.intervalsString.exists(interval => IntervalUtils.isUnmapped(interval))
|
||||
}
|
||||
}
|
||||
|
||||
def initCloneInputs(cloneFunction: CloneFunction, index: Int) = {
|
||||
cloneFunction.setFieldValue(this.intervalsField, List(new File("scatter.intervals")))
|
||||
cloneFunction.setFieldValue(this.intervalsStringField, List.empty[String])
|
||||
if (index == scatterCount && includeUnmapped)
|
||||
cloneFunction.setFieldValue(this.intervalsStringField, List("unmapped"))
|
||||
else
|
||||
cloneFunction.setFieldValue(this.intervalsStringField, List.empty[String])
|
||||
}
|
||||
|
||||
def bindCloneInputs(cloneFunction: CloneFunction, index: Int) = {
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
package org.broadinstitute.sting.queue.library.ipf
|
||||
package org.broadinstitute.sting.queue.library.ipf.intervals
|
||||
|
||||
import org.broadinstitute.sting.queue.function.InProcessFunction
|
||||
import org.broadinstitute.sting.commandline._
|
||||
|
|
|
|||
|
|
@ -39,9 +39,8 @@ class PipelineArgumentCollection {
|
|||
@Input(doc="Skip indel-cleaning for BAM files (for testing only)", shortName="skipCleaning", required=false)
|
||||
var skip_cleaning = false
|
||||
|
||||
@Input(doc="List of samples and bams (in the form sample_id k1:v1,k2:v2 "+
|
||||
"cleaned:/path/to/cleaned.bam,recalibrated:/path/to/recal.bam,unreacalibrated:/path/to/unrecal.bam)."+
|
||||
"Mutually exclusive with YAML",required=false, shortName="pBams")
|
||||
@Input(doc="List of samples and bams (in the form sample_id k1:v1,k2:v2 cleaned:/path/to/cleaned.bam,recalibrated:/path/to/recal.bam,unreacalibrated:/path/to/unrecal.bam). Mutually exclusive with YAML",
|
||||
required=false, shortName="pBams")
|
||||
var projectBams: File = _
|
||||
|
||||
@Input(doc="The project name. Mutually exclusive with YAML.", required = false, shortName="pName")
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/**package org.broadinstitute.sting.scala
|
||||
|
||||
package org.broadinstitute.sting.scala
|
||||
/**
|
||||
import gatk.walkers.genotyper.{UnifiedGenotyper, GenotypeCall}
|
||||
import java.io.File
|
||||
import net.sf.samtools.SAMRecord
|
||||
|
|
@ -68,7 +68,7 @@ class TransitionTable() {
|
|||
}
|
||||
}
|
||||
|
||||
class BaseTransitionTableCalculator { // extends LocusWalker[Unit,Int] {
|
||||
**/class BaseTransitionTableCalculator /**{ // extends LocusWalker[Unit,Int] {
|
||||
private var MIN_MAPPING_QUALITY = 30
|
||||
private var MIN_BASE_QUALITY = 20
|
||||
private var MIN_LOD = 5
|
||||
|
|
@ -12,6 +12,7 @@ import org.broadinstitute.sting.gatk.walkers.{TreeReducible, RefWalker}
|
|||
import org.broadinstitute.sting.commandline.{Output, Argument}
|
||||
import org.broadinstitute.sting.utils.{BaseUtils, GenomeLoc}
|
||||
import collection.mutable.{ListBuffer, HashSet}
|
||||
import java.lang.Math
|
||||
|
||||
class IntervalAnnotationWalker extends RefWalker[AnnotationMetaData,List[IntervalInfoBuilder]] {
|
||||
@Argument(doc="Min proportion of bases overlapping between an interval of interest and an annotation interval for annotation to occur",shortName="mpb")
|
||||
|
|
@ -109,9 +110,9 @@ class IntervalInfoBuilder(loc : GenomeLoc, minProp : Double) {
|
|||
Math.log(1+byteList.tail.size-byteList.tail.dropWhile( u => u.equals(byteList(1))).size) +
|
||||
calcEntropy(byteList.tail.foldLeft(ListBuffer(byteList(0)))( (a,b) => {
|
||||
if ( b.equals(byteList(1)) ) {
|
||||
a.dropRight(1) + (a.last ++ b)
|
||||
a.dropRight(1) :+ (a.last ++ b)
|
||||
} else {
|
||||
a + b
|
||||
a :+ b
|
||||
}
|
||||
}))
|
||||
}
|
||||
Loading…
Reference in New Issue