Removed stochasticity from IndelRealigner by seeding its random sampling with a value derived from the read list.

Updated the Queue scatter/gather for read walkers to include -L unmapped on the last scatter job when intervals aren't specified, and to map it correctly when it is explicitly set.
Simplified the build.xml/ivy.xml to fix a bug reported with "ant clean dist test" where the scalac target wasn't found.
Now building all scala code at the same time, just like all java code is compiled at the same time.
Sped up the build for everyone by uncommenting small parts of a few classes so that javac/scalac will not constantly launch trying to build .class files that will never compile.
Moved some source files to their expected location so that the .java/.scala -> .class is a one-to-one match, again keeping the compilers from wasting cycles.
Used <uptodate> and <touch> to skip extracting the help text and generating the GATK Queue extensions when the source files haven't been modified.
Fixed a couple of errors that occurred when the <javadoc> task is run.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4963 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
kshakir 2011-01-07 22:03:36 +00:00
parent be67161b47
commit b34e2f733f
13 changed files with 169 additions and 140 deletions

196
build.xml
View File

@ -10,8 +10,8 @@
<property name="resource.file" value="StingText.properties" />
<property name="resource.path" value="${java.classes}/StingText.properties" />
<property name="queue.source.dir" value="scala/src" />
<property name="queue.classes" value="${build.dir}/scala/classes" />
<property name="scala.source.dir" value="scala/src" />
<property name="scala.classes" value="${build.dir}/scala/classes" />
<property name="queue-extensions.source.dir" value="${build.dir}/queue-extensions/src" />
@ -73,14 +73,14 @@
</fileset>
</path>
<!-- Path to queue dependencies. -->
<path id="queue.dependencies">
<!-- Path to scala dependencies. -->
<path id="scala.dependencies">
<path refid="external.dependencies" />
<pathelement location="${java.classes}" />
<!-- Need the resources as we will be running a command line program which needs the help text. -->
<pathelement location="${resource.path}" />
<!-- Add any previously compiled queue classes to the path. -->
<pathelement location="${queue.classes}" />
<!-- Add any previously compiled scala classes to the path. -->
<pathelement location="${scala.classes}" />
</path>
<target name="init.resolve" unless="init.resolve.done">
@ -105,12 +105,7 @@
<target name="resolve" depends="init.resolve,init"
description="locate and download library dependencies">
<property name="gatk.ivy.conf" value="default" />
<property name="scala.ivy.conf" value="default" />
<condition property="queue.ivy.conf" value="queue" else="default">
<isset property="queue.include" />
</condition>
<property name="ivy.conf" value="default, ${gatk.ivy.conf}, ${scala.ivy.conf}, ${queue.ivy.conf}"/>
<property name="ivy.conf" value="default"/>
<ivy:retrieve file="ivy.xml" conf="${ivy.conf}" />
<!-- Remove old versions of ivy jars AFTER the ivy:retrieve has been class loaded. -->
<delete file="${ivy.jar.dir}/ivy-2.0.0.jar"/>
@ -118,9 +113,9 @@
</target>
<target name="init.buildall">
<!-- Set the properties needed to build Queue -->
<!-- Set the properties needed to build everything -->
<property name="gatk.target" value="oneoffs"/>
<property name="queue.target" value="core"/>
<property name="scala.target" value="core"/>
</target>
<!-- define some key locations that might change based on how the build is run -->
@ -144,31 +139,24 @@
</condition>
<!-- Get the queue build target. Default to none. -->
<condition property="queue.target" value="none" else="${env.QUEUE_BUILD_TYPE}">
<condition property="scala.target" value="none" else="${env.QUEUE_BUILD_TYPE}">
<equals arg1="${env.QUEUE_BUILD_TYPE}" arg2="$${env.QUEUE_BUILD_TYPE}" />
</condition>
<!-- If the queue target is set, or if the queue-extensions needs to be built, then include all queue tasks. -->
<condition property="queue.include">
<!-- If the scala target is set, then include all scala tasks. -->
<condition property="scala.include">
<or>
<not><equals arg1="${queue.target}" arg2="none" /></not>
<not><equals arg1="${scala.target}" arg2="none" /></not>
</or>
</condition>
<!-- If queue is being built, then include scala tasks (init.scalatasks) -->
<condition property="scala.include">
<or>
<isset property="queue.include" />
</or>
</condition>
<!-- Get the pipeline run type. Default to dry. -->
<condition property="pipeline.run" value="dry" else="${pipeline.run}">
<equals arg1="${pipeline.run}" arg2="$${pipeline.run}" />
</condition>
<echo message="GATK build : ${gatk.target}"/>
<echo message="Queue build : ${queue.target}"/>
<echo message="Scala build : ${scala.target}"/>
<echo message="source revision : ${build.version}"/>
<echo message="build time : ${build.timestamp}" />
@ -202,7 +190,7 @@
</pathconvert>
</target>
<target name="init.scalatasks" depends="resolve" if="scala.include"
<target name="init.scala.compile" depends="resolve"
description="Initializes the scala ant tasks from scala-compiler.jar">
<path id="scala.classpath">
<fileset dir="lib">
@ -232,33 +220,61 @@
</subant>
</target>
<!--
Decides whether the Queue GATK extension sources need to be regenerated.
Sets the property uptodate.queue-extensions.generate when either scala.include
is not set, or the uptodate check of ${queue-extensions.source.dir} against the
java and external source file sets passes. Runs after gatk.compile.
-->
<target name="init.queue-extensions.generate" depends="gatk.compile">
<condition property="uptodate.queue-extensions.generate">
<or>
<not>
<isset property="scala.include"/>
</not>
<uptodate targetfile="${queue-extensions.source.dir}">
<srcfiles refid="java.source.files"/>
<srcfiles refid="external.source.files"/>
</uptodate>
</or>
</condition>
</target>
<!-- NOTE: Extracting help first to avoid "Unable to load help text. Help output will be sparse." warning message. -->
<target name="queue-extensions.generate" depends="gatk.compile, extracthelp" if="queue.include" description="generate GATK modules for Queue">
<target name="queue-extensions.generate" depends="extracthelp,init.queue-extensions.generate" unless="uptodate.queue-extensions.generate" description="generate GATK modules for Queue">
<mkdir dir="${queue-extensions.source.dir}"/>
<echo>Generating Queue GATK extensions...</echo>
<java fork="true" failonerror="true" classname="org.broadinstitute.sting.queue.extensions.gatk.GATKExtensionsGenerator" classpathref="queue.dependencies">
<java fork="true" failonerror="true" classname="org.broadinstitute.sting.queue.extensions.gatk.GATKExtensionsGenerator" classpathref="scala.dependencies">
<arg value="-outDir" />
<arg path="${queue-extensions.source.dir}" />
<arg value="-l" />
<arg value="WARN" />
</java>
<touch>
<file file="${queue-extensions.source.dir}"/>
</touch>
</target>
<!-- Queue depends on the gatk since it contains the StingUtils (including CommandLine) -->
<target name="queue.compile" depends="init,resolve,gatk.compile,init.scalatasks,queue-extensions.generate" if="queue.include" description="compile Queue">
<mkdir dir="${queue.classes}"/>
<echo>Building Queue...</echo>
<scalac fork="true" jvmargs="-Xmx512m" destdir="${queue.classes}" classpathref="queue.dependencies" deprecation="yes" unchecked="yes">
<src path="${queue.source.dir}" />
<!-- Scala depends on the java compile -->
<target name="scala.compile" depends="init,resolve,gatk.compile,queue-extensions.generate,init.scala.compile" if="scala.include" description="compile Scala">
<mkdir dir="${scala.classes}"/>
<echo>Building Scala...</echo>
<scalac fork="true" jvmargs="-Xmx512m" destdir="${scala.classes}" classpathref="scala.dependencies" deprecation="yes" unchecked="yes">
<src path="${scala.source.dir}" />
<src path="${queue-extensions.source.dir}" />
<include name="**/*.scala"/>
<exclude name="*.scala"/>
</scalac>
</target>
<target name="extracthelp" depends="init,gatk.compile"
<!--
Decides whether the help text resource needs to be extracted again.
Sets the property uptodate.extracthelp when either disable.help is set, or the
uptodate check of ${basedir}/${resource.path} against the java and external
source file sets passes. Runs after init and gatk.compile.
-->
<target name="init.extracthelp" depends="init,gatk.compile">
<condition property="uptodate.extracthelp">
<or>
<isset property="disable.help"/>
<uptodate targetfile="${basedir}/${resource.path}">
<srcfiles refid="java.source.files"/>
<srcfiles refid="external.source.files"/>
</uptodate>
</or>
</condition>
</target>
<target name="extracthelp" depends="init.extracthelp"
description="Extract help key/value pair file from the JavaDoc tags."
unless="disable.help">
unless="uptodate.extracthelp">
<path id="doclet.classpath">
<path refid="external.dependencies" />
<pathelement location="${java.classes}" />
@ -273,7 +289,7 @@
</javadoc>
</target>
<target name="sting.compile" depends="gatk.compile, queue.compile" />
<target name="sting.compile" depends="gatk.compile, scala.compile" />
<target name="init.jar" depends="sting.compile,extracthelp">
<mkdir dir="${dist.dir}"/>
@ -333,9 +349,17 @@
</subant>
</target>
<target name="queue.jar" depends="queue.compile, init.jar" if="queue.include">
<target name="scala.jar" depends="scala.compile, init.jar" if="scala.include">
<jar jarfile="${dist.dir}/GATKScala.jar">
<fileset dir="${scala.classes}">
<include name="org/broadinstitute/sting/scala/**/*.class"/>
</fileset>
</jar>
</target>
<target name="queue.jar" depends="scala.compile, init.jar" if="scala.include">
<jar jarfile="${dist.dir}/Queue.jar">
<fileset dir="${queue.classes}">
<fileset dir="${scala.classes}">
<include name="org/broadinstitute/sting/queue/**/*.class"/>
</fileset>
<fileset dir="${java.classes}">
@ -381,7 +405,7 @@
</target>
<target name="queue.manifests" depends="queue.jar, init.manifests" if="queue.include">
<target name="queue.manifests" depends="queue.jar, init.manifests" if="scala.include">
<jar jarfile="${dist.dir}/Queue.jar" update="true" >
<manifest>
<attribute name="Class-Path" value="${jar.classpath}" />
@ -416,16 +440,20 @@
inheritAll is false so that 'ant queue' does not accidentally import
params if the build was called with 'ant clean oneoffs queue'.
Instead this task resets the parameters and is just like running
a fresh 'ant dist -Dqueue.target=core'.
a fresh 'ant dist -Dscala.target=core'.
-->
<antcall target="dist" inheritAll="false">
<param name="queue.target" value="core" />
<param name="scala.target" value="core" />
</antcall>
</target>
<target name="test.compile" depends="init.buildall,dist">
<echo message="Sting: Compiling test cases!"/>
<target name="test.init.compile">
<mkdir dir="${java.test.classes}"/>
<mkdir dir="${scala.test.classes}"/>
</target>
<target name="test.java.compile" depends="init.buildall,dist,test.init.compile">
<echo message="Sting: Compiling test cases!"/>
<javac fork="true" memoryMaximumSize="512m" destdir="${java.test.classes}" debug="true" optimize="on">
<src path="${java.test.sources}"/>
<classpath>
@ -434,55 +462,30 @@
<pathelement location="lib/testng-5.14.1.jar"/>
</classpath>
</javac>
</target>
<echo message="Queue: Compiling test cases!"/>
<mkdir dir="${queue.test.classes}"/>
<scalac fork="true" jvmargs="-Xmx512m" srcdir="${queue.test.sources}" destdir="${queue.test.classes}" deprecation="yes" unchecked="yes">
<include name="org/broadinstitute/sting/queue/**/*.scala"/>
<target name="test.scala.compile" depends="test.java.compile,scala.compile" if="scala.include">
<echo message="Scala: Compiling test cases!"/>
<scalac fork="true" jvmargs="-Xmx512m" srcdir="${scala.test.sources}" destdir="${scala.test.classes}" deprecation="yes" unchecked="yes">
<include name="**/*.scala"/>
<classpath>
<path refid="queue.dependencies"/>
<pathelement location="${queue.classes}"/>
<path refid="scala.dependencies"/>
<pathelement location="${scala.classes}"/>
<pathelement location="${java.test.classes}"/>
<pathelement location="lib/testng-5.14.1.jar"/>
</classpath>
</scalac>
</target>
<target name="test.compile" depends="test.java.compile,test.scala.compile">
</target>
<!-- new scala target -->
<target name="scala" description="build the scala directory">
<antcall target="resolve">
<param name="ivy.conf" value="scala"/>
</antcall>
<antcall target="dist"/>
<property name="scala.source.dir" value="scala/src" />
<property name="scala.classes" value="build/scala" />
<path id="scala.classpath">
<fileset dir="lib">
<include name="scala-compiler-*.jar"/>
<include name="scala-library-*.jar"/>
</fileset>
<fileset dir="${dist.dir}">
<patternset id="scalaStuff">
<include name="*.jar"/>
</patternset>
</fileset>
</path>
<taskdef resource="scala/tools/ant/antlib.xml">
<classpath refid="scala.classpath"/>
</taskdef>
<mkdir dir="${scala.classes}"/>
<echo>Building Scala...</echo>
<scalac fork="true" srcdir="${scala.source.dir}" destdir="${scala.classes}" classpathref="scala.classpath" force="changed">
<include name="*.scala"/>
</scalac>
<jar jarfile="${dist.dir}/GATKScala.jar">
<fileset dir="${scala.classes}">
<include name="*.class"/>
</fileset>
</jar>
<antcall target="scala.jar" inheritAll="false">
<param name="scala.target" value="core" />
</antcall>
</target>
<!-- ***************************************************************************** -->
@ -493,8 +496,8 @@
<property name="java.test.classes" value="${build.dir}/java/testclasses"/>
<property name="test.output" value="${dist.dir}/test"/>
<property name="java.test.sources" value="java/test"/>
<property name="queue.test.classes" value="${build.dir}/scala/testclasses"/>
<property name="queue.test.sources" value="scala/test"/>
<property name="scala.test.classes" value="${build.dir}/scala/testclasses"/>
<property name="scala.test.sources" value="scala/test"/>
<!-- provide a ceiling on the memory that unit/integration tests can consume. -->
<property name="test.maxmemory" value="4g"/>
@ -521,13 +524,13 @@
<classpath>
<path refid="external.dependencies" />
<pathelement location="${java.classes}" />
<pathelement location="${queue.classes}" />
<pathelement location="${scala.classes}" />
<pathelement location="${java.test.classes}" />
<pathelement location="${queue.test.classes}" />
<pathelement location="${scala.test.classes}" />
</classpath>
<classfileset dir="${java.test.classes}" includes="**/@{testtype}.class"/>
<classfileset dir="${queue.test.classes}" includes="**/@{testtype}*.class" />
<classfileset dir="${scala.test.classes}" includes="**/@{testtype}*.class" />
</testng>
<!-- generate a report for Bamboo or Hudson to read in -->
@ -599,20 +602,21 @@
<target name="clean.javadoc">
<delete dir="javadoc"/>
<delete dir="scaladoc"/>
</target>
<target name="javadoc" depends="init.buildall,resolve,queue-extensions.generate" description="generates javadoc">
<target name="javadoc" depends="init.buildall,resolve,queue-extensions.generate,init.scala.compile" description="generates javadoc">
<mkdir dir="javadoc"/>
<javadoc destdir="javadoc"
classpathref="external.dependencies">
<packageset refid="java.source.files"/>
<fileset refid="java.source.files"/>
<fileset refid="external.source.files"/>
</javadoc>
<mkdir dir="javadoc/queue"/>
<scaladoc srcdir="" destdir="javadoc/queue" classpathref="queue.dependencies" deprecation="yes" unchecked="yes">
<src path="${queue.source.dir}"/>
<mkdir dir="scaladoc"/>
<scaladoc srcdir="" destdir="scaladoc" classpathref="scala.dependencies" deprecation="yes" unchecked="yes">
<src path="${scala.source.dir}"/>
<src path="${queue-extensions.source.dir}"/>
<include name="org/broadinstitute/sting/queue/**/*.scala"/>
<include name="**/*.scala"/>
</scaladoc>
</target>

10
ivy.xml
View File

@ -33,20 +33,18 @@
<dependency org="org.jgrapht" name="jgrapht-jdk1.5" rev="0.7.3"/>
<!-- Commons Dependencies -->
<dependency org="org.apache.commons" name="commons-email" rev="1.2"/>
<dependency org="org.apache.commons" name="commons-jexl" rev="2.0"/>
<dependency org="commons-lang" name="commons-lang" rev="2.5"/>
<dependency org="commons-logging" name="commons-logging" rev="1.1.1"/>
<dependency org="commons-io" name="commons-io" rev="2.0"/>
<!-- Dependencies for LSF library -->
<dependency org="net.java.dev.jna" name="jna" rev="3.2.7"/>
<!-- Dependencies for Queue GATK Extensions code generator living in java/src -->
<dependency org="commons-lang" name="commons-lang" rev="2.5"/>
<!-- Scala dependancies -->
<dependency org="org.scala-lang" name="scala-compiler" rev="2.8.1" conf="scala->default"/>
<dependency org="org.scala-lang" name="scala-library" rev="2.8.1" conf="scala->default"/>
<dependency org="org.apache.commons" name="commons-email" rev="1.2"/>
<dependency org="org.scala-lang" name="scala-compiler" rev="2.8.1"/>
<dependency org="org.scala-lang" name="scala-library" rev="2.8.1"/>
<!-- findbug dependencies -->
<dependency org="net.sourceforge.findbugs" name="findbugs" rev="1.3.2" conf="findbugs->default"/>

View File

@ -182,10 +182,6 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
private final ArrayList<SAMRecord> readsNotToClean = new ArrayList<SAMRecord>();
private final IdentityHashMap<Object, VariantContext> knownIndelsToTry = new IdentityHashMap<Object, VariantContext>();
// random number generator
private static final long RANDOM_SEED = 1252863495;
private static final Random generator = new Random(RANDOM_SEED);
private static final int MAX_QUAL = 99;
// fraction of mismatches that need to no longer mismatch for a column to be considered cleaned
@ -614,9 +610,9 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
// if there are reads with a single indel in them, add that indel to the list of alternate consenses
long totalRawMismatchSum = determineReadsThatNeedCleaning(reads, refReads, altReads, altAlignmentsToTest, altConsenses, leftmostIndex, reference);
// use 'Smith-Waterman' to create alternate consenses from reads that mismatch the reference
// use 'Smith-Waterman' to create alternate consenses from reads that mismatch the reference, using totalRawMismatchSum as the random seed
if ( !USE_KNOWN_INDELS_ONLY )
generateAlternateConsensesFromReads(altAlignmentsToTest, altConsenses, reference, leftmostIndex);
generateAlternateConsensesFromReads(altAlignmentsToTest, altConsenses, reference, leftmostIndex, totalRawMismatchSum);
// if ( debugOn ) System.out.println("------\nChecking consenses...\n--------\n");
@ -867,7 +863,8 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
private void generateAlternateConsensesFromReads(final LinkedList<AlignedRead> altAlignmentsToTest,
final Set<Consensus> altConsensesToPopulate,
final byte[] reference,
final int leftmostIndex) {
final int leftmostIndex,
final long randomSeed) {
// if we are under the limit, use all reads to generate alternate consenses
if ( altAlignmentsToTest.size() <= MAX_READS_FOR_CONSENSUSES ) {
@ -879,6 +876,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
// otherwise, choose reads for alternate consenses randomly
else {
int readsSeen = 0;
Random generator = new Random(randomSeed);
while ( readsSeen++ < MAX_READS_FOR_CONSENSUSES && altConsensesToPopulate.size() <= MAX_CONSENSUSES) {
int index = generator.nextInt(altAlignmentsToTest.size());
AlignedRead aRead = altAlignmentsToTest.remove(index);

View File

@ -52,7 +52,7 @@ public class IntervalUtils {
return null;
}
// if any argument is 'unmapped', "parse" it to a null entry. A null in this case means 'all the intervals with no alignment data'.
else if(fileOrInterval.trim().toLowerCase().equals("unmapped"))
else if (isUnmapped(fileOrInterval))
rawIntervals.add(GenomeLoc.UNMAPPED);
// if it's a file, add items to raw interval list
else if (isIntervalFile(fileOrInterval)) {
@ -75,6 +75,15 @@ public class IntervalUtils {
return rawIntervals;
}
/**
 * Tests whether an interval argument names the special "unmapped" interval.
 * Surrounding whitespace and letter case are ignored in the comparison.
 * @param interval Interval string to test; may be null
 * @return true if the trimmed, lower-cased string equals "unmapped"; false otherwise, including for null
 */
public static boolean isUnmapped(String interval) {
    if (interval == null)
        return false;
    final String normalized = interval.trim().toLowerCase();
    return "unmapped".equals(normalized);
}
/**
* merge two interval lists, using an interval set rule
* @param setOne a list of genomeLocs, in order (cannot be NULL)

View File

@ -1,6 +1,6 @@
import org.broadinstitute.sting.commandline.ArgumentCollection
import org.broadinstitute.sting.queue.extensions.gatk._
import org.broadinstitute.sting.queue.library.ipf.ExpandIntervals
import org.broadinstitute.sting.queue.library.ipf.intervals.ExpandIntervals
import org.broadinstitute.sting.queue.pipeline.PipelineArgumentCollection
import org.broadinstitute.sting.queue.QScript
import org.broadinstitute.sting.utils.text.XReadLines

View File

@ -101,10 +101,8 @@ class fullCallingPipeline extends QScript {
//val seq = qscript.machine
//val expKind = qscript.protocol
// get contigs (needed for indel cleaning parallelism)
val contigs = IntervalUtils.distinctContigs(
qscript.pipeline.getProject.getReferenceFile,
List(qscript.pipeline.getProject.getIntervalList.getAbsolutePath)).toList
// get max num contigs for indel cleaning parallelism, plus 1 for -L unmapped
val numContigs = IntervalUtils.distinctContigs(qscript.pipeline.getProject.getReferenceFile).size + 1
for ( sample <- recalibratedSamples ) {
val sampleId = sample.getId
@ -134,13 +132,12 @@ class fullCallingPipeline extends QScript {
realigner.targetIntervals = targetCreator.out
realigner.intervals = Nil
realigner.intervalsString = Nil
realigner.scatterCount = num_cleaner_scatter_jobs min contigs.size
realigner.scatterCount = num_cleaner_scatter_jobs min numContigs
realigner.rodBind :+= RodBind("dbsnp", dbsnpType, qscript.pipeline.getProject.getDbsnpFile)
realigner.rodBind :+= RodBind("indels", "VCF", swapExt(realigner.reference_sequence.getParentFile, realigner.reference_sequence, "fasta", "1kg_pilot_indels.vcf"))
// if scatter count is > 1, do standard scatter gather, if not, explicitly set up fix mates
if (realigner.scatterCount > 1) {
realigner.intervalsString = contigs
realigner.out = cleaned_bam
// While gathering run fix mates.
realigner.setupScatterFunction = {

View File

@ -1,5 +0,0 @@
object HelloWorld {
def main(args: Array[String]) {
println("Hello, world!")
}
}

View File

@ -2,16 +2,11 @@ package org.broadinstitute.sting.queue.extensions.gatk
import org.broadinstitute.sting.commandline.ArgumentSource
import org.broadinstitute.sting.utils.interval.IntervalUtils
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceDataSource
import java.io.File
import net.sf.picard.util.IntervalList
import net.sf.samtools.SAMFileHeader
import collection.JavaConversions._
import org.broadinstitute.sting.utils.{GenomeLoc, GenomeLocSortedSet, GenomeLocParser}
import org.broadinstitute.sting.queue.util.IOUtils
import org.broadinstitute.sting.queue.function.scattergather.{CloneFunction, ScatterGatherableFunction, ScatterFunction}
import org.broadinstitute.sting.queue.function.{QFunction, InProcessFunction}
import org.broadinstitute.sting.queue.QException
/**
* An interval scatter function.
@ -19,11 +14,30 @@ import org.broadinstitute.sting.queue.QException
class IntervalScatterFunction extends ScatterFunction with InProcessFunction {
var splitByContig = false
/** The total number of clone jobs that will be created. */
private var scatterCount: Int = _
/** The reference sequence for the GATK function. */
private var referenceSequence: File = _
/** The runtime field to set for specifying an interval file. */
private var intervalsField: ArgumentSource = _
/** The runtime field to set for specifying an interval string. */
private var intervalsStringField: ArgumentSource = _
/** The list of interval files ("/path/to/interval.list") or interval strings ("chr1", "chr2") to parse into smaller parts. */
private var intervals: List[String] = Nil
/** Whether the last scatter job should also include any unmapped reads. */
private var includeUnmapped: Boolean = _
/**
* Checks if the function is scatter gatherable.
* @param originalFunction Function to check.
* @return true if the function is a GATK function with the reference sequence set.
* @throws IllegalArgumentException if -BTI or -BTIMR are set. QScripts should not try to scatter gather with those option set.
*/
def isScatterGatherable(originalFunction: ScatterGatherableFunction) = {
if (originalFunction.isInstanceOf[CommandLineGATK]) {
val gatk = originalFunction.asInstanceOf[CommandLineGATK]
@ -32,18 +46,32 @@ class IntervalScatterFunction extends ScatterFunction with InProcessFunction {
} else false
}
/**
* Sets the scatter gatherable function.
* @param originalFunction Function to bind.
*/
def setScatterGatherable(originalFunction: ScatterGatherableFunction) = {
val gatk = originalFunction.asInstanceOf[CommandLineGATK]
this.referenceSequence = gatk.reference_sequence
this.intervals ++= gatk.intervalsString
this.intervals ++= gatk.intervals.map(_.toString)
this.intervalsField = QFunction.findField(originalFunction.getClass, "intervals")
this.intervalsStringField = QFunction.findField(originalFunction.getClass, "intervalsString")
this.scatterCount = originalFunction.scatterCount
this.referenceSequence = gatk.reference_sequence
if (gatk.intervals.isEmpty && gatk.intervalsString.isEmpty) {
this.intervals ++= IntervalUtils.distinctContigs(this.referenceSequence).toList
this.includeUnmapped = this.splitByContig
} else {
this.intervals ++= gatk.intervals.map(_.toString)
this.intervals ++= gatk.intervalsString.filterNot(interval => IntervalUtils.isUnmapped(interval))
this.includeUnmapped = gatk.intervalsString.exists(interval => IntervalUtils.isUnmapped(interval))
}
}
def initCloneInputs(cloneFunction: CloneFunction, index: Int) = {
cloneFunction.setFieldValue(this.intervalsField, List(new File("scatter.intervals")))
cloneFunction.setFieldValue(this.intervalsStringField, List.empty[String])
if (index == scatterCount && includeUnmapped)
cloneFunction.setFieldValue(this.intervalsStringField, List("unmapped"))
else
cloneFunction.setFieldValue(this.intervalsStringField, List.empty[String])
}
def bindCloneInputs(cloneFunction: CloneFunction, index: Int) = {

View File

@ -1,4 +1,4 @@
package org.broadinstitute.sting.queue.library.ipf
package org.broadinstitute.sting.queue.library.ipf.intervals
import org.broadinstitute.sting.queue.function.InProcessFunction
import org.broadinstitute.sting.commandline._

View File

@ -39,9 +39,8 @@ class PipelineArgumentCollection {
@Input(doc="Skip indel-cleaning for BAM files (for testing only)", shortName="skipCleaning", required=false)
var skip_cleaning = false
@Input(doc="List of samples and bams (in the form sample_id k1:v1,k2:v2 "+
"cleaned:/path/to/cleaned.bam,recalibrated:/path/to/recal.bam,unreacalibrated:/path/to/unrecal.bam)."+
"Mutually exclusive with YAML",required=false, shortName="pBams")
@Input(doc="List of samples and bams (in the form sample_id k1:v1,k2:v2 cleaned:/path/to/cleaned.bam,recalibrated:/path/to/recal.bam,unreacalibrated:/path/to/unrecal.bam). Mutually exclusive with YAML",
required=false, shortName="pBams")
var projectBams: File = _
@Input(doc="The project name. Mutually exclusive with YAML.", required = false, shortName="pName")

View File

@ -1,5 +1,5 @@
/**package org.broadinstitute.sting.scala
package org.broadinstitute.sting.scala
/**
import gatk.walkers.genotyper.{UnifiedGenotyper, GenotypeCall}
import java.io.File
import net.sf.samtools.SAMRecord
@ -68,7 +68,7 @@ class TransitionTable() {
}
}
class BaseTransitionTableCalculator { // extends LocusWalker[Unit,Int] {
**/class BaseTransitionTableCalculator /**{ // extends LocusWalker[Unit,Int] {
private var MIN_MAPPING_QUALITY = 30
private var MIN_BASE_QUALITY = 20
private var MIN_LOD = 5

View File

@ -12,6 +12,7 @@ import org.broadinstitute.sting.gatk.walkers.{TreeReducible, RefWalker}
import org.broadinstitute.sting.commandline.{Output, Argument}
import org.broadinstitute.sting.utils.{BaseUtils, GenomeLoc}
import collection.mutable.{ListBuffer, HashSet}
import java.lang.Math
class IntervalAnnotationWalker extends RefWalker[AnnotationMetaData,List[IntervalInfoBuilder]] {
@Argument(doc="Min proportion of bases overlapping between an interval of interest and an annotation interval for annotation to occur",shortName="mpb")
@ -109,9 +110,9 @@ class IntervalInfoBuilder(loc : GenomeLoc, minProp : Double) {
Math.log(1+byteList.tail.size-byteList.tail.dropWhile( u => u.equals(byteList(1))).size) +
calcEntropy(byteList.tail.foldLeft(ListBuffer(byteList(0)))( (a,b) => {
if ( b.equals(byteList(1)) ) {
a.dropRight(1) + (a.last ++ b)
a.dropRight(1) :+ (a.last ++ b)
} else {
a + b
a :+ b
}
}))
}