Merge branch 'master' into diffengine
This commit is contained in:
commit
8add2a3866
35
build.xml
35
build.xml
|
|
@ -69,8 +69,6 @@
|
|||
|
||||
<property environment="env"/>
|
||||
|
||||
<property name="drmaa.jar" value="${env.SGE_ROOT}/lib/drmaa.jar" />
|
||||
|
||||
<patternset id="java.source.pattern">
|
||||
<include name="${java.public.source.dir}/**/*.java" />
|
||||
<include name="${java.private.source.dir}/**/*.java" if="include.private" />
|
||||
|
|
@ -146,11 +144,7 @@
|
|||
<property name="init.resolve.done" value="true"/>
|
||||
</target>
|
||||
|
||||
<target name="init.gridengine" depends="init" if="include.gridengine">
|
||||
<copy todir="lib" file="${drmaa.jar}"/>
|
||||
</target>
|
||||
|
||||
<target name="resolve" depends="init.resolve,init,init.gridengine"
|
||||
<target name="resolve" depends="init.resolve,init"
|
||||
description="locate and download library dependencies">
|
||||
<property name="ivy.conf" value="default"/>
|
||||
<ivy:retrieve file="ivy.xml" conf="${ivy.conf}" />
|
||||
|
|
@ -178,13 +172,23 @@
|
|||
<property name="build.version" value="${git.describe.output}" />
|
||||
</target>
|
||||
|
||||
<target name="untagged.build.version" depends="git.describe" unless="git.describe.succeeded">
|
||||
<exec executable="git" outputproperty="build.version" failonerror="true">
|
||||
<!-- Fallback for when "git describe" did not succeed (skipped when git.describe.succeeded is set):
     runs "git rev-parse HEAD", storing its output in git.rev-parse.output and its exit code in
     git.rev-parse.exit.value. failonerror is false, so a non-zero exit code does not abort the
     build; git.rev-parse.succeeded is set only when the exit code equals 0. -->
<target name="git.rev-parse" depends="git.describe" unless="git.describe.succeeded">
|
||||
<exec executable="git" outputproperty="git.rev-parse.output" resultproperty="git.rev-parse.exit.value" failonerror="false">
|
||||
<arg line="rev-parse HEAD" />
|
||||
</exec>
|
||||
<condition property="git.rev-parse.succeeded">
|
||||
<equals arg1="${git.rev-parse.exit.value}" arg2="0" />
|
||||
</condition>
|
||||
</target>
|
||||
|
||||
<target name="generate.build.version" depends="tagged.build.version, untagged.build.version" />
|
||||
<!-- Runs only when git.rev-parse.succeeded is set: uses the text captured in
     git.rev-parse.output as build.version. -->
<target name="untagged.build.version" depends="git.rev-parse" if="git.rev-parse.succeeded">
|
||||
<property name="build.version" value="${git.rev-parse.output}" />
|
||||
</target>
|
||||
|
||||
<!-- Entry point for computing build.version: tagged.build.version and untagged.build.version
     run first (via depends), then the fallback value below is supplied. -->
<target name="generate.build.version" depends="tagged.build.version, untagged.build.version">
|
||||
<!-- Set build.version to exported if no other value has been set -->
|
||||
<property name="build.version" value="exported" />
|
||||
</target>
|
||||
|
||||
<!-- define some key locations that might change based on how the build is run -->
|
||||
<target name="init" depends="generate.build.version">
|
||||
|
|
@ -214,12 +218,6 @@
|
|||
</or>
|
||||
</condition>
|
||||
|
||||
<!-- Include Grid Engine in the compile if SGE_ROOT is available. -->
|
||||
<!-- Based off of http://wikis.sun.com/display/GridEngine/Automating+Grid+Engine+Functions+Through+DRMAA -->
|
||||
<condition property="include.gridengine">
|
||||
<available file="${drmaa.jar}"/>
|
||||
</condition>
|
||||
|
||||
<echo message="GATK build : ${gatk.target}"/>
|
||||
<echo message="Scala build : ${scala.target}"/>
|
||||
<echo message="source revision : ${build.version}"/>
|
||||
|
|
@ -357,7 +355,6 @@
|
|||
<src path="${scala.public.source.dir}" />
|
||||
<src path="${queue-extensions.source.dir}" />
|
||||
<include name="**/*.scala"/>
|
||||
<exclude name="**/gridengine/**" unless="include.gridengine" />
|
||||
</scalac>
|
||||
</target>
|
||||
|
||||
|
|
@ -374,7 +371,6 @@
|
|||
<scalac fork="true" jvmargs="-Xmx512m" destdir="${scala.classes}" classpathref="scala.dependencies" deprecation="yes" unchecked="yes">
|
||||
<src path="${scala.private.source.dir}" />
|
||||
<include name="**/*.scala"/>
|
||||
<exclude name="**/gridengine/**" unless="include.gridengine" />
|
||||
</scalac>
|
||||
</target>
|
||||
|
||||
|
|
@ -464,7 +460,7 @@
|
|||
<exclude name="**/utils/variantcontext/**/*.class"/>
|
||||
</fileset>
|
||||
<fileset dir="${java.classes}" includes="**/commandline/**/*.class"/>
|
||||
<fileset dir="${java.classes}" includes="**/sting/datasources/**/*.class"/>
|
||||
<fileset dir="${java.classes}" includes="**/sting/pipeline/**/*.class"/>
|
||||
<fileset dir="${java.classes}" includes="**/sting/jna/**/*.class"/>
|
||||
<fileset dir="${java.classes}" includes="net/sf/picard/**/*.class"/>
|
||||
<fileset dir="${java.classes}" includes="net/sf/samtools/**/*.class"/>
|
||||
|
|
@ -682,7 +678,6 @@
|
|||
<src path="${scala.public.test.sources}" />
|
||||
<src path="${scala.private.test.sources}" />
|
||||
<include name="**/*.scala"/>
|
||||
<exclude name="**/gridengine/**" unless="include.gridengine" />
|
||||
<classpath>
|
||||
<path refid="scala.dependencies"/>
|
||||
<pathelement location="${scala.test.classes}"/>
|
||||
|
|
|
|||
3
ivy.xml
3
ivy.xml
|
|
@ -48,6 +48,9 @@
|
|||
<!-- Dependencies for amazon.com S3 support -->
|
||||
<dependency org="net.java.dev.jets3t" name="jets3t" rev="0.8.0"/>
|
||||
|
||||
<!-- Dependencies for GridEngine -->
|
||||
<dependency org="net.sf.gridscheduler" name="drmaa" rev="latest.integration"/>
|
||||
|
||||
<!-- Scala dependencies -->
|
||||
<dependency org="org.scala-lang" name="scala-compiler" rev="2.8.1"/>
|
||||
<dependency org="org.scala-lang" name="scala-library" rev="2.8.1"/>
|
||||
|
|
|
|||
|
|
@ -61,7 +61,7 @@ public class AnalyzeCovariates extends CommandLineProgram {
|
|||
@Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/tools/apps/R-2.6.0/bin/Rscript", required = false)
|
||||
private String PATH_TO_RSCRIPT = "Rscript";
|
||||
@Argument(fullName = "path_to_resources", shortName = "resources", doc = "Path to resources folder holding the Sting R scripts.", required = false)
|
||||
private String PATH_TO_RESOURCES = "R/";
|
||||
private String PATH_TO_RESOURCES = "public/R/";
|
||||
@Argument(fullName = "ignoreQ", shortName = "ignoreQ", doc = "Ignore bases with reported quality less than this number.", required = false)
|
||||
private int IGNORE_QSCORES_LESS_THAN = 5;
|
||||
@Argument(fullName = "numRG", shortName = "numRG", doc = "Only process N read groups. Default value: -1 (process all read groups)", required = false)
|
||||
|
|
|
|||
|
|
@ -92,25 +92,31 @@ public class UnifiedArgumentCollection {
|
|||
@Argument(fullName = "indel_heterozygosity", shortName = "indelHeterozygosity", doc = "Heterozygosity for indel calling", required = false)
|
||||
public double INDEL_HETEROZYGOSITY = 1.0/8000;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName = "indelGapContinuationPenalty", shortName = "indelGCP", doc = "Indel gap continuation penalty", required = false)
|
||||
public double INDEL_GAP_CONTINUATION_PENALTY = 10.0;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName = "indelGapOpenPenalty", shortName = "indelGOP", doc = "Indel gap open penalty", required = false)
|
||||
public double INDEL_GAP_OPEN_PENALTY = 45.0;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName = "indelHaplotypeSize", shortName = "indelHSize", doc = "Indel haplotype size", required = false)
|
||||
public int INDEL_HAPLOTYPE_SIZE = 80;
|
||||
@Hidden
|
||||
@Argument(fullName = "doContextDependentGapPenalties", shortName = "doCDP", doc = "Vary gap penalties by context", required = false)
|
||||
public boolean DO_CONTEXT_DEPENDENT_PENALTIES = true;
|
||||
//gdebug+
|
||||
@Hidden
|
||||
// experimental arguments, NOT TO BE USED BY ANYONE WHOSE INITIALS AREN'T GDA!!!
|
||||
@Hidden
|
||||
@Argument(fullName = "getGapPenaltiesFromData", shortName = "dataGP", doc = "Vary gap penalties by context - EXPERIMENTAL, DO NO USE", required = false)
|
||||
public boolean GET_GAP_PENALTIES_FROM_DATA = false;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName="indel_recal_file", shortName="recalFile", required=false, doc="Filename for the input covariates table recalibration .csv file - EXPERIMENTAL, DO NO USE")
|
||||
public File INDEL_RECAL_FILE = new File("indel.recal_data.csv");
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName = "indelDebug", shortName = "indelDebug", doc = "Output indel debug info", required = false)
|
||||
public boolean OUTPUT_DEBUG_INDEL_INFO = false;
|
||||
@Hidden
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ import net.sf.samtools.*;
|
|||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -113,9 +114,10 @@ public class ConstrainedMateFixingManager {
|
|||
HashMap<String, SAMRecordHashObject> forMateMatching = new HashMap<String, SAMRecordHashObject>();
|
||||
TreeSet<SAMRecord> waitingReads = new TreeSet<SAMRecord>(comparer);
|
||||
|
||||
private <T> T remove(TreeSet<T> treeSet) {
|
||||
final T first = treeSet.first();
|
||||
treeSet.remove(first);
|
||||
/**
 * Removes and returns the first element of the given set, i.e. the element
 * that {@code treeSet.first()} reports.
 *
 * NOTE(review): {@code TreeSet.first()} throws NoSuchElementException on an
 * empty set; callers presumably check for emptiness first - confirm.
 *
 * @param treeSet the set to take the first read from
 * @return the read that was removed from the set
 * @throws UserException if the set reports that the element it just returned
 *         as first could not be removed; the message attributes this to
 *         multiple identical copies of a read in a malformed SAM/BAM file
 */
private SAMRecord remove(TreeSet<SAMRecord> treeSet) {
|
||||
final SAMRecord first = treeSet.first();
|
||||
if ( !treeSet.remove(first) )
|
||||
throw new UserException("Error caching SAM record " + first.getReadName() + ", which is usually caused by malformed SAM/BAM files in which multiple identical copies of a read are present.");
|
||||
return first;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
* Copyright (c) 2011 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
|
|
|
|||
|
|
@ -1,3 +1,28 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
|
||||
|
||||
import Jama.Matrix;
|
||||
|
|
@ -72,7 +97,7 @@ public class GaussianMixtureModel {
|
|||
|
||||
int ttt = 0;
|
||||
while( ttt++ < numIterations ) {
|
||||
// Estep: assign each variant to the nearest cluster
|
||||
// E step: assign each variant to the nearest cluster
|
||||
for( final VariantDatum datum : data ) {
|
||||
double minDistance = Double.MAX_VALUE;
|
||||
MultivariateGaussian minGaussian = null;
|
||||
|
|
@ -87,7 +112,7 @@ public class GaussianMixtureModel {
|
|||
datum.assignment = minGaussian;
|
||||
}
|
||||
|
||||
// Mstep: update gaussian means based on assigned variants
|
||||
// M step: update gaussian means based on assigned variants
|
||||
for( final MultivariateGaussian gaussian : gaussians ) {
|
||||
gaussian.zeroOutMu();
|
||||
int numAssigned = 0;
|
||||
|
|
@ -204,26 +229,29 @@ public class GaussianMixtureModel {
|
|||
}
|
||||
|
||||
public double evaluateDatumMarginalized( final VariantDatum datum ) {
|
||||
int numVals = 0;
|
||||
int numSamples = 0;
|
||||
double sumPVarInGaussian = 0.0;
|
||||
int numIter = 10;
|
||||
final int numIterPerMissingAnnotation = 10; // Trade off here between speed of computation and accuracy of the marginalization
|
||||
final double[] pVarInGaussianLog10 = new double[gaussians.size()];
|
||||
// for each dimension
|
||||
for( int iii = 0; iii < datum.annotations.length; iii++ ) {
|
||||
// marginalize over the missing dimension by drawing X random values for the missing annotation and averaging the lod
|
||||
// if it is missing marginalize over the missing dimension by drawing X random values for the missing annotation and averaging the lod
|
||||
if( datum.isNull[iii] ) {
|
||||
for( int ttt = 0; ttt < numIter; ttt++ ) {
|
||||
datum.annotations[iii] = Normal.staticNextDouble(0.0, 1.0);
|
||||
for( int ttt = 0; ttt < numIterPerMissingAnnotation; ttt++ ) {
|
||||
datum.annotations[iii] = GenomeAnalysisEngine.getRandomGenerator().nextGaussian(); // draw a random sample from the standard normal distribution
|
||||
|
||||
// evaluate this random data point
|
||||
int gaussianIndex = 0;
|
||||
for( final MultivariateGaussian gaussian : gaussians ) {
|
||||
pVarInGaussianLog10[gaussianIndex++] = gaussian.pMixtureLog10 + gaussian.evaluateDatumLog10( datum );
|
||||
}
|
||||
|
||||
sumPVarInGaussian += Math.pow(10.0, MathUtils.log10sumLog10(pVarInGaussianLog10));
|
||||
numVals++;
|
||||
// add this sample's probability to the pile in order to take an average in the end
|
||||
sumPVarInGaussian += Math.pow(10.0, MathUtils.log10sumLog10(pVarInGaussianLog10)); // p = 10 ^ Sum(pi_k * p(v|n,k))
|
||||
numSamples++;
|
||||
}
|
||||
}
|
||||
}
|
||||
return Math.log10( sumPVarInGaussian / ((double) numVals) );
|
||||
return Math.log10( sumPVarInGaussian / ((double) numSamples) );
|
||||
}
|
||||
}
|
||||
|
|
@ -1,3 +1,28 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
|
||||
|
||||
import Jama.Matrix;
|
||||
|
|
|
|||
|
|
@ -1,3 +1,28 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
* Copyright (c) 2011 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
|
|
|
|||
|
|
@ -1,3 +1,28 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
|
|
|
|||
|
|
@ -1,8 +1,32 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantQualityScore;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||
|
|
|
|||
|
|
@ -1,6 +1,30 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
|
||||
|
||||
import cern.jet.random.Normal;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
|
|
@ -66,7 +90,7 @@ public class VariantDataManager {
|
|||
meanVector[iii] = theMean;
|
||||
varianceVector[iii] = theSTD;
|
||||
for( final VariantDatum datum : data ) {
|
||||
datum.annotations[iii] = ( datum.isNull[iii] ? Normal.staticNextDouble(0.0, 1.0) : ( datum.annotations[iii] - theMean ) / theSTD );
|
||||
datum.annotations[iii] = ( datum.isNull[iii] ? GenomeAnalysisEngine.getRandomGenerator().nextGaussian() : ( datum.annotations[iii] - theMean ) / theSTD );
|
||||
// Each data point is now [ (x - mean) / standard deviation ]
|
||||
if( annotationKeys.get(iii).toLowerCase().contains("ranksum") && datum.isNull[iii] && datum.annotations[iii] > 0.0 ) {
|
||||
datum.annotations[iii] /= 3.0;
|
||||
|
|
|
|||
|
|
@ -1,3 +1,28 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -88,7 +88,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
@Argument(fullName="rscript_file", shortName="rscriptFile", doc="The output rscript file generated by the VQSR to aid in visualization of the input data and learned model", required=false)
|
||||
private String RSCRIPT_FILE = null;
|
||||
@Argument(fullName = "path_to_resources", shortName = "resources", doc = "Path to resources folder holding the Sting R scripts.", required=false)
|
||||
private String PATH_TO_RESOURCES = "R/";
|
||||
private String PATH_TO_RESOURCES = "public/R/";
|
||||
@Argument(fullName="ts_filter_level", shortName="ts_filter_level", doc="The truth sensitivity level at which to start filtering, used here to indicate filtered variants in plots", required=false)
|
||||
private double TS_FILTER_LEVEL = 99.0;
|
||||
|
||||
|
|
@ -118,6 +118,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
||||
public void initialize() {
|
||||
if( !PATH_TO_RESOURCES.endsWith("/") ) { PATH_TO_RESOURCES = PATH_TO_RESOURCES + "/"; }
|
||||
dataManager = new VariantDataManager( new ArrayList<String>(Arrays.asList(USE_ANNOTATIONS)), VRAC );
|
||||
|
||||
if( IGNORE_INPUT_FILTERS != null ) {
|
||||
|
|
@ -228,19 +229,23 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
public void onTraversalDone( final ExpandingArrayList<VariantDatum> reduceSum ) {
|
||||
dataManager.setData( reduceSum );
|
||||
dataManager.normalizeData(); // Each data point is now (x - mean) / standard deviation
|
||||
|
||||
// Generate the positive model using the training data and evaluate each variant
|
||||
final GaussianMixtureModel goodModel = engine.generateModel( dataManager.getTrainingData() );
|
||||
engine.evaluateData( dataManager.getData(), goodModel, false );
|
||||
|
||||
// Generate the negative model using the worst performing data and evaluate each variant contrastively
|
||||
final GaussianMixtureModel badModel = engine.generateModel( dataManager.selectWorstVariants( VRAC.PERCENT_BAD_VARIANTS, VRAC.MIN_NUM_BAD_VARIANTS ) );
|
||||
engine.evaluateData( dataManager.getData(), badModel, true );
|
||||
engine.calculateWorstPerformingAnnotation( dataManager.getData(), goodModel, badModel );
|
||||
|
||||
final ExpandingArrayList<VariantDatum> randomData = dataManager.getRandomDataForPlotting( 6000 );
|
||||
|
||||
// Find the VQSLOD cutoff values which correspond to the various tranches of calls requested by the user
|
||||
final int nCallsAtTruth = TrancheManager.countCallsAtTruth( dataManager.getData(), Double.NEGATIVE_INFINITY );
|
||||
final TrancheManager.SelectionMetric metric = new TrancheManager.TruthSensitivityMetric( nCallsAtTruth );
|
||||
final List<Tranche> tranches = TrancheManager.findTranches( dataManager.getData(), TS_TRANCHES, metric );
|
||||
tranchesStream.print(Tranche.tranchesString( tranches ));
|
||||
|
||||
// Find the filtering lodCutoff for display on the model PDFs. Red variants are those which were below the cutoff and filtered out of the final callset.
|
||||
double lodCutoff = 0.0;
|
||||
for( final Tranche tranche : tranches ) {
|
||||
if( MathUtils.compareDoubles(tranche.ts, TS_FILTER_LEVEL, 0.0001)==0 ) {
|
||||
|
|
@ -252,7 +257,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
dataManager.writeOutRecalibrationTable( RECAL_FILE );
|
||||
if( RSCRIPT_FILE != null ) {
|
||||
logger.info( "Writing out visualization Rscript file...");
|
||||
createVisualizationScript( randomData, goodModel, badModel, lodCutoff );
|
||||
createVisualizationScript( dataManager.getRandomDataForPlotting( 6000 ), goodModel, badModel, lodCutoff );
|
||||
}
|
||||
|
||||
// Execute Rscript command to create the tranche plot
|
||||
|
|
@ -278,6 +283,8 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
} catch( FileNotFoundException e ) {
|
||||
throw new UserException.CouldNotCreateOutputFile(RSCRIPT_FILE, "", e);
|
||||
}
|
||||
|
||||
// We make extensive use of the ggplot2 library: http://had.co.nz/ggplot2/
|
||||
stream.println("library(ggplot2)");
|
||||
|
||||
createArrangeFunction( stream );
|
||||
|
|
|
|||
|
|
@ -1,3 +1,28 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
|
||||
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
|
|
|
|||
|
|
@ -1,3 +1,28 @@
|
|||
/*
|
||||
* Copyright (c) 2011 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
|
|
|
|||
|
|
@ -225,7 +225,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
|
|||
loc = pos + alleles.get(0).length() - 1;
|
||||
} else if ( !isSingleNucleotideEvent(alleles) ) {
|
||||
ArrayList<Allele> newAlleles = new ArrayList<Allele>();
|
||||
loc = clipAlleles(pos, ref, alleles, newAlleles);
|
||||
loc = clipAlleles(pos, ref, alleles, newAlleles, lineNo);
|
||||
alleles = newAlleles;
|
||||
}
|
||||
|
||||
|
|
@ -504,7 +504,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
|
|||
* @param clippedAlleles output list of clipped alleles
|
||||
* @return a list of alleles, clipped to the reference
|
||||
*/
|
||||
protected static long clipAlleles(long position, String ref, List<Allele> unclippedAlleles, List<Allele> clippedAlleles) {
|
||||
protected static long clipAlleles(long position, String ref, List<Allele> unclippedAlleles, List<Allele> clippedAlleles, int lineNo) {
|
||||
|
||||
// Note that the computation of forward clipping here is meant only to see whether there is a common
|
||||
// base to all alleles, and to correctly compute reverse clipping,
|
||||
|
|
@ -522,6 +522,8 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
|
|||
}
|
||||
if (a.length() - reverseClipped <= forwardClipping || a.length() - forwardClipping == 0)
|
||||
clipping = false;
|
||||
else if (ref.length() == reverseClipped)
|
||||
generateException("bad alleles encountered", lineNo);
|
||||
else if (a.getBases()[a.length()-reverseClipped-1] != ref.getBytes()[ref.length()-reverseClipped-1])
|
||||
clipping = false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1343,6 +1343,15 @@ public class VariantContext implements Feature { // to enable tribble integration
|
|||
return (int)stop;
|
||||
}
|
||||
|
||||
/**
 * Reports whether any allele returned by {@code getAlleles()} is symbolic.
 *
 * @return true as soon as one allele answers true to {@code isSymbolic()};
 *         false if none does (including when there are no alleles)
 */
private boolean hasSymbolicAlleles() {
|
||||
for (Allele a: getAlleles()) {
|
||||
if (a.isSymbolic()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC, byte inputRefBase, boolean refBaseShouldBeAppliedToEndOfAlleles) {
|
||||
Allele refAllele = inputVC.getReference();
|
||||
|
||||
|
|
@ -1352,7 +1361,9 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
// We need to pad a VC with a common base if the length of the reference allele is less than the length of the VariantContext.
|
||||
// This happens because the position of e.g. an indel is always one before the actual event (as per VCF convention).
|
||||
long locLength = (inputVC.getEnd() - inputVC.getStart()) + 1;
|
||||
if (refAllele.length() == locLength)
|
||||
if (inputVC.hasSymbolicAlleles())
|
||||
padVC = true;
|
||||
else if (refAllele.length() == locLength)
|
||||
padVC = false;
|
||||
else if (refAllele.length() == locLength-1)
|
||||
padVC = true;
|
||||
|
|
|
|||
|
|
@ -19,10 +19,7 @@ import org.broadinstitute.sting.gatk.phonehome.GATKRunReport
|
|||
class MethodsDevelopmentCallingPipeline extends QScript {
|
||||
qscript =>
|
||||
|
||||
@Argument(shortName="gatk", doc="gatk jar file", required=true)
|
||||
var gatkJarFile: File = _
|
||||
|
||||
@Argument(shortName="outputDir", doc="output directory", required=true)
|
||||
@Argument(shortName="outputDir", doc="output directory", required=false)
|
||||
var outputDir: String = "./"
|
||||
|
||||
@Argument(shortName="skipCalling", doc="skip the calling part of the pipeline and only run VQSR on preset, gold standard VCF files", required=false)
|
||||
|
|
@ -185,7 +182,6 @@ class MethodsDevelopmentCallingPipeline extends QScript {
|
|||
|
||||
trait UNIVERSAL_GATK_ARGS extends CommandLineGATK {
|
||||
logging_level = "INFO";
|
||||
jarFile = gatkJarFile;
|
||||
memoryLimit = 4;
|
||||
phone_home = if ( LOCAL_ET ) GATKRunReport.PhoneHomeOption.STANDARD else GATKRunReport.PhoneHomeOption.AWS_S3
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,5 +25,6 @@
|
|||
<module organisation="javax.activation" resolver="java.net" />
|
||||
<module organisation="net.java.dev.jna" resolver="maven2-repository.dev.java.net" />
|
||||
<module organisation="com.google.code.caliper" resolver="projects" />
|
||||
<module organisation="net.sf.gridscheduler" resolver="projects" />
|
||||
</modules>
|
||||
</ivysettings>
|
||||
|
|
|
|||
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,3 @@
|
|||
<!-- Ivy module descriptor declaring revision 6.2u5p2 of net.sf.gridscheduler/drmaa with
     status "release"; it lists no dependencies or publications. -->
<ivy-module version="1.0">
|
||||
<info organisation="net.sf.gridscheduler" module="drmaa" revision="6.2u5p2" status="release" />
|
||||
</ivy-module>
|
||||
Loading…
Reference in New Issue