Merge branch 'master' into diffengine

This commit is contained in:
Mark DePristo 2011-07-08 09:15:54 -04:00
commit 8add2a3866
24 changed files with 304 additions and 52 deletions

View File

@ -69,8 +69,6 @@
<property environment="env"/>
<property name="drmaa.jar" value="${env.SGE_ROOT}/lib/drmaa.jar" />
<patternset id="java.source.pattern">
<include name="${java.public.source.dir}/**/*.java" />
<include name="${java.private.source.dir}/**/*.java" if="include.private" />
@ -146,11 +144,7 @@
<property name="init.resolve.done" value="true"/>
</target>
<target name="init.gridengine" depends="init" if="include.gridengine">
<copy todir="lib" file="${drmaa.jar}"/>
</target>
<target name="resolve" depends="init.resolve,init,init.gridengine"
<target name="resolve" depends="init.resolve,init"
description="locate and download library dependencies">
<property name="ivy.conf" value="default"/>
<ivy:retrieve file="ivy.xml" conf="${ivy.conf}" />
@ -178,13 +172,23 @@
<property name="build.version" value="${git.describe.output}" />
</target>
<target name="untagged.build.version" depends="git.describe" unless="git.describe.succeeded">
<exec executable="git" outputproperty="build.version" failonerror="true">
<target name="git.rev-parse" depends="git.describe" unless="git.describe.succeeded">
<exec executable="git" outputproperty="git.rev-parse.output" resultproperty="git.rev-parse.exit.value" failonerror="false">
<arg line="rev-parse HEAD" />
</exec>
<condition property="git.rev-parse.succeeded">
<equals arg1="${git.rev-parse.exit.value}" arg2="0" />
</condition>
</target>
<target name="generate.build.version" depends="tagged.build.version, untagged.build.version" />
<target name="untagged.build.version" depends="git.rev-parse" if="git.rev-parse.succeeded">
<property name="build.version" value="${git.rev-parse.output}" />
</target>
<target name="generate.build.version" depends="tagged.build.version, untagged.build.version">
<!-- Set build.version to exported if no other value has been set -->
<property name="build.version" value="exported" />
</target>
<!-- define some key locations that might change based on how the build is run -->
<target name="init" depends="generate.build.version">
@ -214,12 +218,6 @@
</or>
</condition>
<!-- Include Grid Engine in the compile if SGE_ROOT is available. -->
<!-- Based off of http://wikis.sun.com/display/GridEngine/Automating+Grid+Engine+Functions+Through+DRMAA -->
<condition property="include.gridengine">
<available file="${drmaa.jar}"/>
</condition>
<echo message="GATK build : ${gatk.target}"/>
<echo message="Scala build : ${scala.target}"/>
<echo message="source revision : ${build.version}"/>
@ -357,7 +355,6 @@
<src path="${scala.public.source.dir}" />
<src path="${queue-extensions.source.dir}" />
<include name="**/*.scala"/>
<exclude name="**/gridengine/**" unless="include.gridengine" />
</scalac>
</target>
@ -374,7 +371,6 @@
<scalac fork="true" jvmargs="-Xmx512m" destdir="${scala.classes}" classpathref="scala.dependencies" deprecation="yes" unchecked="yes">
<src path="${scala.private.source.dir}" />
<include name="**/*.scala"/>
<exclude name="**/gridengine/**" unless="include.gridengine" />
</scalac>
</target>
@ -464,7 +460,7 @@
<exclude name="**/utils/variantcontext/**/*.class"/>
</fileset>
<fileset dir="${java.classes}" includes="**/commandline/**/*.class"/>
<fileset dir="${java.classes}" includes="**/sting/datasources/**/*.class"/>
<fileset dir="${java.classes}" includes="**/sting/pipeline/**/*.class"/>
<fileset dir="${java.classes}" includes="**/sting/jna/**/*.class"/>
<fileset dir="${java.classes}" includes="net/sf/picard/**/*.class"/>
<fileset dir="${java.classes}" includes="net/sf/samtools/**/*.class"/>
@ -682,7 +678,6 @@
<src path="${scala.public.test.sources}" />
<src path="${scala.private.test.sources}" />
<include name="**/*.scala"/>
<exclude name="**/gridengine/**" unless="include.gridengine" />
<classpath>
<path refid="scala.dependencies"/>
<pathelement location="${scala.test.classes}"/>

View File

@ -48,6 +48,9 @@
<!-- Dependencies for amazon.com S3 support -->
<dependency org="net.java.dev.jets3t" name="jets3t" rev="0.8.0"/>
<!-- Dependencies for GridEngine -->
<dependency org="net.sf.gridscheduler" name="drmaa" rev="latest.integration"/>
<!-- Scala dependancies -->
<dependency org="org.scala-lang" name="scala-compiler" rev="2.8.1"/>
<dependency org="org.scala-lang" name="scala-library" rev="2.8.1"/>

View File

@ -61,7 +61,7 @@ public class AnalyzeCovariates extends CommandLineProgram {
@Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/tools/apps/R-2.6.0/bin/Rscript", required = false)
private String PATH_TO_RSCRIPT = "Rscript";
@Argument(fullName = "path_to_resources", shortName = "resources", doc = "Path to resources folder holding the Sting R scripts.", required = false)
private String PATH_TO_RESOURCES = "R/";
private String PATH_TO_RESOURCES = "public/R/";
@Argument(fullName = "ignoreQ", shortName = "ignoreQ", doc = "Ignore bases with reported quality less than this number.", required = false)
private int IGNORE_QSCORES_LESS_THAN = 5;
@Argument(fullName = "numRG", shortName = "numRG", doc = "Only process N read groups. Default value: -1 (process all read groups)", required = false)

View File

@ -92,25 +92,31 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "indel_heterozygosity", shortName = "indelHeterozygosity", doc = "Heterozygosity for indel calling", required = false)
public double INDEL_HETEROZYGOSITY = 1.0/8000;
@Hidden
@Argument(fullName = "indelGapContinuationPenalty", shortName = "indelGCP", doc = "Indel gap continuation penalty", required = false)
public double INDEL_GAP_CONTINUATION_PENALTY = 10.0;
@Hidden
@Argument(fullName = "indelGapOpenPenalty", shortName = "indelGOP", doc = "Indel gap open penalty", required = false)
public double INDEL_GAP_OPEN_PENALTY = 45.0;
@Hidden
@Argument(fullName = "indelHaplotypeSize", shortName = "indelHSize", doc = "Indel haplotype size", required = false)
public int INDEL_HAPLOTYPE_SIZE = 80;
@Hidden
@Argument(fullName = "doContextDependentGapPenalties", shortName = "doCDP", doc = "Vary gap penalties by context", required = false)
public boolean DO_CONTEXT_DEPENDENT_PENALTIES = true;
//gdebug+
@Hidden
// experimental arguments, NOT TO BE USED BY ANYONE WHOSE INITIALS AREN'T GDA!!!
@Hidden
@Argument(fullName = "getGapPenaltiesFromData", shortName = "dataGP", doc = "Vary gap penalties by context - EXPERIMENTAL, DO NO USE", required = false)
public boolean GET_GAP_PENALTIES_FROM_DATA = false;
@Hidden
@Argument(fullName="indel_recal_file", shortName="recalFile", required=false, doc="Filename for the input covariates table recalibration .csv file - EXPERIMENTAL, DO NO USE")
public File INDEL_RECAL_FILE = new File("indel.recal_data.csv");
@Hidden
@Argument(fullName = "indelDebug", shortName = "indelDebug", doc = "Output indel debug info", required = false)
public boolean OUTPUT_DEBUG_INDEL_INFO = false;
@Hidden

View File

@ -5,6 +5,7 @@ import net.sf.samtools.*;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.util.*;
@ -113,9 +114,10 @@ public class ConstrainedMateFixingManager {
HashMap<String, SAMRecordHashObject> forMateMatching = new HashMap<String, SAMRecordHashObject>();
TreeSet<SAMRecord> waitingReads = new TreeSet<SAMRecord>(comparer);
private <T> T remove(TreeSet<T> treeSet) {
final T first = treeSet.first();
treeSet.remove(first);
private SAMRecord remove(TreeSet<SAMRecord> treeSet) {
final SAMRecord first = treeSet.first();
if ( !treeSet.remove(first) )
throw new UserException("Error caching SAM record " + first.getReadName() + ", which is usually caused by malformed SAM/BAM files in which multiple identical copies of a read are present.");
return first;
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The Broad Institute
* Copyright (c) 2011 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation

View File

@ -1,3 +1,28 @@
/*
* Copyright (c) 2011 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
import Jama.Matrix;
@ -72,7 +97,7 @@ public class GaussianMixtureModel {
int ttt = 0;
while( ttt++ < numIterations ) {
// Estep: assign each variant to the nearest cluster
// E step: assign each variant to the nearest cluster
for( final VariantDatum datum : data ) {
double minDistance = Double.MAX_VALUE;
MultivariateGaussian minGaussian = null;
@ -87,7 +112,7 @@ public class GaussianMixtureModel {
datum.assignment = minGaussian;
}
// Mstep: update gaussian means based on assigned variants
// M step: update gaussian means based on assigned variants
for( final MultivariateGaussian gaussian : gaussians ) {
gaussian.zeroOutMu();
int numAssigned = 0;
@ -204,26 +229,29 @@ public class GaussianMixtureModel {
}
public double evaluateDatumMarginalized( final VariantDatum datum ) {
int numVals = 0;
int numSamples = 0;
double sumPVarInGaussian = 0.0;
int numIter = 10;
final int numIterPerMissingAnnotation = 10; // Trade off here between speed of computation and accuracy of the marginalization
final double[] pVarInGaussianLog10 = new double[gaussians.size()];
// for each dimension
for( int iii = 0; iii < datum.annotations.length; iii++ ) {
// marginalize over the missing dimension by drawing X random values for the missing annotation and averaging the lod
// if it is missing marginalize over the missing dimension by drawing X random values for the missing annotation and averaging the lod
if( datum.isNull[iii] ) {
for( int ttt = 0; ttt < numIter; ttt++ ) {
datum.annotations[iii] = Normal.staticNextDouble(0.0, 1.0);
for( int ttt = 0; ttt < numIterPerMissingAnnotation; ttt++ ) {
datum.annotations[iii] = GenomeAnalysisEngine.getRandomGenerator().nextGaussian(); // draw a random sample from the standard normal distribution
// evaluate this random data point
int gaussianIndex = 0;
for( final MultivariateGaussian gaussian : gaussians ) {
pVarInGaussianLog10[gaussianIndex++] = gaussian.pMixtureLog10 + gaussian.evaluateDatumLog10( datum );
}
sumPVarInGaussian += Math.pow(10.0, MathUtils.log10sumLog10(pVarInGaussianLog10));
numVals++;
// add this sample's probability to the pile in order to take an average in the end
sumPVarInGaussian += Math.pow(10.0, MathUtils.log10sumLog10(pVarInGaussianLog10)); // p = 10 ^ Sum(pi_k * p(v|n,k))
numSamples++;
}
}
}
return Math.log10( sumPVarInGaussian / ((double) numVals) );
return Math.log10( sumPVarInGaussian / ((double) numSamples) );
}
}

View File

@ -1,3 +1,28 @@
/*
* Copyright (c) 2011 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
import Jama.Matrix;

View File

@ -1,3 +1,28 @@
/*
* Copyright (c) 2011 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
import org.apache.log4j.Logger;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The Broad Institute
* Copyright (c) 2011 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation

View File

@ -1,3 +1,28 @@
/*
* Copyright (c) 2011 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
import org.apache.log4j.Logger;

View File

@ -1,8 +1,32 @@
/*
* Copyright (c) 2011 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantQualityScore;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.text.XReadLines;

View File

@ -1,6 +1,30 @@
/*
* Copyright (c) 2011 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
import cern.jet.random.Normal;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
@ -66,7 +90,7 @@ public class VariantDataManager {
meanVector[iii] = theMean;
varianceVector[iii] = theSTD;
for( final VariantDatum datum : data ) {
datum.annotations[iii] = ( datum.isNull[iii] ? Normal.staticNextDouble(0.0, 1.0) : ( datum.annotations[iii] - theMean ) / theSTD );
datum.annotations[iii] = ( datum.isNull[iii] ? GenomeAnalysisEngine.getRandomGenerator().nextGaussian() : ( datum.annotations[iii] - theMean ) / theSTD );
// Each data point is now [ (x - mean) / standard deviation ]
if( annotationKeys.get(iii).toLowerCase().contains("ranksum") && datum.isNull[iii] && datum.annotations[iii] > 0.0 ) {
datum.annotations[iii] /= 3.0;

View File

@ -1,3 +1,28 @@
/*
* Copyright (c) 2011 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
/**

View File

@ -88,7 +88,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
@Argument(fullName="rscript_file", shortName="rscriptFile", doc="The output rscript file generated by the VQSR to aid in visualization of the input data and learned model", required=false)
private String RSCRIPT_FILE = null;
@Argument(fullName = "path_to_resources", shortName = "resources", doc = "Path to resources folder holding the Sting R scripts.", required=false)
private String PATH_TO_RESOURCES = "R/";
private String PATH_TO_RESOURCES = "public/R/";
@Argument(fullName="ts_filter_level", shortName="ts_filter_level", doc="The truth sensitivity level at which to start filtering, used here to indicate filtered variants in plots", required=false)
private double TS_FILTER_LEVEL = 99.0;
@ -118,6 +118,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
//---------------------------------------------------------------------------------------------------------------
public void initialize() {
if( !PATH_TO_RESOURCES.endsWith("/") ) { PATH_TO_RESOURCES = PATH_TO_RESOURCES + "/"; }
dataManager = new VariantDataManager( new ArrayList<String>(Arrays.asList(USE_ANNOTATIONS)), VRAC );
if( IGNORE_INPUT_FILTERS != null ) {
@ -228,19 +229,23 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
public void onTraversalDone( final ExpandingArrayList<VariantDatum> reduceSum ) {
dataManager.setData( reduceSum );
dataManager.normalizeData(); // Each data point is now (x - mean) / standard deviation
// Generate the positive model using the training data and evaluate each variant
final GaussianMixtureModel goodModel = engine.generateModel( dataManager.getTrainingData() );
engine.evaluateData( dataManager.getData(), goodModel, false );
// Generate the negative model using the worst performing data and evaluate each variant contrastively
final GaussianMixtureModel badModel = engine.generateModel( dataManager.selectWorstVariants( VRAC.PERCENT_BAD_VARIANTS, VRAC.MIN_NUM_BAD_VARIANTS ) );
engine.evaluateData( dataManager.getData(), badModel, true );
engine.calculateWorstPerformingAnnotation( dataManager.getData(), goodModel, badModel );
final ExpandingArrayList<VariantDatum> randomData = dataManager.getRandomDataForPlotting( 6000 );
// Find the VQSLOD cutoff values which correspond to the various tranches of calls requested by the user
final int nCallsAtTruth = TrancheManager.countCallsAtTruth( dataManager.getData(), Double.NEGATIVE_INFINITY );
final TrancheManager.SelectionMetric metric = new TrancheManager.TruthSensitivityMetric( nCallsAtTruth );
final List<Tranche> tranches = TrancheManager.findTranches( dataManager.getData(), TS_TRANCHES, metric );
tranchesStream.print(Tranche.tranchesString( tranches ));
// Find the filtering lodCutoff for display on the model PDFs. Red variants are those which were below the cutoff and filtered out of the final callset.
double lodCutoff = 0.0;
for( final Tranche tranche : tranches ) {
if( MathUtils.compareDoubles(tranche.ts, TS_FILTER_LEVEL, 0.0001)==0 ) {
@ -252,7 +257,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
dataManager.writeOutRecalibrationTable( RECAL_FILE );
if( RSCRIPT_FILE != null ) {
logger.info( "Writing out visualization Rscript file...");
createVisualizationScript( randomData, goodModel, badModel, lodCutoff );
createVisualizationScript( dataManager.getRandomDataForPlotting( 6000 ), goodModel, badModel, lodCutoff );
}
// Execute Rscript command to create the tranche plot
@ -278,6 +283,8 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
} catch( FileNotFoundException e ) {
throw new UserException.CouldNotCreateOutputFile(RSCRIPT_FILE, "", e);
}
// We make extensive use of the ggplot2 library: http://had.co.nz/ggplot2/
stream.println("library(ggplot2)");
createArrangeFunction( stream );

View File

@ -1,3 +1,28 @@
/*
* Copyright (c) 2011 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
import org.broadinstitute.sting.commandline.Argument;

View File

@ -1,3 +1,28 @@
/*
* Copyright (c) 2011 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
import org.apache.log4j.Logger;

View File

@ -225,7 +225,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
loc = pos + alleles.get(0).length() - 1;
} else if ( !isSingleNucleotideEvent(alleles) ) {
ArrayList<Allele> newAlleles = new ArrayList<Allele>();
loc = clipAlleles(pos, ref, alleles, newAlleles);
loc = clipAlleles(pos, ref, alleles, newAlleles, lineNo);
alleles = newAlleles;
}
@ -504,7 +504,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
* @param clippedAlleles output list of clipped alleles
* @return a list of alleles, clipped to the reference
*/
protected static long clipAlleles(long position, String ref, List<Allele> unclippedAlleles, List<Allele> clippedAlleles) {
protected static long clipAlleles(long position, String ref, List<Allele> unclippedAlleles, List<Allele> clippedAlleles, int lineNo) {
// Note that the computation of forward clipping here is meant only to see whether there is a common
// base to all alleles, and to correctly compute reverse clipping,
@ -522,6 +522,8 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
}
if (a.length() - reverseClipped <= forwardClipping || a.length() - forwardClipping == 0)
clipping = false;
else if (ref.length() == reverseClipped)
generateException("bad alleles encountered", lineNo);
else if (a.getBases()[a.length()-reverseClipped-1] != ref.getBytes()[ref.length()-reverseClipped-1])
clipping = false;
}

View File

@ -1343,6 +1343,15 @@ public class VariantContext implements Feature { // to enable tribble intergrati
return (int)stop;
}
private boolean hasSymbolicAlleles() {
for (Allele a: getAlleles()) {
if (a.isSymbolic()) {
return true;
}
}
return false;
}
public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC, byte inputRefBase, boolean refBaseShouldBeAppliedToEndOfAlleles) {
Allele refAllele = inputVC.getReference();
@ -1352,7 +1361,9 @@ public class VariantContext implements Feature { // to enable tribble intergrati
// We need to pad a VC with a common base if the length of the reference allele is less than the length of the VariantContext.
// This happens because the position of e.g. an indel is always one before the actual event (as per VCF convention).
long locLength = (inputVC.getEnd() - inputVC.getStart()) + 1;
if (refAllele.length() == locLength)
if (inputVC.hasSymbolicAlleles())
padVC = true;
else if (refAllele.length() == locLength)
padVC = false;
else if (refAllele.length() == locLength-1)
padVC = true;

View File

@ -19,10 +19,7 @@ import org.broadinstitute.sting.gatk.phonehome.GATKRunReport
class MethodsDevelopmentCallingPipeline extends QScript {
qscript =>
@Argument(shortName="gatk", doc="gatk jar file", required=true)
var gatkJarFile: File = _
@Argument(shortName="outputDir", doc="output directory", required=true)
@Argument(shortName="outputDir", doc="output directory", required=false)
var outputDir: String = "./"
@Argument(shortName="skipCalling", doc="skip the calling part of the pipeline and only run VQSR on preset, gold standard VCF files", required=false)
@ -185,7 +182,6 @@ class MethodsDevelopmentCallingPipeline extends QScript {
trait UNIVERSAL_GATK_ARGS extends CommandLineGATK {
logging_level = "INFO";
jarFile = gatkJarFile;
memoryLimit = 4;
phone_home = if ( LOCAL_ET ) GATKRunReport.PhoneHomeOption.STANDARD else GATKRunReport.PhoneHomeOption.AWS_S3
}

View File

@ -25,5 +25,6 @@
<module organisation="javax.activation" resolver="java.net" />
<module organisation="net.java.dev.jna" resolver="maven2-repository.dev.java.net" />
<module organisation="com.google.code.caliper" resolver="projects" />
<module organisation="net.sf.gridscheduler" resolver="projects" />
</modules>
</ivysettings>

View File

@ -0,0 +1,3 @@
<ivy-module version="1.0">
<info organisation="net.sf.gridscheduler" module="drmaa" revision="6.2u5p2" status="release" />
</ivy-module>