Updating VQSR for new RodBinding syntax. Cleaning up indel specific parts of VQSR.
This commit is contained in:
parent
9e53fd6880
commit
c60cf52f73
|
|
@ -28,9 +28,9 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
|
|||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Input;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
|
|
@ -56,6 +56,11 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> {
|
|||
/////////////////////////////
|
||||
// Inputs
|
||||
/////////////////////////////
|
||||
/**
|
||||
* The raw input variants to be recalibrated.
|
||||
*/
|
||||
@Input(fullName="input", shortName = "input", doc="The raw input variants to be recalibrated", required=true)
|
||||
public List<RodBinding<VariantContext>> input;
|
||||
@Input(fullName="recal_file", shortName="recalFile", doc="The output recal file used by ApplyRecalibration", required=true)
|
||||
private File RECAL_FILE;
|
||||
@Input(fullName="tranches_file", shortName="tranchesFile", doc="The input tranches file describing where to cut the data", required=true)
|
||||
|
|
@ -101,17 +106,8 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
Collections.reverse(tranches); // this algorithm wants the tranches ordered from best (lowest truth sensitivity) to worst (highest truth sensitivity)
|
||||
|
||||
for( final ReferenceOrderedDataSource d : this.getToolkit().getRodDataSources() ) {
|
||||
if( d.getName().startsWith("input") ) {
|
||||
inputNames.add(d.getName());
|
||||
logger.info("Found input variant track with name " + d.getName());
|
||||
} else {
|
||||
logger.info("Not evaluating ROD binding " + d.getName());
|
||||
}
|
||||
}
|
||||
|
||||
if( inputNames.size() == 0 ) {
|
||||
throw new UserException.BadInput( "No input variant tracks found. Input variant binding names must begin with 'input'." );
|
||||
for( final RodBinding rod : input ) {
|
||||
inputNames.add( rod.getName() );
|
||||
}
|
||||
|
||||
if( IGNORE_INPUT_FILTERS != null ) {
|
||||
|
|
@ -168,7 +164,7 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> {
|
|||
return 1;
|
||||
}
|
||||
|
||||
for( VariantContext vc : tracker.getValues(VariantContext.class, inputNames, context.getLocation()) ) {
|
||||
for( VariantContext vc : tracker.getValues(input, context.getLocation()) ) {
|
||||
if( vc != null ) {
|
||||
if( VariantRecalibrator.checkRecalibrationMode( vc, MODE ) && (vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters())) ) {
|
||||
String filterString = null;
|
||||
|
|
|
|||
|
|
@ -26,10 +26,10 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
@ -38,6 +38,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|||
import java.io.PrintStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
|
|
@ -105,31 +106,6 @@ public class VariantDataManager {
|
|||
}
|
||||
}
|
||||
|
||||
public void addTrainingSet( final TrainingSet trainingSet ) {
|
||||
trainingSets.add( trainingSet );
|
||||
}
|
||||
|
||||
public boolean checkHasTrainingSet() {
|
||||
for( final TrainingSet trainingSet : trainingSets ) {
|
||||
if( trainingSet.isTraining ) { return true; }
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public boolean checkHasTruthSet() {
|
||||
for( final TrainingSet trainingSet : trainingSets ) {
|
||||
if( trainingSet.isTruth ) { return true; }
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public boolean checkHasKnownSet() {
|
||||
for( final TrainingSet trainingSet : trainingSets ) {
|
||||
if( trainingSet.isKnown ) { return true; }
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public ExpandingArrayList<VariantDatum> getTrainingData() {
|
||||
final ExpandingArrayList<VariantDatum> trainingData = new ExpandingArrayList<VariantDatum>();
|
||||
for( final VariantDatum datum : data ) {
|
||||
|
|
@ -240,13 +216,14 @@ public class VariantDataManager {
|
|||
if( jitter && annotationKey.equalsIgnoreCase("HRUN") ) { // Integer valued annotations must be jittered a bit to work in this GMM
|
||||
value += -0.25 + 0.5 * GenomeAnalysisEngine.getRandomGenerator().nextDouble();
|
||||
}
|
||||
if (vc.isIndel() && annotationKey.equalsIgnoreCase("QD")) {
|
||||
// normalize QD by event length for indel case
|
||||
int eventLength = Math.abs(vc.getAlternateAllele(0).getBaseString().length() - vc.getReference().getBaseString().length()); // ignore multi-allelic complication here for now
|
||||
if (eventLength > 0) // sanity check
|
||||
value /= (double)eventLength;
|
||||
|
||||
}
|
||||
if (vc.isIndel() && annotationKey.equalsIgnoreCase("QD")) {
|
||||
// normalize QD by event length for indel case
|
||||
int eventLength = Math.abs(vc.getAlternateAllele(0).getBaseString().length() - vc.getReference().getBaseString().length()); // ignore multi-allelic complication here for now
|
||||
if (eventLength > 0) { // sanity check
|
||||
value /= (double)eventLength;
|
||||
}
|
||||
}
|
||||
|
||||
if( jitter && annotationKey.equalsIgnoreCase("HaplotypeScore") && MathUtils.compareDoubles(value, 0.0, 0.0001) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); }
|
||||
if( jitter && annotationKey.equalsIgnoreCase("FS") && MathUtils.compareDoubles(value, 0.0, 0.001) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); }
|
||||
|
|
@ -257,30 +234,44 @@ public class VariantDataManager {
|
|||
return value;
|
||||
}
|
||||
|
||||
public void parseTrainingSets( final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context, final VariantContext evalVC, final VariantDatum datum, final boolean TRUST_ALL_POLYMORPHIC ) {
|
||||
public void parseTrainingSets( final RefMetaDataTracker tracker, final GenomeLoc genomeLoc, final VariantContext evalVC, final VariantDatum datum, final boolean TRUST_ALL_POLYMORPHIC, final HashMap<String, Double> rodToPriorMap,
|
||||
final List<RodBinding<VariantContext>> training, final List<RodBinding<VariantContext>> truth, final List<RodBinding<VariantContext>> known, final List<RodBinding<VariantContext>> badSites) {
|
||||
datum.isKnown = false;
|
||||
datum.atTruthSite = false;
|
||||
datum.atTrainingSite = false;
|
||||
datum.atAntiTrainingSite = false;
|
||||
datum.prior = 2.0;
|
||||
datum.consensusCount = 0;
|
||||
|
||||
for( final TrainingSet trainingSet : trainingSets ) {
|
||||
for( final VariantContext trainVC : tracker.getValues(VariantContext.class, trainingSet.name, ref.getLocus()) ) {
|
||||
if( trainVC != null && trainVC.isNotFiltered() && trainVC.isVariant() &&
|
||||
((evalVC.isSNP() && trainVC.isSNP()) || ((evalVC.isIndel()||evalVC.isMixed()) && (trainVC.isIndel()||trainVC.isMixed()))) &&
|
||||
(TRUST_ALL_POLYMORPHIC || !trainVC.hasGenotypes() || trainVC.isPolymorphic()) ) {
|
||||
|
||||
datum.isKnown = datum.isKnown || trainingSet.isKnown;
|
||||
datum.atTruthSite = datum.atTruthSite || trainingSet.isTruth;
|
||||
datum.atTrainingSite = datum.atTrainingSite || trainingSet.isTraining;
|
||||
datum.prior = Math.max( datum.prior, trainingSet.prior );
|
||||
datum.consensusCount += ( trainingSet.isConsensus ? 1 : 0 );
|
||||
for( final RodBinding<VariantContext> rod : training ) {
|
||||
for( final VariantContext trainVC : tracker.getValues(rod, genomeLoc) ) {
|
||||
if( isValidVariant( evalVC, trainVC, TRUST_ALL_POLYMORPHIC ) ) {
|
||||
datum.atTrainingSite = true;
|
||||
datum.prior = Math.max( datum.prior, (rodToPriorMap.containsKey(rod.getName()) ? rodToPriorMap.get(rod.getName()) : 0.0) );
|
||||
}
|
||||
}
|
||||
}
|
||||
for( final RodBinding<VariantContext> rod : truth ) {
|
||||
for( final VariantContext trainVC : tracker.getValues(rod, genomeLoc) ) {
|
||||
if( isValidVariant( evalVC, trainVC, TRUST_ALL_POLYMORPHIC ) ) {
|
||||
datum.atTruthSite = true;
|
||||
datum.prior = Math.max( datum.prior, (rodToPriorMap.containsKey(rod.getName()) ? rodToPriorMap.get(rod.getName()) : 0.0) );
|
||||
}
|
||||
}
|
||||
}
|
||||
for( final RodBinding<VariantContext> rod : known ) {
|
||||
for( final VariantContext trainVC : tracker.getValues(rod, genomeLoc) ) {
|
||||
if( isValidVariant( evalVC, trainVC, TRUST_ALL_POLYMORPHIC ) ) {
|
||||
datum.isKnown = true;
|
||||
datum.prior = Math.max( datum.prior, (rodToPriorMap.containsKey(rod.getName()) ? rodToPriorMap.get(rod.getName()) : 0.0) );
|
||||
}
|
||||
}
|
||||
}
|
||||
for( final RodBinding<VariantContext> rod : badSites ) {
|
||||
for( final VariantContext trainVC : tracker.getValues(rod, genomeLoc) ) {
|
||||
if( trainVC != null ) {
|
||||
datum.atAntiTrainingSite = datum.atAntiTrainingSite || trainingSet.isAntiTraining;
|
||||
datum.atAntiTrainingSite = true;
|
||||
datum.prior = Math.max( datum.prior, (rodToPriorMap.containsKey(rod.getName()) ? rodToPriorMap.get(rod.getName()) : 0.0) );
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -292,4 +283,10 @@ public class VariantDataManager {
|
|||
(datum.worstAnnotation != -1 ? annotationKeys.get(datum.worstAnnotation) : "NULL")));
|
||||
}
|
||||
}
|
||||
|
||||
private boolean isValidVariant( final VariantContext evalVC, final VariantContext trainVC, final boolean TRUST_ALL_POLYMORPHIC) {
|
||||
return trainVC != null && trainVC.isNotFiltered() && trainVC.isVariant() &&
|
||||
((evalVC.isSNP() && trainVC.isSNP()) || ((evalVC.isIndel()||evalVC.isMixed()) && (trainVC.isIndel()||trainVC.isMixed()))) &&
|
||||
(TRUST_ALL_POLYMORPHIC || !trainVC.hasGenotypes() || trainVC.isPolymorphic());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,13 +25,9 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
|
||||
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.ArgumentCollection;
|
||||
import org.broadinstitute.sting.commandline.Hidden;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
||||
|
|
@ -57,11 +53,51 @@ import java.util.*;
|
|||
|
||||
public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDatum>, ExpandingArrayList<VariantDatum>> implements TreeReducible<ExpandingArrayList<VariantDatum>> {
|
||||
|
||||
public static final String VQS_LOD_KEY = "VQSLOD";
|
||||
public static final String CULPRIT_KEY = "culprit";
|
||||
public static final String VQS_LOD_KEY = "VQSLOD"; // Log odds ratio of being a true variant versus being false under the trained gaussian mixture model
|
||||
public static final String CULPRIT_KEY = "culprit"; // The annotation which was the worst performing in the Gaussian mixture model, likely the reason why the variant was filtered out
|
||||
|
||||
@ArgumentCollection private VariantRecalibratorArgumentCollection VRAC = new VariantRecalibratorArgumentCollection();
|
||||
|
||||
/////////////////////////////
|
||||
// Inputs
|
||||
/////////////////////////////
|
||||
/**
|
||||
* The raw input variants to be recalibrated.
|
||||
*/
|
||||
@Input(fullName="input", shortName = "input", doc="The raw input variants to be recalibrated", required=true)
|
||||
public List<RodBinding<VariantContext>> input;
|
||||
/**
|
||||
* A list of training variants used to train the Gaussian mixture model.
|
||||
*
|
||||
* Input variants which are found to overlap with these training sites are used to build the Gaussian mixture model.
|
||||
*/
|
||||
@Input(fullName="training", shortName = "training", doc="A list of training variants used to train the Gaussian mixture model", required=true)
|
||||
public List<RodBinding<VariantContext>> training;
|
||||
/**
|
||||
* A list of true variants to be used when deciding the truth sensitivity cut of the final callset.
|
||||
*
|
||||
* When deciding where to set the cutoff in VQSLOD sensitivity to these truth sites is used.
|
||||
* Typically one might want to say I dropped my threshold until I got back 99% of HapMap sites, for example.
|
||||
*/
|
||||
@Input(fullName="truth", shortName = "truth", doc="A list of true variants to be used when deciding the truth sensitivity cut of the final callset", required=true)
|
||||
public List<RodBinding<VariantContext>> truth;
|
||||
/**
|
||||
* A list of known variants to be used for metric comparison purposes.
|
||||
*
|
||||
* The known / novel status of a variant isn't used by the algorithm itself and is only used for reporting / display purposes.
|
||||
* The output metrics are stratified by known status in order to aid in comparisons with other call sets.
|
||||
*/
|
||||
@Input(fullName="known", shortName = "known", doc="A list of known variants to be used for metric comparison purposes", required=false)
|
||||
public List<RodBinding<VariantContext>> known = Collections.emptyList();
|
||||
/**
|
||||
* A list of known bad variants used to supplement training the negative model.
|
||||
*
|
||||
* In addition to using the worst 3% of variants as compared to the Gaussian mixture model, we can also supplement the list
|
||||
* with a database of known bad variants. Maybe these are loci which are frequently filtered out in many projects (centromere, for example).
|
||||
*/
|
||||
@Input(fullName="badSites", shortName = "badSites", doc="A list of known bad variants used to supplement training the negative model", required=false)
|
||||
public List<RodBinding<VariantContext>> badSites = Collections.emptyList();
|
||||
|
||||
/////////////////////////////
|
||||
// Outputs
|
||||
/////////////////////////////
|
||||
|
|
@ -96,9 +132,9 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
@Hidden
|
||||
@Argument(fullName = "trustAllPolymorphic", shortName = "allPoly", doc = "Trust that all the input training sets' unfiltered records contain only polymorphic sites to drastically speed up the computation.", required = false)
|
||||
protected Boolean TRUST_ALL_POLYMORPHIC = false;
|
||||
@Hidden
|
||||
@Argument(fullName = "projectConsensus", shortName = "projectConsensus", doc = "Perform 1000G project consensus. This implies an extra prior factor based on the individual participant callsets passed in with consensus=true rod binding tags.", required = false)
|
||||
protected Boolean PERFORM_PROJECT_CONSENSUS = false;
|
||||
//@Hidden
|
||||
//@Argument(fullName = "projectConsensus", shortName = "projectConsensus", doc = "Perform 1000G project consensus. This implies an extra prior factor based on the individual participant callsets passed in with consensus=true rod binding tags.", required = false)
|
||||
//protected Boolean PERFORM_PROJECT_CONSENSUS = false;
|
||||
|
||||
/////////////////////////////
|
||||
// Private Member Variables
|
||||
|
|
@ -106,8 +142,8 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
private VariantDataManager dataManager;
|
||||
private PrintStream tranchesStream;
|
||||
private final Set<String> ignoreInputFilterSet = new TreeSet<String>();
|
||||
private final Set<String> inputNames = new HashSet<String>();
|
||||
private final VariantRecalibratorEngine engine = new VariantRecalibratorEngine( VRAC );
|
||||
private final HashMap<String, Double> rodToPriorMap = new HashMap<String, Double>();
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
|
|
@ -123,31 +159,24 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
ignoreInputFilterSet.addAll( Arrays.asList(IGNORE_INPUT_FILTERS) );
|
||||
}
|
||||
|
||||
for( ReferenceOrderedDataSource d : this.getToolkit().getRodDataSources() ) {
|
||||
if( d.getName().toLowerCase().startsWith("input") ) {
|
||||
inputNames.add(d.getName());
|
||||
logger.info( "Found input variant track with name " + d.getName() );
|
||||
} else {
|
||||
dataManager.addTrainingSet( new TrainingSet(d.getName(), d.getTags()) );
|
||||
}
|
||||
}
|
||||
|
||||
if( !dataManager.checkHasTrainingSet() ) {
|
||||
throw new UserException.CommandLineException( "No training set found! Please provide sets of known polymorphic loci marked with the training=true ROD binding tag. For example, -B:hapmap,VCF,known=false,training=true,truth=true,prior=12.0 hapmapFile.vcf" );
|
||||
}
|
||||
if( !dataManager.checkHasTruthSet() ) {
|
||||
throw new UserException.CommandLineException( "No truth set found! Please provide sets of known polymorphic loci marked with the truth=true ROD binding tag. For example, -B:hapmap,VCF,known=false,training=true,truth=true,prior=12.0 hapmapFile.vcf" );
|
||||
}
|
||||
|
||||
if( inputNames.size() == 0 ) {
|
||||
throw new UserException.BadInput( "No input variant tracks found. Input variant binding names must begin with 'input'." );
|
||||
}
|
||||
|
||||
try {
|
||||
tranchesStream = new PrintStream(TRANCHES_FILE);
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new UserException.CouldNotCreateOutputFile(TRANCHES_FILE, e);
|
||||
}
|
||||
|
||||
final ArrayList<RodBinding<VariantContext>> allInputBindings = new ArrayList<RodBinding<VariantContext>>();
|
||||
allInputBindings.addAll(truth);
|
||||
allInputBindings.addAll(training);
|
||||
allInputBindings.addAll(known);
|
||||
allInputBindings.addAll(badSites);
|
||||
for( final RodBinding<VariantContext> rod : allInputBindings ) {
|
||||
try {
|
||||
rodToPriorMap.put(rod.getName(), (rod.getTags().containsKey("prior") ? Double.parseDouble(rod.getTags().getValue("prior")) : 0.0) );
|
||||
} catch( NumberFormatException e ) {
|
||||
throw new UserException.BadInput("Bad rod binding syntax. Prior key-value tag detected but isn't parsable. Expecting something like -training:prior=12.0 my.set.vcf");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
|
@ -163,10 +192,12 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
return mapList;
|
||||
}
|
||||
|
||||
for( final VariantContext vc : tracker.getValues(VariantContext.class, inputNames, context.getLocation()) ) {
|
||||
for( final VariantContext vc : tracker.getValues(input, context.getLocation()) ) {
|
||||
if( vc != null && ( vc.isNotFiltered() || ignoreInputFilterSet.containsAll(vc.getFilters()) ) ) {
|
||||
if( checkRecalibrationMode( vc, VRAC.MODE ) ) {
|
||||
final VariantDatum datum = new VariantDatum();
|
||||
|
||||
// Populate the datum with lots of fields from the VariantContext, unfortunately the VC is too big so we just pull in only the things we absolutely need.
|
||||
dataManager.decodeAnnotations( datum, vc, true ); //BUGBUG: when run with HierarchicalMicroScheduler this is non-deterministic because order of calls depends on load of machine
|
||||
datum.contig = vc.getChr();
|
||||
datum.start = vc.getStart();
|
||||
|
|
@ -176,12 +207,12 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
datum.isTransition = datum.isSNP && VariantContextUtils.isTransition(vc);
|
||||
|
||||
// Loop through the training data sets and if they overlap this loci then update the prior and training status appropriately
|
||||
dataManager.parseTrainingSets( tracker, ref, context, vc, datum, TRUST_ALL_POLYMORPHIC );
|
||||
dataManager.parseTrainingSets( tracker, context.getLocation(), vc, datum, TRUST_ALL_POLYMORPHIC, rodToPriorMap, training, truth, known, badSites );
|
||||
double priorFactor = QualityUtils.qualToProb( datum.prior );
|
||||
if( PERFORM_PROJECT_CONSENSUS ) {
|
||||
final double consensusPrior = QualityUtils.qualToProb( 1.0 + 5.0 * datum.consensusCount );
|
||||
priorFactor = 1.0 - ((1.0 - priorFactor) * (1.0 - consensusPrior));
|
||||
}
|
||||
//if( PERFORM_PROJECT_CONSENSUS ) {
|
||||
// final double consensusPrior = QualityUtils.qualToProb( 1.0 + 5.0 * datum.consensusCount );
|
||||
// priorFactor = 1.0 - ((1.0 - priorFactor) * (1.0 - consensusPrior));
|
||||
//}
|
||||
datum.prior = Math.log10( priorFactor ) - Math.log10( 1.0 - priorFactor );
|
||||
|
||||
mapList.add( datum );
|
||||
|
|
|
|||
|
|
@ -41,11 +41,13 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
//System.out.printf("PARAMS FOR %s is %s%n", vcf, clusterFile);
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + b37KGReference +
|
||||
" -B:dbsnp,VCF,known=true,training=false,truth=false,prior=10.0 " + GATKDataLocation + "dbsnp_132_b37.leftAligned.vcf" +
|
||||
" -B:hapmap,VCF,known=false,training=true,truth=true,prior=15.0 " + comparisonDataLocation + "Validated/HapMap/3.3/sites_r27_nr.b37_fwd.vcf" +
|
||||
" -B:omni,VCF,known=false,training=true,truth=true,prior=12.0 " + comparisonDataLocation + "Validated/Omni2.5_chip/Omni25_sites_1525_samples.b37.vcf" +
|
||||
" -known:prior=10.0 " + GATKDataLocation + "dbsnp_132_b37.leftAligned.vcf" +
|
||||
" -training:prior=15.0 " + comparisonDataLocation + "Validated/HapMap/3.3/sites_r27_nr.b37_fwd.vcf" +
|
||||
" -truth:prior=15.0 " + comparisonDataLocation + "Validated/HapMap/3.3/sites_r27_nr.b37_fwd.vcf" +
|
||||
" -training:prior=12.0 " + comparisonDataLocation + "Validated/Omni2.5_chip/Omni25_sites_1525_samples.b37.vcf" +
|
||||
" -truth:prior=12.0 " + comparisonDataLocation + "Validated/Omni2.5_chip/Omni25_sites_1525_samples.b37.vcf" +
|
||||
" -T VariantRecalibrator" +
|
||||
" -B:input,VCF " + params.inVCF +
|
||||
" -input " + params.inVCF +
|
||||
" -L 20:1,000,000-40,000,000" +
|
||||
" -an QD -an HaplotypeScore -an HRun" +
|
||||
" -percentBad 0.07" +
|
||||
|
|
@ -64,7 +66,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
" -T ApplyRecalibration" +
|
||||
" -L 20:12,000,000-30,000,000" +
|
||||
" -NO_HEADER" +
|
||||
" -B:input,VCF " + params.inVCF +
|
||||
" -input " + params.inVCF +
|
||||
" -o %s" +
|
||||
" -tranchesFile " + MD5DB.getMD5FilePath(params.tranchesMD5, null) +
|
||||
" -recalFile " + MD5DB.getMD5FilePath(params.recalMD5, null),
|
||||
|
|
|
|||
Loading…
Reference in New Issue