Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
5b1bf6e244
|
|
@ -379,7 +379,7 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( tribbleType == null )
|
if ( tribbleType == null )
|
||||||
if ( ! file.canRead() | !! file.isFile() ) {
|
if ( ! file.canRead() | ! file.isFile() ) {
|
||||||
throw new UserException.BadArgumentValue(name, "Couldn't read file to determine type: " + file);
|
throw new UserException.BadArgumentValue(name, "Couldn't read file to determine type: " + file);
|
||||||
} else {
|
} else {
|
||||||
throw new UserException.CommandLineException(
|
throw new UserException.CommandLineException(
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.datasources.reads;
|
||||||
|
|
||||||
import net.sf.picard.util.PeekableIterator;
|
import net.sf.picard.util.PeekableIterator;
|
||||||
import net.sf.samtools.GATKBAMFileSpan;
|
import net.sf.samtools.GATKBAMFileSpan;
|
||||||
|
import net.sf.samtools.GATKChunk;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||||
|
|
||||||
|
|
@ -84,7 +85,7 @@ public class BAMScheduler implements Iterator<FilePointer> {
|
||||||
if(currentLocus == GenomeLoc.UNMAPPED) {
|
if(currentLocus == GenomeLoc.UNMAPPED) {
|
||||||
nextFilePointer = new FilePointer(GenomeLoc.UNMAPPED);
|
nextFilePointer = new FilePointer(GenomeLoc.UNMAPPED);
|
||||||
for(SAMReaderID id: dataSource.getReaderIDs())
|
for(SAMReaderID id: dataSource.getReaderIDs())
|
||||||
nextFilePointer.addFileSpans(id,new GATKBAMFileSpan());
|
nextFilePointer.addFileSpans(id,new GATKBAMFileSpan(new GATKChunk(indexFiles.get(id).getStartOfLastLinearBin(),Long.MAX_VALUE)));
|
||||||
currentLocus = null;
|
currentLocus = null;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -215,6 +215,45 @@ public class GATKBAMIndex {
|
||||||
return (new GATKBin(bin).getBinNumber()-levelStart+1)*(BIN_GENOMIC_SPAN /levelSize);
|
return (new GATKBin(bin).getBinNumber()-levelStart+1)*(BIN_GENOMIC_SPAN /levelSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Use to get close to the unmapped reads at the end of a BAM file.
|
||||||
|
* @return The file offset of the first record in the last linear bin, or -1
|
||||||
|
* if there are no elements in linear bins (i.e. no mapped reads).
|
||||||
|
*/
|
||||||
|
public long getStartOfLastLinearBin() {
|
||||||
|
openIndexFile();
|
||||||
|
|
||||||
|
seek(4);
|
||||||
|
|
||||||
|
final int sequenceCount = readInteger();
|
||||||
|
// Because no reads may align to the last sequence in the sequence dictionary,
|
||||||
|
// grab the last element of the linear index for each sequence, and return
|
||||||
|
// the last one from the last sequence that has one.
|
||||||
|
long lastLinearIndexPointer = -1;
|
||||||
|
for (int i = 0; i < sequenceCount; i++) {
|
||||||
|
// System.out.println("# Sequence TID: " + i);
|
||||||
|
final int nBins = readInteger();
|
||||||
|
// System.out.println("# nBins: " + nBins);
|
||||||
|
for (int j1 = 0; j1 < nBins; j1++) {
|
||||||
|
// Skip bin #
|
||||||
|
skipBytes(4);
|
||||||
|
final int nChunks = readInteger();
|
||||||
|
// Skip chunks
|
||||||
|
skipBytes(16 * nChunks);
|
||||||
|
}
|
||||||
|
final int nLinearBins = readInteger();
|
||||||
|
if (nLinearBins > 0) {
|
||||||
|
// Skip to last element of list of linear bins
|
||||||
|
skipBytes(8 * (nLinearBins - 1));
|
||||||
|
lastLinearIndexPointer = readLongs(1)[0];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
closeIndexFile();
|
||||||
|
|
||||||
|
return lastLinearIndexPointer;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the possible number of bins for a given reference sequence.
|
* Gets the possible number of bins for a given reference sequence.
|
||||||
* @return How many bins could possibly be used according to this indexing scheme to index a single contig.
|
* @return How many bins could possibly be used according to this indexing scheme to index a single contig.
|
||||||
|
|
|
||||||
|
|
@ -134,25 +134,12 @@ public class ReadShardStrategy implements ShardStrategy {
|
||||||
Map<SAMReaderID,SAMFileSpan> selectedReaders = new HashMap<SAMReaderID,SAMFileSpan>();
|
Map<SAMReaderID,SAMFileSpan> selectedReaders = new HashMap<SAMReaderID,SAMFileSpan>();
|
||||||
while(selectedReaders.size() == 0 && currentFilePointer != null) {
|
while(selectedReaders.size() == 0 && currentFilePointer != null) {
|
||||||
shardPosition = currentFilePointer.fileSpans;
|
shardPosition = currentFilePointer.fileSpans;
|
||||||
|
|
||||||
for(SAMReaderID id: shardPosition.keySet()) {
|
for(SAMReaderID id: shardPosition.keySet()) {
|
||||||
// If the region contains location information (in other words, it is not at
|
|
||||||
// the start of the unmapped region), add the region.
|
|
||||||
if(currentFilePointer.isRegionUnmapped) {
|
|
||||||
// If the region is unmapped and no location data exists, add a null as an indicator to
|
|
||||||
// start at the next unmapped region.
|
|
||||||
if(!isIntoUnmappedRegion) {
|
|
||||||
selectedReaders.put(id,null);
|
|
||||||
isIntoUnmappedRegion = true;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
selectedReaders.put(id,position.get(id));
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
SAMFileSpan fileSpan = shardPosition.get(id).removeContentsBefore(position.get(id));
|
SAMFileSpan fileSpan = shardPosition.get(id).removeContentsBefore(position.get(id));
|
||||||
if(!fileSpan.isEmpty())
|
if(!fileSpan.isEmpty())
|
||||||
selectedReaders.put(id,fileSpan);
|
selectedReaders.put(id,fileSpan);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if(selectedReaders.size() > 0) {
|
if(selectedReaders.size() > 0) {
|
||||||
Shard shard = new ReadShard(parser, dataSource,selectedReaders,currentFilePointer.locations,currentFilePointer.isRegionUnmapped);
|
Shard shard = new ReadShard(parser, dataSource,selectedReaders,currentFilePointer.locations,currentFilePointer.isRegionUnmapped);
|
||||||
|
|
|
||||||
|
|
@ -86,6 +86,7 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
||||||
|
|
||||||
@ArgumentCollection
|
@ArgumentCollection
|
||||||
protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
|
protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
|
||||||
|
public RodBinding<VariantContext> getVariantRodBinding() { return variantCollection.variants; }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The INFO field will be annotated with information on the most biologically-significant effect
|
* The INFO field will be annotated with information on the most biologically-significant effect
|
||||||
|
|
@ -208,6 +209,8 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
||||||
engine = new VariantAnnotatorEngine(annotationGroupsToUse, annotationsToUse, this);
|
engine = new VariantAnnotatorEngine(annotationGroupsToUse, annotationsToUse, this);
|
||||||
engine.initializeExpressions(expressionsToUse);
|
engine.initializeExpressions(expressionsToUse);
|
||||||
|
|
||||||
|
engine.invokeAnnotationInitializationMethods();
|
||||||
|
|
||||||
// setup the header fields
|
// setup the header fields
|
||||||
// note that if any of the definitions conflict with our new ones, then we want to overwrite the old ones
|
// note that if any of the definitions conflict with our new ones, then we want to overwrite the old ones
|
||||||
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||||
|
|
|
||||||
|
|
@ -29,10 +29,7 @@ import org.broadinstitute.sting.commandline.RodBinding;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotationInterfaceManager;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||||
|
|
@ -113,6 +110,16 @@ public class VariantAnnotatorEngine {
|
||||||
dbAnnotations.put(rod, rod.getName());
|
dbAnnotations.put(rod, rod.getName());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void invokeAnnotationInitializationMethods() {
|
||||||
|
for ( VariantAnnotatorAnnotation annotation : requestedInfoAnnotations ) {
|
||||||
|
annotation.initialize(walker);
|
||||||
|
}
|
||||||
|
|
||||||
|
for ( VariantAnnotatorAnnotation annotation : requestedGenotypeAnnotations ) {
|
||||||
|
annotation.initialize(walker);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public Set<VCFHeaderLine> getVCFAnnotationDescriptions() {
|
public Set<VCFHeaderLine> getVCFAnnotationDescriptions() {
|
||||||
|
|
||||||
Set<VCFHeaderLine> descriptions = new HashSet<VCFHeaderLine>();
|
Set<VCFHeaderLine> descriptions = new HashSet<VCFHeaderLine>();
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@ import java.util.List;
|
||||||
public interface AnnotatorCompatibleWalker {
|
public interface AnnotatorCompatibleWalker {
|
||||||
|
|
||||||
// getter methods for various used bindings
|
// getter methods for various used bindings
|
||||||
|
public abstract RodBinding<VariantContext> getVariantRodBinding();
|
||||||
public abstract RodBinding<SnpEffFeature> getSnpEffRodBinding();
|
public abstract RodBinding<SnpEffFeature> getSnpEffRodBinding();
|
||||||
public abstract RodBinding<VariantContext> getDbsnpRodBinding();
|
public abstract RodBinding<VariantContext> getDbsnpRodBinding();
|
||||||
public abstract List<RodBinding<VariantContext>> getCompRodBindings();
|
public abstract List<RodBinding<VariantContext>> getCompRodBindings();
|
||||||
|
|
|
||||||
|
|
@ -24,18 +24,15 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
|
package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
|
||||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
@DocumentedGATKFeature(enable = true, groupName = "VariantAnnotator annotations", summary = "VariantAnnotator annotations")
|
@DocumentedGATKFeature(enable = true, groupName = "VariantAnnotator annotations", summary = "VariantAnnotator annotations")
|
||||||
public abstract class VariantAnnotatorAnnotation {
|
public abstract class VariantAnnotatorAnnotation {
|
||||||
// return the INFO keys
|
// return the INFO keys
|
||||||
public abstract List<String> getKeyNames();
|
public abstract List<String> getKeyNames();
|
||||||
|
|
||||||
|
// initialization method (optional for subclasses, and therefore non-abstract)
|
||||||
|
public void initialize ( AnnotatorCompatibleWalker walker ) { }
|
||||||
}
|
}
|
||||||
|
|
@ -127,6 +127,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
||||||
@ArgumentCollection
|
@ArgumentCollection
|
||||||
protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();
|
protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();
|
||||||
public RodBinding<VariantContext> getDbsnpRodBinding() { return dbsnp.dbsnp; }
|
public RodBinding<VariantContext> getDbsnpRodBinding() { return dbsnp.dbsnp; }
|
||||||
|
public RodBinding<VariantContext> getVariantRodBinding() { return null; }
|
||||||
public RodBinding<SnpEffFeature> getSnpEffRodBinding() { return null; }
|
public RodBinding<SnpEffFeature> getSnpEffRodBinding() { return null; }
|
||||||
public List<RodBinding<VariantContext>> getCompRodBindings() { return Collections.emptyList(); }
|
public List<RodBinding<VariantContext>> getCompRodBindings() { return Collections.emptyList(); }
|
||||||
public List<RodBinding<VariantContext>> getResourceRodBindings() { return Collections.emptyList(); }
|
public List<RodBinding<VariantContext>> getResourceRodBindings() { return Collections.emptyList(); }
|
||||||
|
|
|
||||||
|
|
@ -55,7 +55,7 @@ import java.util.*;
|
||||||
*
|
*
|
||||||
* <h2>Output</h2>
|
* <h2>Output</h2>
|
||||||
* <p>
|
* <p>
|
||||||
* Evaluation tables.
|
* Evaluation tables detailing the results of the eval modules which were applied.
|
||||||
* </p>
|
* </p>
|
||||||
*
|
*
|
||||||
* <h2>Examples</h2>
|
* <h2>Examples</h2>
|
||||||
|
|
@ -152,6 +152,9 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
||||||
@Argument(fullName="ancestralAlignments", shortName="aa", doc="Fasta file with ancestral alleles", required=false)
|
@Argument(fullName="ancestralAlignments", shortName="aa", doc="Fasta file with ancestral alleles", required=false)
|
||||||
private File ancestralAlignmentsFile = null;
|
private File ancestralAlignmentsFile = null;
|
||||||
|
|
||||||
|
@Argument(fullName="requireStrictAlleleMatch", shortName="strict", doc="If provided only comp and eval tracks with exactly matching reference and alternate alleles will be counted as overlapping", required=false)
|
||||||
|
private boolean requireStrictAlleleMatch = false;
|
||||||
|
|
||||||
// Variables
|
// Variables
|
||||||
private Set<SortableJexlVCMatchExp> jexlExpressions = new TreeSet<SortableJexlVCMatchExp>();
|
private Set<SortableJexlVCMatchExp> jexlExpressions = new TreeSet<SortableJexlVCMatchExp>();
|
||||||
|
|
||||||
|
|
@ -360,16 +363,16 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
||||||
if ( matchingComps.size() == 0 )
|
if ( matchingComps.size() == 0 )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
// find the comp which matches the alternate allele from eval
|
// find the comp which matches both the reference allele and alternate allele from eval
|
||||||
Allele altEval = eval.getAlternateAlleles().size() == 0 ? null : eval.getAlternateAllele(0);
|
Allele altEval = eval.getAlternateAlleles().size() == 0 ? null : eval.getAlternateAllele(0);
|
||||||
for ( VariantContext comp : matchingComps ) {
|
for ( VariantContext comp : matchingComps ) {
|
||||||
Allele altComp = comp.getAlternateAlleles().size() == 0 ? null : comp.getAlternateAllele(0);
|
Allele altComp = comp.getAlternateAlleles().size() == 0 ? null : comp.getAlternateAllele(0);
|
||||||
if ( (altEval == null && altComp == null) || (altEval != null && altEval.equals(altComp)) )
|
if ( (altEval == null && altComp == null) || (altEval != null && altEval.equals(altComp) && eval.getReference().equals(comp.getReference())) )
|
||||||
return comp;
|
return comp;
|
||||||
}
|
}
|
||||||
|
|
||||||
// if none match, just return the first one
|
// if none match, just return the first one unless we require a strict match
|
||||||
return matchingComps.get(0);
|
return (requireStrictAlleleMatch ? null : matchingComps.get(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
public Integer treeReduce(Integer lhs, Integer rhs) { return null; }
|
public Integer treeReduce(Integer lhs, Integer rhs) { return null; }
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,76 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||||
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
|
||||||
|
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
import org.broadinstitute.sting.commandline.RodBinding;
|
||||||
|
import org.broadinstitute.sting.commandline.Tags;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: rpoplin
|
||||||
|
* Date: 3/12/11
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class TrainingSet {
|
||||||
|
|
||||||
|
public RodBinding<VariantContext> rodBinding;
|
||||||
|
public boolean isKnown = false;
|
||||||
|
public boolean isTraining = false;
|
||||||
|
public boolean isAntiTraining = false;
|
||||||
|
public boolean isTruth = false;
|
||||||
|
public boolean isConsensus = false;
|
||||||
|
public double prior = 0.0;
|
||||||
|
|
||||||
|
protected final static Logger logger = Logger.getLogger(TrainingSet.class);
|
||||||
|
|
||||||
|
public TrainingSet( final RodBinding<VariantContext> rodBinding) {
|
||||||
|
this.rodBinding = rodBinding;
|
||||||
|
|
||||||
|
final Tags tags = rodBinding.getTags();
|
||||||
|
final String name = rodBinding.getName();
|
||||||
|
|
||||||
|
// Parse the tags to decide which tracks have which properties
|
||||||
|
if( tags != null ) {
|
||||||
|
isKnown = tags.containsKey("known") && tags.getValue("known").equals("true");
|
||||||
|
isTraining = tags.containsKey("training") && tags.getValue("training").equals("true");
|
||||||
|
isAntiTraining = tags.containsKey("bad") && tags.getValue("bad").equals("true");
|
||||||
|
isTruth = tags.containsKey("truth") && tags.getValue("truth").equals("true");
|
||||||
|
isConsensus = tags.containsKey("consensus") && tags.getValue("consensus").equals("true");
|
||||||
|
prior = ( tags.containsKey("prior") ? Double.parseDouble(tags.getValue("prior")) : prior );
|
||||||
|
}
|
||||||
|
|
||||||
|
// Report back to the user which tracks were found and the properties that were detected
|
||||||
|
if( !isConsensus && !isAntiTraining ) {
|
||||||
|
logger.info( String.format( "Found %s track: \tKnown = %s \tTraining = %s \tTruth = %s \tPrior = Q%.1f", name, isKnown, isTraining, isTruth, prior) );
|
||||||
|
} else if( isConsensus ) {
|
||||||
|
logger.info( String.format( "Found consensus track: %s", name) );
|
||||||
|
} else {
|
||||||
|
logger.info( String.format( "Found bad sites training track: %s", name) );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -51,10 +51,10 @@ public class VariantDataManager {
|
||||||
private ExpandingArrayList<VariantDatum> data;
|
private ExpandingArrayList<VariantDatum> data;
|
||||||
private final double[] meanVector;
|
private final double[] meanVector;
|
||||||
private final double[] varianceVector; // this is really the standard deviation
|
private final double[] varianceVector; // this is really the standard deviation
|
||||||
public final ArrayList<String> annotationKeys;
|
public final List<String> annotationKeys;
|
||||||
private final VariantRecalibratorArgumentCollection VRAC;
|
private final VariantRecalibratorArgumentCollection VRAC;
|
||||||
protected final static Logger logger = Logger.getLogger(VariantDataManager.class);
|
protected final static Logger logger = Logger.getLogger(VariantDataManager.class);
|
||||||
|
protected final List<TrainingSet> trainingSets;
|
||||||
|
|
||||||
public VariantDataManager( final List<String> annotationKeys, final VariantRecalibratorArgumentCollection VRAC ) {
|
public VariantDataManager( final List<String> annotationKeys, final VariantRecalibratorArgumentCollection VRAC ) {
|
||||||
this.data = null;
|
this.data = null;
|
||||||
|
|
@ -62,6 +62,7 @@ public class VariantDataManager {
|
||||||
this.VRAC = VRAC;
|
this.VRAC = VRAC;
|
||||||
meanVector = new double[this.annotationKeys.size()];
|
meanVector = new double[this.annotationKeys.size()];
|
||||||
varianceVector = new double[this.annotationKeys.size()];
|
varianceVector = new double[this.annotationKeys.size()];
|
||||||
|
trainingSets = new ArrayList<TrainingSet>();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setData( final ExpandingArrayList<VariantDatum> data ) {
|
public void setData( final ExpandingArrayList<VariantDatum> data ) {
|
||||||
|
|
@ -104,6 +105,31 @@ public class VariantDataManager {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void addTrainingSet( final TrainingSet trainingSet ) {
|
||||||
|
trainingSets.add( trainingSet );
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean checkHasTrainingSet() {
|
||||||
|
for( final TrainingSet trainingSet : trainingSets ) {
|
||||||
|
if( trainingSet.isTraining ) { return true; }
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean checkHasTruthSet() {
|
||||||
|
for( final TrainingSet trainingSet : trainingSets ) {
|
||||||
|
if( trainingSet.isTruth ) { return true; }
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean checkHasKnownSet() {
|
||||||
|
for( final TrainingSet trainingSet : trainingSets ) {
|
||||||
|
if( trainingSet.isKnown ) { return true; }
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
public ExpandingArrayList<VariantDatum> getTrainingData() {
|
public ExpandingArrayList<VariantDatum> getTrainingData() {
|
||||||
final ExpandingArrayList<VariantDatum> trainingData = new ExpandingArrayList<VariantDatum>();
|
final ExpandingArrayList<VariantDatum> trainingData = new ExpandingArrayList<VariantDatum>();
|
||||||
for( final VariantDatum datum : data ) {
|
for( final VariantDatum datum : data ) {
|
||||||
|
|
@ -232,57 +258,35 @@ public class VariantDataManager {
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void parseTrainingSets( final RefMetaDataTracker tracker, final GenomeLoc genomeLoc, final VariantContext evalVC, final VariantDatum datum, final boolean TRUST_ALL_POLYMORPHIC, final HashMap<String, Double> rodToPriorMap,
|
public void parseTrainingSets( final RefMetaDataTracker tracker, final GenomeLoc genomeLoc, final VariantContext evalVC, final VariantDatum datum, final boolean TRUST_ALL_POLYMORPHIC ) {
|
||||||
final List<RodBinding<VariantContext>> training, final List<RodBinding<VariantContext>> truth, final List<RodBinding<VariantContext>> known, final List<RodBinding<VariantContext>> badSites, final List<RodBinding<VariantContext>> resource) {
|
|
||||||
datum.isKnown = false;
|
datum.isKnown = false;
|
||||||
datum.atTruthSite = false;
|
datum.atTruthSite = false;
|
||||||
datum.atTrainingSite = false;
|
datum.atTrainingSite = false;
|
||||||
datum.atAntiTrainingSite = false;
|
datum.atAntiTrainingSite = false;
|
||||||
datum.prior = 2.0;
|
datum.prior = 2.0;
|
||||||
|
|
||||||
//BUGBUG: need to clean this up
|
for( final TrainingSet trainingSet : trainingSets ) {
|
||||||
|
for( final VariantContext trainVC : tracker.getValues(trainingSet.rodBinding, genomeLoc) ) {
|
||||||
for( final RodBinding<VariantContext> rod : training ) {
|
|
||||||
for( final VariantContext trainVC : tracker.getValues(rod, genomeLoc) ) {
|
|
||||||
if( isValidVariant( evalVC, trainVC, TRUST_ALL_POLYMORPHIC ) ) {
|
if( isValidVariant( evalVC, trainVC, TRUST_ALL_POLYMORPHIC ) ) {
|
||||||
datum.atTrainingSite = true;
|
datum.isKnown = datum.isKnown || trainingSet.isKnown;
|
||||||
datum.prior = Math.max( datum.prior, (rodToPriorMap.containsKey(rod.getName()) ? rodToPriorMap.get(rod.getName()) : 0.0) );
|
datum.atTruthSite = datum.atTruthSite || trainingSet.isTruth;
|
||||||
|
datum.atTrainingSite = datum.atTrainingSite || trainingSet.isTraining;
|
||||||
|
datum.prior = Math.max( datum.prior, trainingSet.prior );
|
||||||
|
datum.consensusCount += ( trainingSet.isConsensus ? 1 : 0 );
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
for( final RodBinding<VariantContext> rod : truth ) {
|
|
||||||
for( final VariantContext trainVC : tracker.getValues(rod, genomeLoc) ) {
|
|
||||||
if( isValidVariant( evalVC, trainVC, TRUST_ALL_POLYMORPHIC ) ) {
|
|
||||||
datum.atTruthSite = true;
|
|
||||||
datum.prior = Math.max( datum.prior, (rodToPriorMap.containsKey(rod.getName()) ? rodToPriorMap.get(rod.getName()) : 0.0) );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for( final RodBinding<VariantContext> rod : known ) {
|
|
||||||
for( final VariantContext trainVC : tracker.getValues(rod, genomeLoc) ) {
|
|
||||||
if( isValidVariant( evalVC, trainVC, TRUST_ALL_POLYMORPHIC ) ) {
|
|
||||||
datum.isKnown = true;
|
|
||||||
datum.prior = Math.max( datum.prior, (rodToPriorMap.containsKey(rod.getName()) ? rodToPriorMap.get(rod.getName()) : 0.0) );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for( final RodBinding<VariantContext> rod : resource ) {
|
|
||||||
for( final VariantContext trainVC : tracker.getValues(rod, genomeLoc) ) {
|
|
||||||
if( isValidVariant( evalVC, trainVC, TRUST_ALL_POLYMORPHIC ) ) {
|
|
||||||
datum.prior = Math.max( datum.prior, (rodToPriorMap.containsKey(rod.getName()) ? rodToPriorMap.get(rod.getName()) : 0.0) );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for( final RodBinding<VariantContext> rod : badSites ) {
|
|
||||||
for( final VariantContext trainVC : tracker.getValues(rod, genomeLoc) ) {
|
|
||||||
if( trainVC != null ) {
|
if( trainVC != null ) {
|
||||||
datum.atAntiTrainingSite = true;
|
datum.atAntiTrainingSite = datum.atAntiTrainingSite || trainingSet.isAntiTraining;
|
||||||
datum.prior = Math.max( datum.prior, (rodToPriorMap.containsKey(rod.getName()) ? rodToPriorMap.get(rod.getName()) : 0.0) );
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private boolean isValidVariant( final VariantContext evalVC, final VariantContext trainVC, final boolean TRUST_ALL_POLYMORPHIC) {
|
||||||
|
return trainVC != null && trainVC.isNotFiltered() && trainVC.isVariant() &&
|
||||||
|
((evalVC.isSNP() && trainVC.isSNP()) || ((evalVC.isIndel()||evalVC.isMixed()) && (trainVC.isIndel()||trainVC.isMixed()))) &&
|
||||||
|
(TRUST_ALL_POLYMORPHIC || !trainVC.hasGenotypes() || trainVC.isPolymorphic());
|
||||||
|
}
|
||||||
|
|
||||||
public void writeOutRecalibrationTable( final PrintStream RECAL_FILE ) {
|
public void writeOutRecalibrationTable( final PrintStream RECAL_FILE ) {
|
||||||
for( final VariantDatum datum : data ) {
|
for( final VariantDatum datum : data ) {
|
||||||
RECAL_FILE.println(String.format("%s,%d,%d,%.4f,%s",
|
RECAL_FILE.println(String.format("%s,%d,%d,%.4f,%s",
|
||||||
|
|
@ -290,10 +294,4 @@ public class VariantDataManager {
|
||||||
(datum.worstAnnotation != -1 ? annotationKeys.get(datum.worstAnnotation) : "NULL")));
|
(datum.worstAnnotation != -1 ? annotationKeys.get(datum.worstAnnotation) : "NULL")));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean isValidVariant( final VariantContext evalVC, final VariantContext trainVC, final boolean TRUST_ALL_POLYMORPHIC) {
|
|
||||||
return trainVC != null && trainVC.isNotFiltered() && trainVC.isVariant() &&
|
|
||||||
((evalVC.isSNP() && trainVC.isSNP()) || ((evalVC.isIndel()||evalVC.isMixed()) && (trainVC.isIndel()||trainVC.isMixed()))) &&
|
|
||||||
(TRUST_ALL_POLYMORPHIC || !trainVC.hasGenotypes() || trainVC.isPolymorphic());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -77,16 +77,15 @@ import java.util.*;
|
||||||
* <p>
|
* <p>
|
||||||
* A tranches file which shows various metrics of the recalibration callset as a function of making several slices through the data.
|
* A tranches file which shows various metrics of the recalibration callset as a function of making several slices through the data.
|
||||||
*
|
*
|
||||||
* <h2>Examples</h2>
|
* <h2>Example</h2>
|
||||||
* <pre>
|
* <pre>
|
||||||
* java -Xmx4g -jar GenomeAnalysisTK.jar \
|
* java -Xmx4g -jar GenomeAnalysisTK.jar \
|
||||||
* -T VariantRecalibrator \
|
* -T VariantRecalibrator \
|
||||||
* -R reference/human_g1k_v37.fasta \
|
* -R reference/human_g1k_v37.fasta \
|
||||||
* -input NA12878.HiSeq.WGS.bwa.cleaned.raw.hg19.subset.vcf \
|
* -input NA12878.HiSeq.WGS.bwa.cleaned.raw.hg19.subset.vcf \
|
||||||
* -truth:prior=15.0 hapmap_3.3.b37.sites.vcf \
|
* -resource:hapmap,known=false,training=true,truth=true,prior=15.0 hapmap_3.3.b37.sites.vcf \
|
||||||
* -training:prior=15.0 hapmap_3.3.b37.sites.vcf \
|
* -resource:omni,known=false,training=true,truth=false,prior=12.0 1000G_omni2.5.b37.sites.vcf \
|
||||||
* -training:prior=12.0 1000G_omni2.5.b37.sites.vcf \
|
* -resource:dbsnp,known=true,training=false,truth=false,prior=8.0 dbsnp_132.b37.vcf \
|
||||||
* -known:prior=8.0 dbsnp_132.b37.vcf \
|
|
||||||
* -an QD -an HaplotypeScore -an MQRankSum -an ReadPosRankSum -an FS -an MQ \
|
* -an QD -an HaplotypeScore -an MQRankSum -an ReadPosRankSum -an FS -an MQ \
|
||||||
* -recalFile path/to/output.recal \
|
* -recalFile path/to/output.recal \
|
||||||
* -tranchesFile path/to/output.tranches \
|
* -tranchesFile path/to/output.tranches \
|
||||||
|
|
@ -112,34 +111,11 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
||||||
public List<RodBinding<VariantContext>> input;
|
public List<RodBinding<VariantContext>> input;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Input variants which are found to overlap with these training sites are used to build the Gaussian mixture model.
|
* Any set of VCF files to use as lists of training, truth, or known sites.
|
||||||
*/
|
* Training - Input variants which are found to overlap with these training sites are used to build the Gaussian mixture model.
|
||||||
@Input(fullName="training", shortName = "training", doc="A list of training variants used to train the Gaussian mixture model", required=true)
|
* Truth - When deciding where to set the cutoff in VQSLOD sensitivity to these truth sites is used.
|
||||||
public List<RodBinding<VariantContext>> training;
|
* Known - The known / novel status of a variant isn't used by the algorithm itself and is only used for reporting / display purposes.
|
||||||
|
* Bad - In addition to using the worst 3% of variants as compared to the Gaussian mixture model, we can also supplement the list with a database of known bad variants.
|
||||||
/**
|
|
||||||
* When deciding where to set the cutoff in VQSLOD sensitivity to these truth sites is used.
|
|
||||||
* Typically one might want to say I dropped my threshold until I got back 99% of HapMap sites, for example.
|
|
||||||
*/
|
|
||||||
@Input(fullName="truth", shortName = "truth", doc="A list of true variants to be used when deciding the truth sensitivity cut of the final callset", required=true)
|
|
||||||
public List<RodBinding<VariantContext>> truth;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The known / novel status of a variant isn't used by the algorithm itself and is only used for reporting / display purposes.
|
|
||||||
* The output metrics are stratified by known status in order to aid in comparisons with other call sets.
|
|
||||||
*/
|
|
||||||
@Input(fullName="known", shortName = "known", doc="A list of known variants to be used for metric comparison purposes", required=false)
|
|
||||||
public List<RodBinding<VariantContext>> known = Collections.emptyList();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* In addition to using the worst 3% of variants as compared to the Gaussian mixture model, we can also supplement the list
|
|
||||||
* with a database of known bad variants. Maybe these are loci which are frequently filtered out in many projects (centromere, for example).
|
|
||||||
*/
|
|
||||||
@Input(fullName="badSites", shortName = "badSites", doc="A list of known bad variants used to supplement training the negative model", required=false)
|
|
||||||
public List<RodBinding<VariantContext>> badSites = Collections.emptyList();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Any set of sites for which you would like to apply a prior probability but for which you don't want to use as training, truth, or known sites.
|
|
||||||
*/
|
*/
|
||||||
@Input(fullName="resource", shortName = "resource", doc="A list of sites for which to apply a prior probability of being correct but which aren't used by the algorithm", required=false)
|
@Input(fullName="resource", shortName = "resource", doc="A list of sites for which to apply a prior probability of being correct but which aren't used by the algorithm", required=false)
|
||||||
public List<RodBinding<VariantContext>> resource = Collections.emptyList();
|
public List<RodBinding<VariantContext>> resource = Collections.emptyList();
|
||||||
|
|
@ -205,7 +181,6 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
||||||
private PrintStream tranchesStream;
|
private PrintStream tranchesStream;
|
||||||
private final Set<String> ignoreInputFilterSet = new TreeSet<String>();
|
private final Set<String> ignoreInputFilterSet = new TreeSet<String>();
|
||||||
private final VariantRecalibratorEngine engine = new VariantRecalibratorEngine( VRAC );
|
private final VariantRecalibratorEngine engine = new VariantRecalibratorEngine( VRAC );
|
||||||
private final HashMap<String, Double> rodToPriorMap = new HashMap<String, Double>();
|
|
||||||
|
|
||||||
//---------------------------------------------------------------------------------------------------------------
|
//---------------------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
|
|
@ -227,18 +202,15 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
||||||
throw new UserException.CouldNotCreateOutputFile(TRANCHES_FILE, e);
|
throw new UserException.CouldNotCreateOutputFile(TRANCHES_FILE, e);
|
||||||
}
|
}
|
||||||
|
|
||||||
final ArrayList<RodBinding<VariantContext>> allInputBindings = new ArrayList<RodBinding<VariantContext>>();
|
for( RodBinding<VariantContext> rod : resource ) {
|
||||||
allInputBindings.addAll(truth);
|
dataManager.addTrainingSet( new TrainingSet( rod ) );
|
||||||
allInputBindings.addAll(training);
|
|
||||||
allInputBindings.addAll(known);
|
|
||||||
allInputBindings.addAll(badSites);
|
|
||||||
allInputBindings.addAll(resource);
|
|
||||||
for( final RodBinding<VariantContext> rod : allInputBindings ) {
|
|
||||||
try {
|
|
||||||
rodToPriorMap.put(rod.getName(), (rod.getTags().containsKey("prior") ? Double.parseDouble(rod.getTags().getValue("prior")) : 0.0) );
|
|
||||||
} catch( NumberFormatException e ) {
|
|
||||||
throw new UserException.BadInput("Bad rod binding syntax. Prior key-value tag detected but isn't parsable. Expecting something like -training:prior=12.0 my.set.vcf");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if( !dataManager.checkHasTrainingSet() ) {
|
||||||
|
throw new UserException.CommandLineException( "No training set found! Please provide sets of known polymorphic loci marked with the training=true ROD binding tag. For example, -B:hapmap,VCF,known=false,training=true,truth=true,prior=12.0 hapmapFile.vcf" );
|
||||||
|
}
|
||||||
|
if( !dataManager.checkHasTruthSet() ) {
|
||||||
|
throw new UserException.CommandLineException( "No truth set found! Please provide sets of known polymorphic loci marked with the truth=true ROD binding tag. For example, -B:hapmap,VCF,known=false,training=true,truth=true,prior=12.0 hapmapFile.vcf" );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -270,7 +242,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
||||||
datum.isTransition = datum.isSNP && VariantContextUtils.isTransition(vc);
|
datum.isTransition = datum.isSNP && VariantContextUtils.isTransition(vc);
|
||||||
|
|
||||||
// Loop through the training data sets and if they overlap this loci then update the prior and training status appropriately
|
// Loop through the training data sets and if they overlap this loci then update the prior and training status appropriately
|
||||||
dataManager.parseTrainingSets( tracker, context.getLocation(), vc, datum, TRUST_ALL_POLYMORPHIC, rodToPriorMap, training, truth, known, badSites, resource ); // BUGBUG: need to clean this up to be a class, not a list of all the rod bindings
|
dataManager.parseTrainingSets( tracker, context.getLocation(), vc, datum, TRUST_ALL_POLYMORPHIC );
|
||||||
double priorFactor = QualityUtils.qualToProb( datum.prior );
|
double priorFactor = QualityUtils.qualToProb( datum.prior );
|
||||||
//if( PERFORM_PROJECT_CONSENSUS ) { // BUGBUG: need to resurrect this functionality?
|
//if( PERFORM_PROJECT_CONSENSUS ) { // BUGBUG: need to resurrect this functionality?
|
||||||
// final double consensusPrior = QualityUtils.qualToProb( 1.0 + 5.0 * datum.consensusCount );
|
// final double consensusPrior = QualityUtils.qualToProb( 1.0 + 5.0 * datum.consensusCount );
|
||||||
|
|
|
||||||
|
|
@ -41,11 +41,9 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
||||||
//System.out.printf("PARAMS FOR %s is %s%n", vcf, clusterFile);
|
//System.out.printf("PARAMS FOR %s is %s%n", vcf, clusterFile);
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
"-R " + b37KGReference +
|
"-R " + b37KGReference +
|
||||||
" -known:prior=10.0 " + GATKDataLocation + "dbsnp_132_b37.leftAligned.vcf" +
|
" -resource:known=true,prior=10.0 " + GATKDataLocation + "dbsnp_132_b37.leftAligned.vcf" +
|
||||||
" -training:prior=15.0 " + comparisonDataLocation + "Validated/HapMap/3.3/sites_r27_nr.b37_fwd.vcf" +
|
" -resource:truth=true,training=true,prior=15.0 " + comparisonDataLocation + "Validated/HapMap/3.3/sites_r27_nr.b37_fwd.vcf" +
|
||||||
" -truth:prior=15.0 " + comparisonDataLocation + "Validated/HapMap/3.3/sites_r27_nr.b37_fwd.vcf" +
|
" -resource:training=true,truth=true,prior=12.0 " + comparisonDataLocation + "Validated/Omni2.5_chip/Omni25_sites_1525_samples.b37.vcf" +
|
||||||
" -training:prior=12.0 " + comparisonDataLocation + "Validated/Omni2.5_chip/Omni25_sites_1525_samples.b37.vcf" +
|
|
||||||
" -truth:prior=12.0 " + comparisonDataLocation + "Validated/Omni2.5_chip/Omni25_sites_1525_samples.b37.vcf" +
|
|
||||||
" -T VariantRecalibrator" +
|
" -T VariantRecalibrator" +
|
||||||
" -input " + params.inVCF +
|
" -input " + params.inVCF +
|
||||||
" -L 20:1,000,000-40,000,000" +
|
" -L 20:1,000,000-40,000,000" +
|
||||||
|
|
@ -89,9 +87,8 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
||||||
//System.out.printf("PARAMS FOR %s is %s%n", vcf, clusterFile);
|
//System.out.printf("PARAMS FOR %s is %s%n", vcf, clusterFile);
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
"-R " + b37KGReference +
|
"-R " + b37KGReference +
|
||||||
" -known:prior=10.0 " + GATKDataLocation + "dbsnp_132_b37.leftAligned.vcf" +
|
" -resource:known=true,prior=10.0 " + GATKDataLocation + "dbsnp_132_b37.leftAligned.vcf" +
|
||||||
" -training:prior=15.0 " + comparisonDataLocation + "Validated/Mills_Devine_Indels_2011/ALL.wgs.indels_mills_devine_hg19_leftAligned_collapsed_double_hit.sites.vcf" +
|
" -resource:training=true,truth=true,prior=15.0 " + comparisonDataLocation + "Validated/Mills_Devine_Indels_2011/ALL.wgs.indels_mills_devine_hg19_leftAligned_collapsed_double_hit.sites.vcf" +
|
||||||
" -truth:prior=15.0 " + comparisonDataLocation + "Validated/Mills_Devine_Indels_2011/ALL.wgs.indels_mills_devine_hg19_leftAligned_collapsed_double_hit.sites.vcf" +
|
|
||||||
" -T VariantRecalibrator" +
|
" -T VariantRecalibrator" +
|
||||||
" -input " + params.inVCF +
|
" -input " + params.inVCF +
|
||||||
" -L 20:1,000,000-40,000,000" +
|
" -L 20:1,000,000-40,000,000" +
|
||||||
|
|
|
||||||
|
|
@ -6,10 +6,7 @@
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<!-- Recalibration analysis script -->
|
<!-- Recalibration analysis script -->
|
||||||
<class name="org.broadinstitute.sting.analyzecovariates.AnalyzeCovariates" />
|
<class name="org.broadinstitute.sting.analyzecovariates.AnalyzeCovariates" />
|
||||||
<class name="org.broadinstitute.sting.gatk.walkers.recalibration.DinucCovariate" />
|
<class name="org.broadinstitute.sting.gatk.walkers.recalibration.*" />
|
||||||
<class name="org.broadinstitute.sting.gatk.walkers.recalibration.QualityScoreCovariate" />
|
|
||||||
<class name="org.broadinstitute.sting.gatk.walkers.recalibration.CycleCovariate" />
|
|
||||||
<class name="org.broadinstitute.sting.gatk.walkers.recalibration.ReadGroupCovariate" />
|
|
||||||
</dependencies>
|
</dependencies>
|
||||||
</executable>
|
</executable>
|
||||||
<resources>
|
<resources>
|
||||||
|
|
|
||||||
|
|
@ -1,10 +0,0 @@
|
||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<package name="FindContaminatingReadGroups">
|
|
||||||
<executable name="FindContaminatingReadGroups">
|
|
||||||
<main-class name="org.broadinstitute.sting.gatk.CommandLineGATK" />
|
|
||||||
<resource-bundle file="StingText.properties" />
|
|
||||||
<dependencies>
|
|
||||||
<class name="org.broadinstitute.sting.playground.gatk.walkers.contamination.FindContaminatingReadGroupsWalker" />
|
|
||||||
</dependencies>
|
|
||||||
</executable>
|
|
||||||
</package>
|
|
||||||
|
|
@ -1,20 +0,0 @@
|
||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<package name="GATKResources">
|
|
||||||
<resources>
|
|
||||||
<file name="/humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" />
|
|
||||||
<file name="/humgen/gsa-scr1/GATK_Data/dbsnp_129_b37.rod" />
|
|
||||||
<file name="/humgen/gsa-scr1/GATK_Data/dbsnp_130_b36.rod" />
|
|
||||||
<file name="/humgen/1kg/reference/human_b36_both.fasta" />
|
|
||||||
<file name="/humgen/1kg/reference/human_b36_both.dict" />
|
|
||||||
<file name="/humgen/1kg/reference/human_b36_both.fasta.fai" />
|
|
||||||
<file name="/humgen/gsa-scr1/GATK_Data/dbsnp_129_hg18.rod" />
|
|
||||||
<file name="/humgen/gsa-scr1/GATK_Data/dbsnp_130_hg18.rod" />
|
|
||||||
<file name="/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta" />
|
|
||||||
<file name="/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.dict" />
|
|
||||||
<file name="/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta.fai" />
|
|
||||||
<file name="/humgen/gsa-scr1/GATK_Data/dbsnp_130_b37.rod" />
|
|
||||||
<file name="/humgen/1kg/reference/human_g1k_v37.fasta" />
|
|
||||||
<file name="/humgen/1kg/reference/human_g1k_v37.dict" />
|
|
||||||
<file name="/humgen/1kg/reference/human_g1k_v37.fasta.fai" />
|
|
||||||
</resources>
|
|
||||||
</package>
|
|
||||||
|
|
@ -1,11 +0,0 @@
|
||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<package name="IndelGenotyper">
|
|
||||||
<executable name="IndelGenotyper">
|
|
||||||
<main-class name="org.broadinstitute.sting.gatk.CommandLineGATK" />
|
|
||||||
<resource-bundle file="StingText.properties" />
|
|
||||||
<dependencies>
|
|
||||||
<!-- Local realignment around indels -->
|
|
||||||
<class name="org.broadinstitute.sting.gatk.walkers.indels.IndelGenotyperV2Walker" />
|
|
||||||
</dependencies>
|
|
||||||
</executable>
|
|
||||||
</package>
|
|
||||||
|
|
@ -1,12 +0,0 @@
|
||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<package name="LocalRealignmentAroundIndels">
|
|
||||||
<executable name="LocalRealignmentAroundIndels">
|
|
||||||
<main-class name="org.broadinstitute.sting.gatk.CommandLineGATK" />
|
|
||||||
<resource-bundle file="StingText.properties" />
|
|
||||||
<dependencies>
|
|
||||||
<!-- Local realignment around indels -->
|
|
||||||
<class name="org.broadinstitute.sting.gatk.walkers.indels.RealignerTargetCreator" />
|
|
||||||
<class name="org.broadinstitute.sting.gatk.walkers.indels.IndelRealigner" />
|
|
||||||
</dependencies>
|
|
||||||
</executable>
|
|
||||||
</package>
|
|
||||||
|
|
@ -1,18 +0,0 @@
|
||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<package name="QualityScoresRecalibration">
|
|
||||||
<executable name="QualityScoresRecalibration">
|
|
||||||
<main-class name="org.broadinstitute.sting.gatk.CommandLineGATK" />
|
|
||||||
<resource-bundle file="StingText.properties" />
|
|
||||||
<dependencies>
|
|
||||||
<!-- Quality scores recalibration -->
|
|
||||||
<class name="org.broadinstitute.sting.gatk.walkers.recalibration.CovariateCounterWalker" />
|
|
||||||
<class name="org.broadinstitute.sting.gatk.walkers.recalibration.TableRecalibrationWalker" />
|
|
||||||
<!-- Recalibration Covariates -->
|
|
||||||
<class name="org.broadinstitute.sting.gatk.walkers.recalibration.DinucCovariate" />
|
|
||||||
<class name="org.broadinstitute.sting.gatk.walkers.recalibration.QualityScoreCovariate" />
|
|
||||||
<class name="org.broadinstitute.sting.gatk.walkers.recalibration.CycleCovariate" />
|
|
||||||
<class name="org.broadinstitute.sting.gatk.walkers.recalibration.ReadGroupCovariate" />
|
|
||||||
<class name="org.broadinstitute.sting.gatk.walkers.recalibration.TileCovariate" />
|
|
||||||
</dependencies>
|
|
||||||
</executable>
|
|
||||||
</package>
|
|
||||||
|
|
@ -1,13 +0,0 @@
|
||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<package name="RMDIndexer">
|
|
||||||
<executable name="RMDIndexer">
|
|
||||||
<main-class name="org.broadinstitute.sting.gatk.refdata.indexer.RMDIndexer" />
|
|
||||||
<resource-bundle file="StingText.properties" />
|
|
||||||
<dependencies>
|
|
||||||
<package name="org.broad.tribble.*" />
|
|
||||||
<package name="org.broadinstitute.sting.gatk.refdata.features.*" />
|
|
||||||
<!-- the class itself -->
|
|
||||||
<class name="org.broadinstitute.sting.gatk.refdata.indexer.RMDIndexer" />
|
|
||||||
</dependencies>
|
|
||||||
</executable>
|
|
||||||
</package>
|
|
||||||
|
|
@ -1,11 +0,0 @@
|
||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<package name="UnifiedGenotyper">
|
|
||||||
<executable name="UnifiedGenotyper">
|
|
||||||
<main-class name="org.broadinstitute.sting.gatk.CommandLineGATK" />
|
|
||||||
<resource-bundle file="StingText.properties" />
|
|
||||||
<dependencies>
|
|
||||||
<!-- Unified genotyper -->
|
|
||||||
<class name="org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyper" />
|
|
||||||
</dependencies>
|
|
||||||
</executable>
|
|
||||||
</package>
|
|
||||||
|
|
@ -1,26 +0,0 @@
|
||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<package name="VariantAnnotator">
|
|
||||||
<executable name="VariantAnnotator">
|
|
||||||
<main-class name="org.broadinstitute.sting.gatk.CommandLineGATK" />
|
|
||||||
<resource-bundle file="StingText.properties" />
|
|
||||||
<dependencies>
|
|
||||||
<!-- Variant annotator -->
|
|
||||||
<class name="org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotator" />
|
|
||||||
<!-- The interfacess -->
|
|
||||||
<class name="org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotationType" />
|
|
||||||
<class name="org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation" />
|
|
||||||
<class name="org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation" />
|
|
||||||
<class name="org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation" />
|
|
||||||
<!-- The annotations -->
|
|
||||||
<class name="org.broadinstitute.sting.gatk.walkers.annotator.DepthPerAlleleBySample" />
|
|
||||||
<class name="org.broadinstitute.sting.gatk.walkers.annotator.DepthOfCoverage" />
|
|
||||||
<class name="org.broadinstitute.sting.gatk.walkers.annotator.HomopolymerRun" />
|
|
||||||
<class name="org.broadinstitute.sting.gatk.walkers.annotator.MappingQualityZero" />
|
|
||||||
<class name="org.broadinstitute.sting.gatk.walkers.annotator.QualByDepth" />
|
|
||||||
<class name="org.broadinstitute.sting.gatk.walkers.annotator.RMSMappingQuality" />
|
|
||||||
<class name="org.broadinstitute.sting.gatk.walkers.annotator.SpanningDeletions" />
|
|
||||||
<class name="org.broadinstitute.sting.gatk.walkers.annotator.HaplotypeScore" />
|
|
||||||
<class name="org.broadinstitute.sting.gatk.walkers.annotator.ChromosomeCounts" />
|
|
||||||
</dependencies>
|
|
||||||
</executable>
|
|
||||||
</package>
|
|
||||||
|
|
@ -1,18 +0,0 @@
|
||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<package name="VariantEval">
|
|
||||||
<executable name="VariantEval">
|
|
||||||
<main-class name="org.broadinstitute.sting.gatk.CommandLineGATK" />
|
|
||||||
<resource-bundle file="StingText.properties" />
|
|
||||||
<dependencies>
|
|
||||||
<class name="org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker" />
|
|
||||||
<class name="org.broadinstitute.sting.gatk.walkers.varianteval.CountVariants" />
|
|
||||||
<class name="org.broadinstitute.sting.gatk.walkers.varianteval.CompOverlap" />
|
|
||||||
<class name="org.broadinstitute.sting.gatk.walkers.varianteval.GenotypeConcordance" />
|
|
||||||
<class name="org.broadinstitute.sting.gatk.walkers.varianteval.MendelianViolationEvaluator" />
|
|
||||||
<class name="org.broadinstitute.sting.gatk.walkers.varianteval.TiTvVariantEvaluator" />
|
|
||||||
<class name="org.broadinstitute.sting.gatk.walkers.varianteval.ValidationRate" />
|
|
||||||
<class name="org.broadinstitute.sting.gatk.walkers.varianteval.VariantQualityScore" />
|
|
||||||
<class name="org.broadinstitute.sting.gatk.walkers.varianteval.CountFunctionalClasses" />
|
|
||||||
</dependencies>
|
|
||||||
</executable>
|
|
||||||
</package>
|
|
||||||
|
|
@ -1,13 +0,0 @@
|
||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<package name="VariantFiltration">
|
|
||||||
<executable name="VariantFiltration">
|
|
||||||
<main-class name="org.broadinstitute.sting.gatk.CommandLineGATK" />
|
|
||||||
<resource-bundle file="StingText.properties" />
|
|
||||||
<dependencies>
|
|
||||||
<!-- Workaround - depend on the logger impl required by JEXL -->
|
|
||||||
<package name="org.apache.commons.logging.impl" />
|
|
||||||
<!-- Variant filtration -->
|
|
||||||
<class name="org.broadinstitute.sting.gatk.walkers.filters.VariantFiltrationWalker" />
|
|
||||||
</dependencies>
|
|
||||||
</executable>
|
|
||||||
</package>
|
|
||||||
|
|
@ -1,12 +0,0 @@
|
||||||
<?xml version="1.0" encoding="UTF-8"?>
|
|
||||||
<package name="VariantRecalibration">
|
|
||||||
<executable name="VariantRecalibration">
|
|
||||||
<main-class name="org.broadinstitute.sting.gatk.CommandLineGATK" />
|
|
||||||
<resource-bundle file="StingText.properties" />
|
|
||||||
<dependencies>
|
|
||||||
<!-- Variant recalibration -->
|
|
||||||
<class name="org.broadinstitute.sting.gatk.walkers.variantrecalibration.GenerateVariantClustersWalker" />
|
|
||||||
<class name="org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibrator" />
|
|
||||||
</dependencies>
|
|
||||||
</executable>
|
|
||||||
</package>
|
|
||||||
|
|
@ -248,12 +248,10 @@ class MethodsDevelopmentCallingPipeline extends QScript {
|
||||||
this.reference_sequence = t.reference
|
this.reference_sequence = t.reference
|
||||||
this.intervalsString ++= List(t.intervals)
|
this.intervalsString ++= List(t.intervals)
|
||||||
this.input :+= ( if ( goldStandard ) { t.goldStandard_VCF } else { t.rawVCF } )
|
this.input :+= ( if ( goldStandard ) { t.goldStandard_VCF } else { t.rawVCF } )
|
||||||
this.training :+= new TaggedFile( t.hapmapFile, "prior=15.0")
|
this.resource :+= new TaggedFile( t.hapmapFile, "training=true,truth=true,prior=15.0" )
|
||||||
this.truth :+= new TaggedFile( t.hapmapFile, "prior=15.0")
|
this.resource :+= new TaggedFile( omni_b37, "training=true,truth=true,prior=12.0" )
|
||||||
this.training :+= new TaggedFile( omni_b37, "prior=12.0")
|
this.resource :+= new TaggedFile( training_1000G, "training=true,prior=10.0" )
|
||||||
this.truth :+= new TaggedFile( omni_b37, "prior=12.0")
|
this.resource :+= new TaggedFile( t.dbsnpFile, "known=true,prior=2.0" )
|
||||||
this.training :+= new TaggedFile( training_1000G, "prior=10.0" )
|
|
||||||
this.known :+= new TaggedFile( t.dbsnpFile, "prior=2.0" )
|
|
||||||
this.resource :+= new TaggedFile( projectConsensus_1000G, "prior=8.0" )
|
this.resource :+= new TaggedFile( projectConsensus_1000G, "prior=8.0" )
|
||||||
this.use_annotation ++= List("QD", "HaplotypeScore", "MQRankSum", "ReadPosRankSum", "MQ", "FS")
|
this.use_annotation ++= List("QD", "HaplotypeScore", "MQRankSum", "ReadPosRankSum", "MQ", "FS")
|
||||||
if(t.nSamples >= 10) {
|
if(t.nSamples >= 10) {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue