Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
7ba6c29d32
|
|
@ -12,7 +12,9 @@ if ( onCMDLine ) {
|
|||
inputFileName = args[1]
|
||||
outputPDF = args[2]
|
||||
} else {
|
||||
inputFileName = "~/Desktop/broadLocal/GATK/unstable/report.txt"
|
||||
#inputFileName = "~/Desktop/broadLocal/GATK/unstable/report.txt"
|
||||
inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/Q-25718@node1149.jobreport.txt"
|
||||
#inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/rodPerformanceGoals/history/report.082711.txt"
|
||||
outputPDF = NA
|
||||
}
|
||||
|
||||
|
|
@ -113,11 +115,22 @@ plotGroup <- function(groupTable) {
|
|||
textplot(as.data.frame(sum), show.rownames=F)
|
||||
title(paste("Job summary for", name, "itemizing each iteration"), cex=3)
|
||||
|
||||
# histogram of job times by groupAnnotations
|
||||
if ( length(groupAnnotations) == 1 && dim(sub)[1] > 1 ) {
|
||||
# todo -- how do we group by annotations?
|
||||
p <- ggplot(data=sub, aes(x=runtime)) + geom_histogram()
|
||||
p <- p + xlab("runtime in seconds") + ylab("No. of jobs")
|
||||
p <- p + opts(title=paste("Job runtime histogram for", name))
|
||||
print(p)
|
||||
}
|
||||
|
||||
# as above, but averaging over all iterations
|
||||
groupAnnotationsNoIteration = setdiff(groupAnnotations, "iteration")
|
||||
sum = cast(melt(sub, id.vars=groupAnnotationsNoIteration, measure.vars=c("runtime")), ... ~ ., fun.aggregate=c(mean, sd))
|
||||
textplot(as.data.frame(sum), show.rownames=F)
|
||||
title(paste("Job summary for", name, "averaging over all iterations"), cex=3)
|
||||
if ( dim(sub)[1] > 1 ) {
|
||||
sum = cast(melt(sub, id.vars=groupAnnotationsNoIteration, measure.vars=c("runtime")), ... ~ ., fun.aggregate=c(mean, sd))
|
||||
textplot(as.data.frame(sum), show.rownames=F)
|
||||
title(paste("Job summary for", name, "averaging over all iterations"), cex=3)
|
||||
}
|
||||
}
|
||||
|
||||
# print out some useful basic information
|
||||
|
|
@ -146,7 +159,7 @@ plotJobsGantt(gatkReportData, T)
|
|||
plotJobsGantt(gatkReportData, F)
|
||||
plotProgressByTime(gatkReportData)
|
||||
for ( group in gatkReportData ) {
|
||||
plotGroup(group)
|
||||
plotGroup(group)
|
||||
}
|
||||
|
||||
if ( ! is.na(outputPDF) ) {
|
||||
|
|
|
|||
|
|
@ -97,7 +97,6 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
|
|||
if (!( walker instanceof TreeReducible ))
|
||||
throw new IllegalArgumentException("The GATK can currently run in parallel only with TreeReducible walkers");
|
||||
|
||||
traversalEngine.startTimers();
|
||||
ReduceTree reduceTree = new ReduceTree(this);
|
||||
initializeWalker(walker);
|
||||
|
||||
|
|
|
|||
|
|
@ -44,7 +44,6 @@ public class LinearMicroScheduler extends MicroScheduler {
|
|||
* @param shardStrategy A strategy for sharding the data.
|
||||
*/
|
||||
public Object execute(Walker walker, ShardStrategy shardStrategy) {
|
||||
traversalEngine.startTimers();
|
||||
walker.initialize();
|
||||
Accumulator accumulator = Accumulator.create(engine,walker);
|
||||
|
||||
|
|
@ -54,6 +53,7 @@ public class LinearMicroScheduler extends MicroScheduler {
|
|||
if ( done || shard == null ) // we ran out of shards that aren't owned
|
||||
break;
|
||||
|
||||
traversalEngine.startTimersIfNecessary();
|
||||
if(shard.getShardType() == Shard.ShardType.LOCUS) {
|
||||
LocusWalker lWalker = (LocusWalker)walker;
|
||||
WindowMaker windowMaker = new WindowMaker(shard, engine.getGenomeLocParser(), getReadIterator(shard), shard.getGenomeLocs(), engine.getSampleMetadata());
|
||||
|
|
|
|||
|
|
@ -57,6 +57,7 @@ public class ShardTraverser implements Callable {
|
|||
|
||||
public Object call() {
|
||||
try {
|
||||
traversalEngine.startTimersIfNecessary();
|
||||
long startTime = System.currentTimeMillis();
|
||||
|
||||
Object accumulator = walker.reduceInit();
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.refdata;
|
|||
import net.sf.samtools.util.SequenceUtil;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broad.tribble.annotation.Strand;
|
||||
import org.broad.tribble.dbsnp.DbSNPFeature;
|
||||
import org.broad.tribble.dbsnp.OldDbSNPFeature;
|
||||
import org.broad.tribble.gelitext.GeliTextFeature;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
|
|
@ -93,27 +93,27 @@ public class VariantContextAdaptors {
|
|||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
private static class DBSnpAdaptor implements VCAdaptor {
|
||||
private static boolean isSNP(DbSNPFeature feature) {
|
||||
private static boolean isSNP(OldDbSNPFeature feature) {
|
||||
return feature.getVariantType().contains("single") && feature.getLocationType().contains("exact");
|
||||
}
|
||||
|
||||
private static boolean isMNP(DbSNPFeature feature) {
|
||||
private static boolean isMNP(OldDbSNPFeature feature) {
|
||||
return feature.getVariantType().contains("mnp") && feature.getLocationType().contains("range");
|
||||
}
|
||||
|
||||
private static boolean isInsertion(DbSNPFeature feature) {
|
||||
private static boolean isInsertion(OldDbSNPFeature feature) {
|
||||
return feature.getVariantType().contains("insertion");
|
||||
}
|
||||
|
||||
private static boolean isDeletion(DbSNPFeature feature) {
|
||||
private static boolean isDeletion(OldDbSNPFeature feature) {
|
||||
return feature.getVariantType().contains("deletion");
|
||||
}
|
||||
|
||||
private static boolean isIndel(DbSNPFeature feature) {
|
||||
private static boolean isIndel(OldDbSNPFeature feature) {
|
||||
return isInsertion(feature) || isDeletion(feature) || isComplexIndel(feature);
|
||||
}
|
||||
|
||||
public static boolean isComplexIndel(DbSNPFeature feature) {
|
||||
public static boolean isComplexIndel(OldDbSNPFeature feature) {
|
||||
return feature.getVariantType().contains("in-del");
|
||||
}
|
||||
|
||||
|
|
@ -125,7 +125,7 @@ public class VariantContextAdaptors {
|
|||
*
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
public static List<String> getAlternateAlleleList(DbSNPFeature feature) {
|
||||
public static List<String> getAlternateAlleleList(OldDbSNPFeature feature) {
|
||||
List<String> ret = new ArrayList<String>();
|
||||
for (String allele : getAlleleList(feature))
|
||||
if (!allele.equals(String.valueOf(feature.getNCBIRefBase()))) ret.add(allele);
|
||||
|
|
@ -139,7 +139,7 @@ public class VariantContextAdaptors {
|
|||
*
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
public static List<String> getAlleleList(DbSNPFeature feature) {
|
||||
public static List<String> getAlleleList(OldDbSNPFeature feature) {
|
||||
List<String> alleleList = new ArrayList<String>();
|
||||
// add ref first
|
||||
if ( feature.getStrand() == Strand.POSITIVE )
|
||||
|
|
@ -156,14 +156,14 @@ public class VariantContextAdaptors {
|
|||
|
||||
/**
|
||||
* Converts non-VCF formatted dbSNP records to VariantContext.
|
||||
* @return DbSNPFeature.
|
||||
* @return OldDbSNPFeature.
|
||||
*/
|
||||
@Override
|
||||
public Class<? extends Feature> getAdaptableFeatureType() { return DbSNPFeature.class; }
|
||||
public Class<? extends Feature> getAdaptableFeatureType() { return OldDbSNPFeature.class; }
|
||||
|
||||
@Override
|
||||
public VariantContext convert(String name, Object input, ReferenceContext ref) {
|
||||
DbSNPFeature dbsnp = (DbSNPFeature)input;
|
||||
OldDbSNPFeature dbsnp = (OldDbSNPFeature)input;
|
||||
if ( ! Allele.acceptableAlleleBases(dbsnp.getNCBIRefBase()) )
|
||||
return null;
|
||||
Allele refAllele = Allele.create(dbsnp.getNCBIRefBase(), true);
|
||||
|
|
|
|||
|
|
@ -115,12 +115,13 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
|
|||
LinkedList<ProcessingHistory> history = new LinkedList<ProcessingHistory>();
|
||||
|
||||
/** We use the SimpleTimer to time our run */
|
||||
private SimpleTimer timer = new SimpleTimer("Traversal");
|
||||
private SimpleTimer timer = null;
|
||||
|
||||
// How long can we go without printing some progress info?
|
||||
private static final int PRINT_PROGRESS_CHECK_FREQUENCY_IN_CYCLES = 1000;
|
||||
private int printProgressCheckCounter = 0;
|
||||
private long lastProgressPrintTime = -1; // When was the last time we printed progress log?
|
||||
private long MIN_ELAPSED_TIME_BEFORE_FIRST_PROGRESS = 120 * 1000; // in milliseconds
|
||||
private long PROGRESS_PRINT_FREQUENCY = 10 * 1000; // in milliseconds
|
||||
private final double TWO_HOURS_IN_SECONDS = 2.0 * 60.0 * 60.0;
|
||||
private final double TWELVE_HOURS_IN_SECONDS = 12.0 * 60.0 * 60.0;
|
||||
|
|
@ -209,11 +210,16 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
|
|||
}
|
||||
}
|
||||
/**
|
||||
* Should be called to indicate that we're going to process records and the timer should start ticking
|
||||
* Should be called to indicate that we're going to process records and the timer should start ticking. This
|
||||
* function should be called right before any traversal work is done, to avoid counting setup costs in the
|
||||
* processing costs and inflating the estimated runtime.
|
||||
*/
|
||||
public void startTimers() {
|
||||
timer.start();
|
||||
lastProgressPrintTime = timer.currentTime();
|
||||
public void startTimersIfNecessary() {
|
||||
if ( timer == null ) {
|
||||
timer = new SimpleTimer("Traversal");
|
||||
timer.start();
|
||||
lastProgressPrintTime = timer.currentTime();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -224,7 +230,8 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
|
|||
* @return true if the maximum interval (in millisecs) has passed since the last printing
|
||||
*/
|
||||
private boolean maxElapsedIntervalForPrinting(final long curTime, long lastPrintTime, long printFreq) {
|
||||
return (curTime - lastPrintTime) > printFreq;
|
||||
long elapsed = curTime - lastPrintTime;
|
||||
return elapsed > printFreq && elapsed > MIN_ELAPSED_TIME_BEFORE_FIRST_PROGRESS;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -79,24 +79,45 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
|
|||
@ArgumentCollection
|
||||
protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
|
||||
|
||||
/**
|
||||
* If this argument is present, the original allele frequencies and counts from this vcf are added as annotations ACH,AFH and ANH. at each record present in this vcf
|
||||
*/
|
||||
@Input(fullName="comp", shortName = "comp", doc="Comparison VCF file", required=false)
|
||||
public RodBinding<VariantContext> comp;
|
||||
|
||||
|
||||
/**
|
||||
* This required argument is used to annotate each site in the vcf INFO field with R2 annotation. Will be NaN if Beagle determined there are no variant samples.
|
||||
*/
|
||||
@Input(fullName="beagleR2", shortName = "beagleR2", doc="Beagle-produced .r2 file containing R^2 values for all markers", required=true)
|
||||
public RodBinding<BeagleFeature> beagleR2;
|
||||
|
||||
/**
|
||||
* These values will populate the GL field for each sample and contain the posterior probability of each genotype given the data after phasing and imputation.
|
||||
*/
|
||||
@Input(fullName="beagleProbs", shortName = "beagleProbs", doc="Beagle-produced .probs file containing posterior genotype probabilities", required=true)
|
||||
public RodBinding<BeagleFeature> beagleProbs;
|
||||
|
||||
/**
|
||||
* By default, all genotypes will be marked in the VCF as "phased", using the "|" separator after Beagle.
|
||||
*/
|
||||
@Input(fullName="beaglePhased", shortName = "beaglePhased", doc="Beagle-produced .phased file containing phased genotypes", required=true)
|
||||
public RodBinding<BeagleFeature> beaglePhased;
|
||||
|
||||
@Output(doc="VCF File to which variants should be written",required=true)
|
||||
protected VCFWriter vcfWriter = null;
|
||||
|
||||
/**
|
||||
* If this argument is absent, and if Beagle determines that there is no sample in a site that has a variant genotype, the site will be marked as filtered (Default behavior).
|
||||
* If the argument is present, the site won't be marked as filtered under this condition even if there are no variant genotypes.
|
||||
*/
|
||||
@Argument(fullName="dont_mark_monomorphic_sites_as_filtered", shortName="keep_monomorphic", doc="If provided, we won't filter sites that beagle tags as monomorphic. Useful for imputing a sample's genotypes from a reference panel" ,required=false)
|
||||
public boolean DONT_FILTER_MONOMORPHIC_SITES = false;
|
||||
|
||||
/**
|
||||
* Value between 0 and 1. If the probability of getting a genotype correctly (based on the posterior genotype probabilities and the actual genotype) is below this threshold,
|
||||
* a genotype will be substitute by a no-call.
|
||||
*/
|
||||
@Argument(fullName="no" +
|
||||
"call_threshold", shortName="ncthr", doc="Threshold of confidence at which a genotype won't be called", required=false)
|
||||
private double noCallThreshold = 0.0;
|
||||
|
|
|
|||
|
|
@ -112,6 +112,9 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
|
|||
@Argument(fullName = "bootstrap_vcf",shortName = "bvcf", doc = "Output a VCF with the records used for bootstrapping filtered out", required = false)
|
||||
VCFWriter bootstrapVCFOutput = null;
|
||||
|
||||
/**
|
||||
* If sample gender is known, this flag should be set to true to ensure that Beagle treats male Chr X properly.
|
||||
*/
|
||||
@Argument(fullName = "checkIsMaleOnChrX", shortName = "checkIsMaleOnChrX", doc = "Set to true when Beagle-ing chrX and want to ensure male samples don't have heterozygous calls.", required = false)
|
||||
public boolean CHECK_IS_MALE_ON_CHR_X = false;
|
||||
|
||||
|
|
|
|||
|
|
@ -105,10 +105,19 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<DoCOutputType.Partiti
|
|||
@Multiplex(value=DoCOutputMultiplexer.class,arguments={"partitionTypes","refSeqGeneList","omitDepthOutput","omitIntervals","omitSampleSummary","omitLocusTable"})
|
||||
Map<DoCOutputType,PrintStream> out;
|
||||
|
||||
/**
|
||||
* Sets the low-coverage cutoff for granular binning. All loci with depth < START are counted in the first bin.
|
||||
*/
|
||||
@Argument(fullName = "start", doc = "Starting (left endpoint) for granular binning", required = false)
|
||||
int start = 1;
|
||||
/**
|
||||
* Sets the high-coverage cutoff for granular binning. All loci with depth > END are counted in the last bin.
|
||||
*/
|
||||
@Argument(fullName = "stop", doc = "Ending (right endpoint) for granular binning", required = false)
|
||||
int stop = 500;
|
||||
/**
|
||||
* Sets the number of bins for granular binning
|
||||
*/
|
||||
@Argument(fullName = "nBins", doc = "Number of bins to use for granular binning", required = false)
|
||||
int nBins = 499;
|
||||
@Argument(fullName = "minMappingQuality", shortName = "mmq", doc = "Minimum mapping quality of reads to count towards depth. Defaults to -1.", required = false)
|
||||
|
|
@ -119,28 +128,59 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<DoCOutputType.Partiti
|
|||
byte minBaseQuality = -1;
|
||||
@Argument(fullName = "maxBaseQuality", doc = "Maximum quality of bases to count towards depth. Defaults to 127 (Byte.MAX_VALUE).", required = false)
|
||||
byte maxBaseQuality = Byte.MAX_VALUE;
|
||||
/**
|
||||
* Instead of reporting depth, report the base pileup at each locus
|
||||
*/
|
||||
@Argument(fullName = "printBaseCounts", shortName = "baseCounts", doc = "Will add base counts to per-locus output.", required = false)
|
||||
boolean printBaseCounts = false;
|
||||
/**
|
||||
* Do not tabulate locus statistics (# loci covered by sample by coverage)
|
||||
*/
|
||||
@Argument(fullName = "omitLocusTable", shortName = "omitLocusTable", doc = "Will not calculate the per-sample per-depth counts of loci, which should result in speedup", required = false)
|
||||
boolean omitLocusTable = false;
|
||||
/**
|
||||
* Do not tabulate interval statistics (mean, median, quartiles AND # intervals by sample by coverage)
|
||||
*/
|
||||
@Argument(fullName = "omitIntervalStatistics", shortName = "omitIntervals", doc = "Will omit the per-interval statistics section, which should result in speedup", required = false)
|
||||
boolean omitIntervals = false;
|
||||
/**
|
||||
* Do not print the total coverage at every base
|
||||
*/
|
||||
@Argument(fullName = "omitDepthOutputAtEachBase", shortName = "omitBaseOutput", doc = "Will omit the output of the depth of coverage at each base, which should result in speedup", required = false)
|
||||
boolean omitDepthOutput = false;
|
||||
@Argument(fullName = "printBinEndpointsAndExit", doc = "Prints the bin values and exits immediately. Use to calibrate what bins you want before running on data.", required = false)
|
||||
boolean printBinEndpointsAndExit = false;
|
||||
/**
|
||||
* Do not tabulate the sample summary statistics (total, mean, median, quartile coverage per sample)
|
||||
*/
|
||||
@Argument(fullName = "omitPerSampleStats", shortName = "omitSampleSummary", doc = "Omits the summary files per-sample. These statistics are still calculated, so this argument will not improve runtime.", required = false)
|
||||
boolean omitSampleSummary = false;
|
||||
/**
|
||||
* A way of partitioning reads into groups. Can be sample, readgroup, or library.
|
||||
*/
|
||||
@Argument(fullName = "partitionType", shortName = "pt", doc = "Partition type for depth of coverage. Defaults to sample. Can be any combination of sample, readgroup, library.", required = false)
|
||||
Set<DoCOutputType.Partition> partitionTypes = EnumSet.of(DoCOutputType.Partition.sample);
|
||||
/**
|
||||
* Consider a spanning deletion as contributing to coverage. Also enables deletion counts in per-base output.
|
||||
*/
|
||||
@Argument(fullName = "includeDeletions", shortName = "dels", doc = "Include information on deletions", required = false)
|
||||
boolean includeDeletions = false;
|
||||
@Argument(fullName = "ignoreDeletionSites", doc = "Ignore sites consisting only of deletions", required = false)
|
||||
boolean ignoreDeletionSites = false;
|
||||
|
||||
/**
|
||||
* Path to the RefSeq file for use in aggregating coverage statistics over genes
|
||||
*/
|
||||
@Argument(fullName = "calculateCoverageOverGenes", shortName = "geneList", doc = "Calculate the coverage statistics over this list of genes. Currently accepts RefSeq.", required = false)
|
||||
File refSeqGeneList = null;
|
||||
/**
|
||||
* The format of the output file
|
||||
*/
|
||||
@Argument(fullName = "outputFormat", doc = "the format of the output file (e.g. csv, table, rtable); defaults to r-readable table", required = false)
|
||||
String outputFormat = "rtable";
|
||||
/**
|
||||
* A coverage threshold for summarizing (e.g. % bases >= CT for each sample)
|
||||
*/
|
||||
@Argument(fullName = "summaryCoverageThreshold", shortName = "ct", doc = "for summary file outputs, report the % of bases coverd to >= this number. Defaults to 15; can take multiple arguments.", required = false)
|
||||
int[] coverageThresholds = {15};
|
||||
|
||||
|
|
@ -963,4 +1003,4 @@ class CoveragePartitioner {
|
|||
public Map<DoCOutputType.Partition,List<String>> getIdentifiersByType() {
|
||||
return identifiersByType;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -39,10 +39,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
|||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
|
@ -239,7 +236,8 @@ public class UnifiedGenotyperEngine {
|
|||
VariantContext vcInput = SNPGenotypeLikelihoodsCalculationModel.getSNPVCFromAllelesRod(tracker, ref, false, logger, UAC.alleles);
|
||||
if ( vcInput == null )
|
||||
return null;
|
||||
vc = new VariantContext("UG_call", vcInput.getChr(), vcInput.getStart(), vcInput.getEnd(), vcInput.getAlleles());
|
||||
vc = new VariantContext("UG_call", vcInput.getChr(), vcInput.getStart(), vcInput.getEnd(), vcInput.getAlleles(), InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, ref.getBase());
|
||||
|
||||
} else {
|
||||
// deal with bad/non-standard reference bases
|
||||
if ( !Allele.acceptableAlleleBases(new byte[]{ref.getBase()}) )
|
||||
|
|
|
|||
|
|
@ -77,6 +77,11 @@ import java.util.*;
|
|||
* if even a weak evidence for the same indel, not necessarily a confident call, exists in the first sample ("Normal"), or as somatic
|
||||
* if first bam has coverage at the site but no indication for an indel. In the --somatic mode, BED output contains
|
||||
* only somatic calls, while --verbose output contains all calls annotated with GERMLINE/SOMATIC keywords.
|
||||
*
|
||||
* <b>If any of the general usage of this tool or any of the command-line arguments for this tool are not clear to you,
|
||||
* please email asivache at broadinstitute dot org and he will gladly explain everything in more detail.</b>
|
||||
*
|
||||
*
|
||||
*/
|
||||
@ReadFilters({Platform454Filter.class, MappingQualityZeroFilter.class, PlatformUnitFilter.class})
|
||||
public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
||||
|
|
|
|||
|
|
@ -93,20 +93,30 @@ import java.util.List;
|
|||
*/
|
||||
@Requires(value={DataSource.REFERENCE})
|
||||
public class ValidationAmplicons extends RodWalker<Integer,Integer> {
|
||||
/**
|
||||
* A Table-formatted file listing amplicon contig, start, stop, and a name for the amplicon (or probe)
|
||||
*/
|
||||
@Input(fullName = "ProbeIntervals", doc="A collection of intervals in table format with optional names that represent the "+
|
||||
"intervals surrounding the probe sites amplicons should be designed for", required=true)
|
||||
RodBinding<TableFeature> probeIntervals;
|
||||
|
||||
/**
|
||||
* A VCF file containing the bi-allelic sites for validation. Filtered records will prompt a warning, and will be flagged as filtered in the output fastq.
|
||||
*/
|
||||
@Input(fullName = "ValidateAlleles", doc="A VCF containing the sites and alleles you want to validate. Restricted to *BI-Allelic* sites", required=true)
|
||||
RodBinding<VariantContext> validateAlleles;
|
||||
|
||||
/**
|
||||
* A VCF file containing variants to be masked. A mask variant overlapping a validation site will be ignored at the validation site.
|
||||
*/
|
||||
@Input(fullName = "MaskAlleles", doc="A VCF containing the sites you want to MASK from the designed amplicon (e.g. by Ns or lower-cased bases)", required=true)
|
||||
RodBinding<VariantContext> maskAlleles;
|
||||
|
||||
|
||||
@Argument(doc="Lower case SNPs rather than replacing with 'N'",fullName="lowerCaseSNPs",required=false)
|
||||
boolean lowerCaseSNPs = false;
|
||||
|
||||
/**
|
||||
* BWA single-end alignment is used as a primer specificity proxy. Low-complexity regions (that don't align back to themselves as a best hit) are lowercased.
|
||||
* This changes the size of the k-mer used for alignment.
|
||||
*/
|
||||
@Argument(doc="Size of the virtual primer to use for lower-casing regions with low specificity",fullName="virtualPrimerSize",required=false)
|
||||
int virtualPrimerSize = 20;
|
||||
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
|||
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
||||
import org.broadinstitute.sting.gatk.walkers.Window;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.JexlExpression;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.variantrecalibration.Tranche;
|
||||
|
|
@ -24,6 +25,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
|||
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
||||
|
|
@ -122,9 +124,6 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
@Argument(fullName="doNotUseAllStandardStratifications", shortName="noST", doc="Do not use the standard stratification modules by default (instead, only those that are specified with the -S option)", required=false)
|
||||
protected Boolean NO_STANDARD_STRATIFICATIONS = false;
|
||||
|
||||
@Argument(fullName="onlyVariantsOfType", shortName="VT", doc="If provided, only variants of these types will be considered during the evaluation, in ", required=false)
|
||||
protected Set<VariantContext.Type> typesToUse = null;
|
||||
|
||||
/**
|
||||
* See the -list argument to view available modules.
|
||||
*/
|
||||
|
|
@ -150,9 +149,6 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
@Argument(shortName="mvq", fullName="mendelianViolationQualThreshold", doc="Minimum genotype QUAL score for each trio member required to accept a site as a violation", required=false)
|
||||
protected double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 50;
|
||||
|
||||
@Argument(fullName="tranchesFile", shortName="tf", doc="The input tranches file describing where to cut the data", required=false)
|
||||
private String TRANCHE_FILENAME = null;
|
||||
|
||||
@Argument(fullName="ancestralAlignments", shortName="aa", doc="Fasta file with ancestral alleles", required=false)
|
||||
private File ancestralAlignmentsFile = null;
|
||||
|
||||
|
|
@ -233,16 +229,6 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
jexlExpressions.add(sjexl);
|
||||
}
|
||||
|
||||
// Add select expressions for anything in the tranches file
|
||||
if ( TRANCHE_FILENAME != null ) {
|
||||
// we are going to build a few select names automatically from the tranches file
|
||||
for ( Tranche t : Tranche.readTranches(new File(TRANCHE_FILENAME)) ) {
|
||||
logger.info("Adding select for all variant above the pCut of : " + t);
|
||||
SELECT_EXPS.add(String.format(VariantRecalibrator.VQS_LOD_KEY + " >= %.2f", t.minVQSLod));
|
||||
SELECT_NAMES.add(String.format("TS-%.2f", t.ts));
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize the set of stratifications and evaluations to use
|
||||
stratificationObjects = variantEvalUtils.initializeStratificationObjects(this, NO_STANDARD_STRATIFICATIONS, STRATIFICATIONS_TO_USE);
|
||||
Set<Class<? extends VariantEvaluator>> evaluationObjects = variantEvalUtils.initializeEvaluationObjects(NO_STANDARD_MODULES, MODULES_TO_USE);
|
||||
|
|
@ -317,9 +303,9 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
// find the comp
|
||||
final VariantContext comp = findMatchingComp(eval, compSet);
|
||||
|
||||
HashMap<VariantStratifier, ArrayList<String>> stateMap = new HashMap<VariantStratifier, ArrayList<String>>();
|
||||
HashMap<VariantStratifier, List<String>> stateMap = new HashMap<VariantStratifier, List<String>>();
|
||||
for ( VariantStratifier vs : stratificationObjects ) {
|
||||
ArrayList<String> states = vs.getRelevantStates(ref, tracker, comp, compRod.getName(), eval, evalRod.getName(), sampleName);
|
||||
List<String> states = vs.getRelevantStates(ref, tracker, comp, compRod.getName(), eval, evalRod.getName(), sampleName);
|
||||
stateMap.put(vs, states);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -75,7 +75,7 @@ public class CompOverlap extends VariantEvaluator implements StandardEval {
|
|||
}
|
||||
|
||||
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
boolean evalIsGood = eval != null && eval.isVariant();
|
||||
boolean evalIsGood = eval != null && eval.isPolymorphic();
|
||||
boolean compIsGood = comp != null && comp.isNotFiltered() && (eval == null || comp.getType() == eval.getType());
|
||||
|
||||
if (compIsGood) nCompVariants++; // count the number of comp events
|
||||
|
|
|
|||
|
|
@ -100,11 +100,12 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
|
|||
// So in order to maintain consistency with the previous implementation (and the intention of the original author), I've
|
||||
// added in a proxy check for monomorphic status here.
|
||||
// Protect against case when vc only as no-calls too - can happen if we strafity by sample and sample as a single no-call.
|
||||
if ( !vc1.isVariant() || (vc1.hasGenotypes() && vc1.getHomRefCount() + vc1.getNoCallCount() == vc1.getNSamples()) ) {
|
||||
if ( vc1.isMonomorphic() ) {
|
||||
nRefLoci++;
|
||||
} else {
|
||||
switch (vc1.getType()) {
|
||||
case NO_VARIATION:
|
||||
// shouldn't get here
|
||||
break;
|
||||
case SNP:
|
||||
nVariantLoci++;
|
||||
|
|
|
|||
|
|
@ -90,18 +90,19 @@ public class IndelLengthHistogram extends VariantEvaluator {
|
|||
public int getComparisonOrder() { return 1; } // need only the evals
|
||||
|
||||
public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( ! vc1.isBiallelic() && vc1.isIndel() ) {
|
||||
//veWalker.getLogger().warn("[IndelLengthHistogram] Non-biallelic indel at "+ref.getLocus()+" ignored.");
|
||||
return vc1.toString(); // biallelic sites are output
|
||||
}
|
||||
|
||||
if ( vc1.isIndel() ) {
|
||||
if ( vc1.isIndel() && vc1.isPolymorphic() ) {
|
||||
|
||||
if ( ! vc1.isBiallelic() ) {
|
||||
//veWalker.getLogger().warn("[IndelLengthHistogram] Non-biallelic indel at "+ref.getLocus()+" ignored.");
|
||||
return vc1.toString(); // biallelic sites are output
|
||||
}
|
||||
|
||||
// only count simple insertions/deletions, not complex indels
|
||||
if ( vc1.isSimpleInsertion() ) {
|
||||
indelHistogram.update(vc1.getAlternateAllele(0).length());
|
||||
} else if ( vc1.isSimpleDeletion() ) {
|
||||
indelHistogram.update(-vc1.getReference().length());
|
||||
} else {
|
||||
throw new ReviewedStingException("Indel type that is not insertion or deletion.");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -270,7 +270,7 @@ public class IndelStatistics extends VariantEvaluator {
|
|||
|
||||
public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
|
||||
if (eval != null ) {
|
||||
if (eval != null && eval.isPolymorphic()) {
|
||||
if ( indelStats == null ) {
|
||||
indelStats = new IndelStats(eval);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -166,7 +166,7 @@ public class SimpleMetricsByAC extends VariantEvaluator implements StandardEval
|
|||
}
|
||||
}
|
||||
|
||||
if ( eval.isSNP() && eval.isBiallelic() && metrics != null ) {
|
||||
if ( eval.isSNP() && eval.isBiallelic() && eval.isPolymorphic() && metrics != null ) {
|
||||
metrics.incrValue(eval);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -37,77 +37,74 @@ public class ThetaVariantEvaluator extends VariantEvaluator {
|
|||
}
|
||||
|
||||
public String update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if (vc == null || !vc.isSNP() || !vc.hasGenotypes()) {
|
||||
if (vc == null || !vc.isSNP() || !vc.hasGenotypes() || vc.isMonomorphic()) {
|
||||
return null; //no interesting sites
|
||||
}
|
||||
|
||||
if (vc.hasGenotypes()) {
|
||||
//this maps allele to a count
|
||||
ConcurrentMap<String, Integer> alleleCounts = new ConcurrentHashMap<String, Integer>();
|
||||
|
||||
//this maps allele to a count
|
||||
ConcurrentMap<String, Integer> alleleCounts = new ConcurrentHashMap<String, Integer>();
|
||||
int numHetsHere = 0;
|
||||
float numGenosHere = 0;
|
||||
int numIndsHere = 0;
|
||||
|
||||
int numHetsHere = 0;
|
||||
float numGenosHere = 0;
|
||||
int numIndsHere = 0;
|
||||
for (Genotype genotype : vc.getGenotypes().values()) {
|
||||
numIndsHere++;
|
||||
if (!genotype.isNoCall()) {
|
||||
//increment stats for heterozygosity
|
||||
if (genotype.isHet()) {
|
||||
numHetsHere++;
|
||||
}
|
||||
|
||||
for (Genotype genotype : vc.getGenotypes().values()) {
|
||||
numIndsHere++;
|
||||
if (!genotype.isNoCall()) {
|
||||
//increment stats for heterozygosity
|
||||
if (genotype.isHet()) {
|
||||
numHetsHere++;
|
||||
}
|
||||
numGenosHere++;
|
||||
//increment stats for pairwise mismatches
|
||||
|
||||
numGenosHere++;
|
||||
//increment stats for pairwise mismatches
|
||||
|
||||
for (Allele allele : genotype.getAlleles()) {
|
||||
if (allele.isNonNull() && allele.isCalled()) {
|
||||
String alleleString = allele.toString();
|
||||
alleleCounts.putIfAbsent(alleleString, 0);
|
||||
alleleCounts.put(alleleString, alleleCounts.get(alleleString) + 1);
|
||||
}
|
||||
for (Allele allele : genotype.getAlleles()) {
|
||||
if (allele.isNonNull() && allele.isCalled()) {
|
||||
String alleleString = allele.toString();
|
||||
alleleCounts.putIfAbsent(alleleString, 0);
|
||||
alleleCounts.put(alleleString, alleleCounts.get(alleleString) + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (numGenosHere > 0) {
|
||||
//only if have one called genotype at least
|
||||
this.numSites++;
|
||||
}
|
||||
if (numGenosHere > 0) {
|
||||
//only if have one called genotype at least
|
||||
this.numSites++;
|
||||
|
||||
this.totalHet += numHetsHere / numGenosHere;
|
||||
this.totalHet += numHetsHere / numGenosHere;
|
||||
|
||||
//compute based on num sites
|
||||
float harmonicFactor = 0;
|
||||
for (int i = 1; i <= numIndsHere; i++) {
|
||||
harmonicFactor += 1.0 / i;
|
||||
}
|
||||
this.thetaRegionNumSites += 1.0 / harmonicFactor;
|
||||
//compute based on num sites
|
||||
float harmonicFactor = 0;
|
||||
for (int i = 1; i <= numIndsHere; i++) {
|
||||
harmonicFactor += 1.0 / i;
|
||||
}
|
||||
this.thetaRegionNumSites += 1.0 / harmonicFactor;
|
||||
|
||||
//now compute pairwise mismatches
|
||||
float numPairwise = 0;
|
||||
float numDiffs = 0;
|
||||
for (String allele1 : alleleCounts.keySet()) {
|
||||
int allele1Count = alleleCounts.get(allele1);
|
||||
//now compute pairwise mismatches
|
||||
float numPairwise = 0;
|
||||
float numDiffs = 0;
|
||||
for (String allele1 : alleleCounts.keySet()) {
|
||||
int allele1Count = alleleCounts.get(allele1);
|
||||
|
||||
for (String allele2 : alleleCounts.keySet()) {
|
||||
if (allele1.compareTo(allele2) < 0) {
|
||||
continue;
|
||||
}
|
||||
if (allele1 .compareTo(allele2) == 0) {
|
||||
numPairwise += allele1Count * (allele1Count - 1) * .5;
|
||||
for (String allele2 : alleleCounts.keySet()) {
|
||||
if (allele1.compareTo(allele2) < 0) {
|
||||
continue;
|
||||
}
|
||||
if (allele1 .compareTo(allele2) == 0) {
|
||||
numPairwise += allele1Count * (allele1Count - 1) * .5;
|
||||
|
||||
}
|
||||
else {
|
||||
int allele2Count = alleleCounts.get(allele2);
|
||||
numPairwise += allele1Count * allele2Count;
|
||||
numDiffs += allele1Count * allele2Count;
|
||||
}
|
||||
}
|
||||
else {
|
||||
int allele2Count = alleleCounts.get(allele2);
|
||||
numPairwise += allele1Count * allele2Count;
|
||||
numDiffs += allele1Count * allele2Count;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (numPairwise > 0) {
|
||||
this.totalAvgDiffs += numDiffs / numPairwise;
|
||||
}
|
||||
if (numPairwise > 0) {
|
||||
this.totalAvgDiffs += numDiffs / numPairwise;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEv
|
|||
}
|
||||
|
||||
public void updateTiTv(VariantContext vc, boolean updateStandard) {
|
||||
if (vc != null && vc.isSNP() && vc.isBiallelic()) {
|
||||
if (vc != null && vc.isSNP() && vc.isBiallelic() && vc.isPolymorphic()) {
|
||||
if (VariantContextUtils.isTransition(vc)) {
|
||||
if (updateStandard) nTiInComp++;
|
||||
else nTi++;
|
||||
|
|
|
|||
|
|
@ -117,7 +117,8 @@ public class ValidationReport extends VariantEvaluator implements StandardEval {
|
|||
public SiteStatus calcSiteStatus(VariantContext vc) {
|
||||
if ( vc == null ) return SiteStatus.NO_CALL;
|
||||
if ( vc.isFiltered() ) return SiteStatus.FILTERED;
|
||||
if ( ! vc.isVariant() ) return SiteStatus.MONO;
|
||||
if ( vc.isMonomorphic() ) return SiteStatus.MONO;
|
||||
if ( vc.hasGenotypes() ) return SiteStatus.POLY; // must be polymorphic if isMonomorphic was false and there are genotypes
|
||||
|
||||
if ( vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) ) {
|
||||
int ac = 0;
|
||||
|
|
@ -132,8 +133,6 @@ public class ValidationReport extends VariantEvaluator implements StandardEval {
|
|||
else
|
||||
ac = vc.getAttributeAsInt(VCFConstants.ALLELE_COUNT_KEY);
|
||||
return ac > 0 ? SiteStatus.POLY : SiteStatus.MONO;
|
||||
} else if ( vc.hasGenotypes() ) {
|
||||
return vc.isPolymorphic() ? SiteStatus.POLY : SiteStatus.MONO;
|
||||
} else {
|
||||
return TREAT_ALL_SITES_IN_EVAL_VCF_AS_CALLED ? SiteStatus.POLY : SiteStatus.NO_CALL; // we can't figure out what to do
|
||||
//return SiteStatus.NO_CALL; // we can't figure out what to do
|
||||
|
|
|
|||
|
|
@ -232,7 +232,7 @@ public class VariantQualityScore extends VariantEvaluator {
|
|||
public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
final String interesting = null;
|
||||
|
||||
if( eval != null && eval.isSNP() && eval.isBiallelic() ) { //BUGBUG: only counting biallelic sites (revisit what to do with triallelic sites)
|
||||
if( eval != null && eval.isSNP() && eval.isBiallelic() && eval.isPolymorphic() ) { //BUGBUG: only counting biallelic sites (revisit what to do with triallelic sites)
|
||||
if( titvStats == null ) { titvStats = new TiTvStats(); }
|
||||
titvStats.incrValue(eval.getPhredScaledQual(), VariantContextUtils.isTransition(eval));
|
||||
|
||||
|
|
|
|||
|
|
@ -10,10 +10,13 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Stratifies the eval RODs by the allele count of the alternate allele
|
||||
*
|
||||
* Looks at the AC value in the INFO field, and uses that value if present. If absent,
|
||||
* computes the AC from the genotypes themselves. For no AC can be computed, 0 is used.
|
||||
*/
|
||||
public class AlleleCount extends VariantStratifier {
|
||||
// needs to know the variant context
|
||||
private ArrayList<String> states = new ArrayList<String>();
|
||||
|
||||
@Override
|
||||
public void initialize() {
|
||||
List<RodBinding<VariantContext>> evals = getVariantEvalWalker().getEvals();
|
||||
|
|
@ -35,11 +38,7 @@ public class AlleleCount extends VariantStratifier {
|
|||
getVariantEvalWalker().getLogger().info("AlleleCount using " + nchrom + " chromosomes");
|
||||
}
|
||||
|
||||
public ArrayList<String> getAllStates() {
|
||||
return states;
|
||||
}
|
||||
|
||||
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
ArrayList<String> relevantStates = new ArrayList<String>(1);
|
||||
|
||||
if (eval != null) {
|
||||
|
|
|
|||
|
|
@ -6,11 +6,15 @@ import org.broadinstitute.sting.utils.MathUtils;
|
|||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Stratifies the eval RODs by the allele frequency of the alternate allele
|
||||
*
|
||||
* Uses a constant 0.005 frequency grid, and projects the AF INFO field value. Requires
|
||||
* that AF be present in every ROD, otherwise this stratification throws an exception
|
||||
*/
|
||||
public class AlleleFrequency extends VariantStratifier {
|
||||
// needs to know the variant context
|
||||
private ArrayList<String> states;
|
||||
|
||||
@Override
|
||||
public void initialize() {
|
||||
states = new ArrayList<String>();
|
||||
|
|
@ -19,11 +23,7 @@ public class AlleleFrequency extends VariantStratifier {
|
|||
}
|
||||
}
|
||||
|
||||
public ArrayList<String> getAllStates() {
|
||||
return states;
|
||||
}
|
||||
|
||||
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
ArrayList<String> relevantStates = new ArrayList<String>();
|
||||
|
||||
if (eval != null) {
|
||||
|
|
|
|||
|
|
@ -6,22 +6,21 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
/**
|
||||
* Required stratification grouping output by each comp ROD
|
||||
*/
|
||||
public class CompRod extends VariantStratifier implements RequiredStratification {
|
||||
private ArrayList<String> states;
|
||||
|
||||
@Override
|
||||
public void initialize() {
|
||||
states = new ArrayList<String>();
|
||||
for ( RodBinding<VariantContext> rod : getVariantEvalWalker().getComps() )
|
||||
states.add(rod.getName());
|
||||
}
|
||||
|
||||
public ArrayList<String> getAllStates() {
|
||||
return states;
|
||||
}
|
||||
|
||||
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
ArrayList<String> relevantStates = new ArrayList<String>();
|
||||
|
||||
relevantStates.add(compName);
|
||||
|
|
|
|||
|
|
@ -5,23 +5,19 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Stratifies the evaluation by each contig in the reference sequence
|
||||
*/
|
||||
public class Contig extends VariantStratifier {
|
||||
// needs to know the variant context
|
||||
private ArrayList<String> states;
|
||||
|
||||
@Override
|
||||
public void initialize() {
|
||||
states = new ArrayList<String>();
|
||||
states.addAll(getVariantEvalWalker().getContigNames());
|
||||
states.add("all");
|
||||
}
|
||||
|
||||
public ArrayList<String> getAllStates() {
|
||||
return states;
|
||||
}
|
||||
|
||||
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
ArrayList<String> relevantStates = new ArrayList<String>();
|
||||
|
||||
if (eval != null) {
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* CpG is a stratification module for VariantEval that divides the input data by within/not within a CpG site
|
||||
|
|
@ -19,21 +20,14 @@ import java.util.ArrayList;
|
|||
* A CpG site is defined as a site where the reference base at a locus is a C and the adjacent reference base in the 3' direction is a G.
|
||||
*/
|
||||
public class CpG extends VariantStratifier {
|
||||
private ArrayList<String> states;
|
||||
|
||||
@Override
|
||||
public void initialize() {
|
||||
states = new ArrayList<String>();
|
||||
states.add("all");
|
||||
states.add("CpG");
|
||||
states.add("non_CpG");
|
||||
}
|
||||
|
||||
public ArrayList<String> getAllStates() {
|
||||
return states;
|
||||
}
|
||||
|
||||
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
boolean isCpG = false;
|
||||
if (ref != null && ref.getBases() != null) {
|
||||
String fwRefBases = new String(ref.getBases());
|
||||
|
|
|
|||
|
|
@ -7,10 +7,12 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Experimental stratification by the degeneracy of an amino acid, according to VCF annotation. Not safe
|
||||
*/
|
||||
public class Degeneracy extends VariantStratifier {
|
||||
private ArrayList<String> states;
|
||||
|
||||
private HashMap<String, HashMap<Integer, String>> degeneracies;
|
||||
|
||||
@Override
|
||||
|
|
@ -77,11 +79,7 @@ public class Degeneracy extends VariantStratifier {
|
|||
}
|
||||
}
|
||||
|
||||
public ArrayList<String> getAllStates() {
|
||||
return states;
|
||||
}
|
||||
|
||||
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
ArrayList<String> relevantStates = new ArrayList<String>();
|
||||
|
||||
relevantStates.add("all");
|
||||
|
|
|
|||
|
|
@ -6,10 +6,12 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Required stratification grouping output by each eval ROD
|
||||
*/
|
||||
public class EvalRod extends VariantStratifier implements RequiredStratification {
|
||||
private ArrayList<String> states;
|
||||
|
||||
@Override
|
||||
public void initialize() {
|
||||
states = new ArrayList<String>();
|
||||
|
|
@ -17,11 +19,7 @@ public class EvalRod extends VariantStratifier implements RequiredStratification
|
|||
states.add(rod.getName());
|
||||
}
|
||||
|
||||
public ArrayList<String> getAllStates() {
|
||||
return states;
|
||||
}
|
||||
|
||||
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
ArrayList<String> relevantStates = new ArrayList<String>();
|
||||
|
||||
relevantStates.add(evalName);
|
||||
|
|
|
|||
|
|
@ -5,24 +5,20 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Stratifies by the FILTER status (PASS, FAIL) of the eval records
|
||||
*/
|
||||
public class Filter extends VariantStratifier {
|
||||
// needs to know the variant context
|
||||
private ArrayList<String> states;
|
||||
|
||||
@Override
|
||||
public void initialize() {
|
||||
states = new ArrayList<String>();
|
||||
states.add("called");
|
||||
states.add("filtered");
|
||||
states.add("raw");
|
||||
}
|
||||
|
||||
public ArrayList<String> getAllStates() {
|
||||
return states;
|
||||
}
|
||||
|
||||
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
ArrayList<String> relevantStates = new ArrayList<String>();
|
||||
|
||||
relevantStates.add("raw");
|
||||
|
|
|
|||
|
|
@ -5,25 +5,22 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Stratifies by nonsense, missense, silent, and all annotations in the input ROD, from the INFO field annotation.
|
||||
*/
|
||||
public class FunctionalClass extends VariantStratifier {
|
||||
// needs to know the variant context
|
||||
private ArrayList<String> states;
|
||||
|
||||
@Override
|
||||
public void initialize() {
|
||||
states = new ArrayList<String>();
|
||||
states.add("all");
|
||||
states.add("silent");
|
||||
states.add("missense");
|
||||
states.add("nonsense");
|
||||
}
|
||||
|
||||
public ArrayList<String> getAllStates() {
|
||||
return states;
|
||||
}
|
||||
|
||||
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
ArrayList<String> relevantStates = new ArrayList<String>();
|
||||
|
||||
relevantStates.add("all");
|
||||
|
|
|
|||
|
|
@ -6,30 +6,30 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatc
|
|||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Stratifies the eval RODs by user-supplied JEXL expressions
|
||||
*
|
||||
* See http://www.broadinstitute.org/gsa/wiki/index.php/Using_JEXL_expressions for more details
|
||||
*/
|
||||
public class JexlExpression extends VariantStratifier implements StandardStratification {
|
||||
// needs to know the jexl expressions
|
||||
private Set<SortableJexlVCMatchExp> jexlExpressions;
|
||||
private ArrayList<String> states;
|
||||
|
||||
@Override
|
||||
public void initialize() {
|
||||
jexlExpressions = getVariantEvalWalker().getJexlExpressions();
|
||||
|
||||
states = new ArrayList<String>();
|
||||
states.add("none");
|
||||
for ( SortableJexlVCMatchExp jexlExpression : jexlExpressions ) {
|
||||
states.add(jexlExpression.name);
|
||||
}
|
||||
}
|
||||
|
||||
public ArrayList<String> getAllStates() {
|
||||
return states;
|
||||
}
|
||||
|
||||
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
ArrayList<String> relevantStates = new ArrayList<String>();
|
||||
relevantStates.add("none");
|
||||
|
||||
|
|
|
|||
|
|
@ -7,32 +7,31 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Stratifies by whether a site in in the list of known RODs (e.g., dbsnp by default)
|
||||
*/
|
||||
public class Novelty extends VariantStratifier implements StandardStratification {
|
||||
// needs the variant contexts and known names
|
||||
private List<RodBinding<VariantContext>> knowns;
|
||||
final private ArrayList<String> states = new ArrayList<String>(Arrays.asList("all", "known", "novel"));
|
||||
|
||||
|
||||
@Override
|
||||
public void initialize() {
|
||||
states = new ArrayList<String>(Arrays.asList("all", "known", "novel"));
|
||||
knowns = getVariantEvalWalker().getKnowns();
|
||||
}
|
||||
|
||||
public ArrayList<String> getAllStates() {
|
||||
return states;
|
||||
}
|
||||
|
||||
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
if (tracker != null && eval != null) {
|
||||
final Collection<VariantContext> knownComps = tracker.getValues(knowns, ref.getLocus());
|
||||
for ( final VariantContext c : knownComps ) {
|
||||
// loop over sites, looking for something that matches the type eval
|
||||
if ( eval.getType() == c.getType() ) {
|
||||
return new ArrayList<String>(Arrays.asList("all", "known"));
|
||||
return Arrays.asList("all", "known");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return new ArrayList<String>(Arrays.asList("all", "novel"));
|
||||
return Arrays.asList("all", "novel");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,26 +4,23 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Stratifies the eval RODs by each sample in the eval ROD.
|
||||
*
|
||||
* This allows the system to analyze each sample separately. Since many evaluations
|
||||
* only consider non-reference sites, stratifying by sample results in meaningful
|
||||
* calculations for CompOverlap
|
||||
*/
|
||||
public class Sample extends VariantStratifier {
|
||||
// needs the sample names
|
||||
private ArrayList<String> samples;
|
||||
|
||||
@Override
|
||||
public void initialize() {
|
||||
samples = new ArrayList<String>();
|
||||
samples.addAll(getVariantEvalWalker().getSampleNamesForStratification());
|
||||
states.addAll(getVariantEvalWalker().getSampleNamesForStratification());
|
||||
}
|
||||
|
||||
public ArrayList<String> getAllStates() {
|
||||
return samples;
|
||||
}
|
||||
|
||||
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
ArrayList<String> relevantStates = new ArrayList<String>();
|
||||
relevantStates.add(sampleName);
|
||||
|
||||
return relevantStates;
|
||||
public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
return Arrays.asList(sampleName);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,9 +6,12 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
|
|||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
public abstract class VariantStratifier implements Comparable {
|
||||
private VariantEvalWalker variantEvalWalker;
|
||||
protected ArrayList<String> states = new ArrayList<String>();
|
||||
|
||||
/**
|
||||
* @return a reference to the parent VariantEvalWalker running this stratification
|
||||
|
|
@ -27,15 +30,15 @@ public abstract class VariantStratifier implements Comparable {
|
|||
|
||||
public abstract void initialize();
|
||||
|
||||
public ArrayList<String> getAllStates() {
|
||||
return new ArrayList<String>();
|
||||
}
|
||||
|
||||
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
return null;
|
||||
}
|
||||
|
||||
public int compareTo(Object o1) {
|
||||
return this.getClass().getSimpleName().compareTo(o1.getClass().getSimpleName());
|
||||
}
|
||||
|
||||
public ArrayList<String> getAllStates() {
|
||||
return states;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,49 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Stratifies the eval variants by their type (SNP, INDEL, ETC)
|
||||
*/
|
||||
public class VariantType extends VariantStratifier {
|
||||
@Override
|
||||
public void initialize() {
|
||||
for ( VariantContext.Type t : VariantContext.Type.values() ) {
|
||||
states.add(t.toString());
|
||||
}
|
||||
}
|
||||
|
||||
public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
return eval == null ? Collections.<String>emptyList() : Arrays.asList(eval.getType().toString());
|
||||
}
|
||||
}
|
||||
|
|
@ -266,10 +266,7 @@ public class VariantEvalUtils {
|
|||
* @return a new VariantContext with just the requested sample
|
||||
*/
|
||||
public VariantContext getSubsetOfVariantContext(VariantContext vc, String sampleName) {
|
||||
ArrayList<String> sampleNames = new ArrayList<String>();
|
||||
sampleNames.add(sampleName);
|
||||
|
||||
return getSubsetOfVariantContext(vc, sampleNames);
|
||||
return getSubsetOfVariantContext(vc, Arrays.asList(sampleName));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -371,12 +368,12 @@ public class VariantEvalUtils {
|
|||
* @param stateKeys all the state keys
|
||||
* @return a list of state keys
|
||||
*/
|
||||
public ArrayList<StateKey> initializeStateKeys(HashMap<VariantStratifier, ArrayList<String>> stateMap, Stack<HashMap<VariantStratifier, ArrayList<String>>> stateStack, StateKey stateKey, ArrayList<StateKey> stateKeys) {
|
||||
public ArrayList<StateKey> initializeStateKeys(HashMap<VariantStratifier, List<String>> stateMap, Stack<HashMap<VariantStratifier, List<String>>> stateStack, StateKey stateKey, ArrayList<StateKey> stateKeys) {
|
||||
if (stateStack == null) {
|
||||
stateStack = new Stack<HashMap<VariantStratifier, ArrayList<String>>>();
|
||||
stateStack = new Stack<HashMap<VariantStratifier, List<String>>>();
|
||||
|
||||
for (VariantStratifier vs : stateMap.keySet()) {
|
||||
HashMap<VariantStratifier, ArrayList<String>> oneSetOfStates = new HashMap<VariantStratifier, ArrayList<String>>();
|
||||
HashMap<VariantStratifier, List<String>> oneSetOfStates = new HashMap<VariantStratifier, List<String>>();
|
||||
oneSetOfStates.put(vs, stateMap.get(vs));
|
||||
|
||||
stateStack.add(oneSetOfStates);
|
||||
|
|
@ -384,10 +381,10 @@ public class VariantEvalUtils {
|
|||
}
|
||||
|
||||
if (!stateStack.isEmpty()) {
|
||||
Stack<HashMap<VariantStratifier, ArrayList<String>>> newStateStack = new Stack<HashMap<VariantStratifier, ArrayList<String>>>();
|
||||
Stack<HashMap<VariantStratifier, List<String>>> newStateStack = new Stack<HashMap<VariantStratifier, List<String>>>();
|
||||
newStateStack.addAll(stateStack);
|
||||
|
||||
HashMap<VariantStratifier, ArrayList<String>> oneSetOfStates = newStateStack.pop();
|
||||
HashMap<VariantStratifier, List<String>> oneSetOfStates = newStateStack.pop();
|
||||
VariantStratifier vs = oneSetOfStates.keySet().iterator().next();
|
||||
|
||||
for (String state : oneSetOfStates.get(vs)) {
|
||||
|
|
|
|||
|
|
@ -214,7 +214,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
@Argument(fullName="sample_expressions", shortName="se", doc="Regular expression to select many samples from the ROD tracks provided. Can be specified multiple times", required=false)
|
||||
public Set<String> sampleExpressions ;
|
||||
|
||||
@Argument(fullName="sample_file", shortName="sf", doc="File containing a list of samples (one per line) to include. Can be specified multiple times", required=false)
|
||||
@Input(fullName="sample_file", shortName="sf", doc="File containing a list of samples (one per line) to include. Can be specified multiple times", required=false)
|
||||
public Set<File> sampleFiles;
|
||||
|
||||
/**
|
||||
|
|
@ -226,7 +226,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
|||
/**
|
||||
* Note that sample exclusion takes precedence over inclusion, so that if a sample is in both lists it will be excluded.
|
||||
*/
|
||||
@Argument(fullName="exclude_sample_file", shortName="xl_sf", doc="File containing a list of samples (one per line) to exclude. Can be specified multiple times", required=false)
|
||||
@Input(fullName="exclude_sample_file", shortName="xl_sf", doc="File containing a list of samples (one per line) to exclude. Can be specified multiple times", required=false)
|
||||
public Set<File> XLsampleFiles = new HashSet<File>(0);
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -26,7 +26,6 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
||||
|
||||
import org.broad.tribble.TribbleException;
|
||||
import org.broad.tribble.dbsnp.DbSNPFeature;
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
|
||||
|
|
@ -41,7 +40,6 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|||
import java.io.File;
|
||||
import java.util.Collection;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
|
||||
|
|
@ -168,14 +166,9 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
|
|||
// get the RS IDs
|
||||
Set<String> rsIDs = null;
|
||||
if ( tracker.hasValues(dbsnp.dbsnp) ) {
|
||||
List<VariantContext> dbsnpList = tracker.getValues(dbsnp.dbsnp, ref.getLocus());
|
||||
rsIDs = new HashSet<String>();
|
||||
for ( Object d : dbsnpList ) {
|
||||
if (d instanceof DbSNPFeature )
|
||||
rsIDs.add(((DbSNPFeature)d).getRsID());
|
||||
else if (d instanceof VariantContext )
|
||||
rsIDs.add(((VariantContext)d).getID());
|
||||
}
|
||||
for ( VariantContext rsID : tracker.getValues(dbsnp.dbsnp, ref.getLocus()) )
|
||||
rsIDs.add(rsID.getID());
|
||||
}
|
||||
|
||||
try {
|
||||
|
|
|
|||
|
|
@ -63,6 +63,10 @@ public class ReadClipper {
|
|||
if (start < 0 || stop > read.getReadLength() - 1)
|
||||
throw new ReviewedStingException("Trying to clip before the start or after the end of a read");
|
||||
|
||||
// TODO add requires statement/check in the Hardclip function
|
||||
if ( start > stop )
|
||||
stop = ReadUtils.getReadCoordinateForReferenceCoordinate(read, ReadUtils.getRefCoordSoftUnclippedEnd(read));
|
||||
|
||||
//System.out.println("Clipping start/stop: " + start + "/" + stop);
|
||||
this.addOp(new ClippingOp(start, stop));
|
||||
SAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES);
|
||||
|
|
|
|||
|
|
@ -82,7 +82,7 @@ public class PileupElement {
|
|||
// --------------------------------------------------------------------------
|
||||
|
||||
private Integer getReducedReadQualityTagValue() {
|
||||
return (Integer)getRead().getAttribute(ReadUtils.REDUCED_READ_QUALITY_TAG);
|
||||
return getRead().getIntegerAttribute(ReadUtils.REDUCED_READ_QUALITY_TAG);
|
||||
}
|
||||
|
||||
public boolean isReducedRead() {
|
||||
|
|
|
|||
|
|
@ -41,6 +41,10 @@ public class GATKSAMRecord extends SAMRecord {
|
|||
// because some values can be null, we don't want to duplicate effort
|
||||
private boolean retrievedReadGroup = false;
|
||||
|
||||
/** A private cache for the reduced read quality. Null indicates the value hasn't be fetched yet or isn't available */
|
||||
private boolean lookedUpReducedReadQuality = false;
|
||||
private Integer reducedReadQuality;
|
||||
|
||||
// These temporary attributes were added here to make life easier for
|
||||
// certain algorithms by providing a way to label or attach arbitrary data to
|
||||
// individual GATKSAMRecords.
|
||||
|
|
@ -338,7 +342,17 @@ public class GATKSAMRecord extends SAMRecord {
|
|||
|
||||
public Object getAttribute(final String tag) { return mRecord.getAttribute(tag); }
|
||||
|
||||
public Integer getIntegerAttribute(final String tag) { return mRecord.getIntegerAttribute(tag); }
|
||||
public Integer getIntegerAttribute(final String tag) {
|
||||
if ( tag == ReadUtils.REDUCED_READ_QUALITY_TAG ) {
|
||||
if ( ! lookedUpReducedReadQuality ) {
|
||||
lookedUpReducedReadQuality = true;
|
||||
reducedReadQuality = mRecord.getIntegerAttribute(tag);
|
||||
}
|
||||
return reducedReadQuality;
|
||||
} else {
|
||||
return mRecord.getIntegerAttribute(tag);
|
||||
}
|
||||
}
|
||||
|
||||
public Short getShortAttribute(final String tag) { return mRecord.getShortAttribute(tag); }
|
||||
|
||||
|
|
|
|||
|
|
@ -983,7 +983,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
* @return true if it's monomorphic
|
||||
*/
|
||||
public boolean isMonomorphic() {
|
||||
return ! isVariant() || getChromosomeCount(getReference()) == getChromosomeCount();
|
||||
return ! isVariant() || (hasGenotypes() && getHomRefCount() + getNoCallCount() == getNSamples());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -127,6 +127,7 @@ public class TraverseReadsUnitTest extends BaseTest {
|
|||
Object accumulator = countReadWalker.reduceInit();
|
||||
|
||||
while (shardStrategy.hasNext()) {
|
||||
traversalEngine.startTimersIfNecessary();
|
||||
Shard shard = shardStrategy.next();
|
||||
|
||||
if (shard == null) {
|
||||
|
|
|
|||
|
|
@ -261,10 +261,10 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
return String.format("%s -select '%s' -selectName %s", cmd, select, name);
|
||||
}
|
||||
|
||||
@Test
|
||||
@Test(enabled = false) // no longer supported in the GATK
|
||||
public void testTranches() {
|
||||
String extraArgs = "-T VariantEval -R "+ hg18Reference +" --eval " + validationDataLocation + "GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.optimized.vcf -o %s -EV TiTvVariantEvaluator -L chr1 -noEV -ST CpG -tf " + testDir + "tranches.6.txt";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("984df6e94a546294fc7e0846cbac2dfe"));
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("6af2b9959aa1778a5b712536de453952"));
|
||||
executeTestParallel("testTranches",spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
|
|||
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + b36KGReference +
|
||||
" --variant:dbsnp " + GATKDataLocation + "Comparisons/Validated/dbSNP/dbsnp_129_b36.rod" +
|
||||
" --variant:OldDbsnp " + GATKDataLocation + "Comparisons/Validated/dbSNP/dbsnp_129_b36.rod" +
|
||||
" -T VariantsToVCF" +
|
||||
" -L 1:1-30,000,000" +
|
||||
" -o %s" +
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ import net.sf.samtools.SAMFileReader
|
|||
import net.sf.samtools.SAMFileHeader.SortOrder
|
||||
|
||||
import org.broadinstitute.sting.queue.util.QScriptUtils
|
||||
import org.broadinstitute.sting.queue.function.{CommandLineFunction, ListWriterFunction}
|
||||
import org.broadinstitute.sting.queue.function.ListWriterFunction
|
||||
|
||||
class DataProcessingPipeline extends QScript {
|
||||
qscript =>
|
||||
|
|
@ -31,19 +31,14 @@ class DataProcessingPipeline extends QScript {
|
|||
var reference: File = _
|
||||
|
||||
@Input(doc="dbsnp ROD to use (must be in VCF format)", fullName="dbsnp", shortName="D", required=true)
|
||||
var dbSNP: File = _
|
||||
var dbSNP: List[File] = List()
|
||||
|
||||
/****************************************************************************
|
||||
* Optional Parameters
|
||||
****************************************************************************/
|
||||
|
||||
|
||||
// @Input(doc="path to Picard's SortSam.jar (if re-aligning a previously processed BAM file)", fullName="path_to_sort_jar", shortName="sort", required=false)
|
||||
// var sortSamJar: File = _
|
||||
//
|
||||
|
||||
@Input(doc="extra VCF files to use as reference indels for Indel Realignment", fullName="extra_indels", shortName="indels", required=false)
|
||||
var indels: File = _
|
||||
var indels: List[File] = List()
|
||||
|
||||
@Input(doc="The path to the binary of bwa (usually BAM files have already been mapped - but if you want to remap this is the option)", fullName="path_to_bwa", shortName="bwa", required=false)
|
||||
var bwaPath: File = _
|
||||
|
|
@ -132,24 +127,6 @@ class DataProcessingPipeline extends QScript {
|
|||
}
|
||||
}
|
||||
return sampleTable.toMap
|
||||
|
||||
// println("\n\n*** INPUT FILES ***\n")
|
||||
// // Creating one file for each sample in the dataset
|
||||
// val sampleBamFiles = scala.collection.mutable.Map.empty[String, File]
|
||||
// for ((sample, flist) <- sampleTable) {
|
||||
//
|
||||
// println(sample + ":")
|
||||
// for (f <- flist)
|
||||
// println (f)
|
||||
// println()
|
||||
//
|
||||
// val sampleFileName = new File(qscript.outputDir + qscript.projectName + "." + sample + ".list")
|
||||
// sampleBamFiles(sample) = sampleFileName
|
||||
// //add(writeList(flist, sampleFileName))
|
||||
// }
|
||||
// println("*** INPUT FILES ***\n\n")
|
||||
//
|
||||
// return sampleBamFiles.toMap
|
||||
}
|
||||
|
||||
// Rebuilds the Read Group string to give BWA
|
||||
|
|
@ -321,9 +298,9 @@ class DataProcessingPipeline extends QScript {
|
|||
this.input_file = inBams
|
||||
this.out = outIntervals
|
||||
this.mismatchFraction = 0.0
|
||||
this.known :+= qscript.dbSNP
|
||||
this.known ++= qscript.dbSNP
|
||||
if (indels != null)
|
||||
this.known :+= qscript.indels
|
||||
this.known ++= qscript.indels
|
||||
this.scatterCount = nContigs
|
||||
this.analysisName = queueLogDir + outIntervals + ".target"
|
||||
this.jobName = queueLogDir + outIntervals + ".target"
|
||||
|
|
@ -333,9 +310,9 @@ class DataProcessingPipeline extends QScript {
|
|||
this.input_file = inBams
|
||||
this.targetIntervals = tIntervals
|
||||
this.out = outBam
|
||||
this.known :+= qscript.dbSNP
|
||||
this.known ++= qscript.dbSNP
|
||||
if (qscript.indels != null)
|
||||
this.known :+= qscript.indels
|
||||
this.known ++= qscript.indels
|
||||
this.consensusDeterminationModel = cleanModelEnum
|
||||
this.compress = 0
|
||||
this.scatterCount = nContigs
|
||||
|
|
@ -344,7 +321,7 @@ class DataProcessingPipeline extends QScript {
|
|||
}
|
||||
|
||||
case class cov (inBam: File, outRecalFile: File) extends CountCovariates with CommandLineGATKArgs {
|
||||
this.knownSites :+= qscript.dbSNP
|
||||
this.knownSites ++= qscript.dbSNP
|
||||
this.covariate ++= List("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "DinucCovariate")
|
||||
this.input_file :+= inBam
|
||||
this.recal_file = outRecalFile
|
||||
|
|
|
|||
Binary file not shown.
|
|
@ -1,3 +1,3 @@
|
|||
<ivy-module version="1.0">
|
||||
<info organisation="org.broad" module="tribble" revision="23" status="integration" />
|
||||
<info organisation="org.broad" module="tribble" revision="24" status="integration" />
|
||||
</ivy-module>
|
||||
Loading…
Reference in New Issue