Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Ryan Poplin 2011-09-07 19:18:24 -04:00
commit 7ba6c29d32
49 changed files with 377 additions and 285 deletions

View File

@ -12,7 +12,9 @@ if ( onCMDLine ) {
inputFileName = args[1] inputFileName = args[1]
outputPDF = args[2] outputPDF = args[2]
} else { } else {
inputFileName = "~/Desktop/broadLocal/GATK/unstable/report.txt" #inputFileName = "~/Desktop/broadLocal/GATK/unstable/report.txt"
inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/Q-25718@node1149.jobreport.txt"
#inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/rodPerformanceGoals/history/report.082711.txt"
outputPDF = NA outputPDF = NA
} }
@ -113,11 +115,22 @@ plotGroup <- function(groupTable) {
textplot(as.data.frame(sum), show.rownames=F) textplot(as.data.frame(sum), show.rownames=F)
title(paste("Job summary for", name, "itemizing each iteration"), cex=3) title(paste("Job summary for", name, "itemizing each iteration"), cex=3)
# histogram of job times by groupAnnotations
if ( length(groupAnnotations) == 1 && dim(sub)[1] > 1 ) {
# todo -- how do we group by annotations?
p <- ggplot(data=sub, aes(x=runtime)) + geom_histogram()
p <- p + xlab("runtime in seconds") + ylab("No. of jobs")
p <- p + opts(title=paste("Job runtime histogram for", name))
print(p)
}
# as above, but averaging over all iterations # as above, but averaging over all iterations
groupAnnotationsNoIteration = setdiff(groupAnnotations, "iteration") groupAnnotationsNoIteration = setdiff(groupAnnotations, "iteration")
sum = cast(melt(sub, id.vars=groupAnnotationsNoIteration, measure.vars=c("runtime")), ... ~ ., fun.aggregate=c(mean, sd)) if ( dim(sub)[1] > 1 ) {
textplot(as.data.frame(sum), show.rownames=F) sum = cast(melt(sub, id.vars=groupAnnotationsNoIteration, measure.vars=c("runtime")), ... ~ ., fun.aggregate=c(mean, sd))
title(paste("Job summary for", name, "averaging over all iterations"), cex=3) textplot(as.data.frame(sum), show.rownames=F)
title(paste("Job summary for", name, "averaging over all iterations"), cex=3)
}
} }
# print out some useful basic information # print out some useful basic information
@ -146,7 +159,7 @@ plotJobsGantt(gatkReportData, T)
plotJobsGantt(gatkReportData, F) plotJobsGantt(gatkReportData, F)
plotProgressByTime(gatkReportData) plotProgressByTime(gatkReportData)
for ( group in gatkReportData ) { for ( group in gatkReportData ) {
plotGroup(group) plotGroup(group)
} }
if ( ! is.na(outputPDF) ) { if ( ! is.na(outputPDF) ) {

View File

@ -97,7 +97,6 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
if (!( walker instanceof TreeReducible )) if (!( walker instanceof TreeReducible ))
throw new IllegalArgumentException("The GATK can currently run in parallel only with TreeReducible walkers"); throw new IllegalArgumentException("The GATK can currently run in parallel only with TreeReducible walkers");
traversalEngine.startTimers();
ReduceTree reduceTree = new ReduceTree(this); ReduceTree reduceTree = new ReduceTree(this);
initializeWalker(walker); initializeWalker(walker);

View File

@ -44,7 +44,6 @@ public class LinearMicroScheduler extends MicroScheduler {
* @param shardStrategy A strategy for sharding the data. * @param shardStrategy A strategy for sharding the data.
*/ */
public Object execute(Walker walker, ShardStrategy shardStrategy) { public Object execute(Walker walker, ShardStrategy shardStrategy) {
traversalEngine.startTimers();
walker.initialize(); walker.initialize();
Accumulator accumulator = Accumulator.create(engine,walker); Accumulator accumulator = Accumulator.create(engine,walker);
@ -54,6 +53,7 @@ public class LinearMicroScheduler extends MicroScheduler {
if ( done || shard == null ) // we ran out of shards that aren't owned if ( done || shard == null ) // we ran out of shards that aren't owned
break; break;
traversalEngine.startTimersIfNecessary();
if(shard.getShardType() == Shard.ShardType.LOCUS) { if(shard.getShardType() == Shard.ShardType.LOCUS) {
LocusWalker lWalker = (LocusWalker)walker; LocusWalker lWalker = (LocusWalker)walker;
WindowMaker windowMaker = new WindowMaker(shard, engine.getGenomeLocParser(), getReadIterator(shard), shard.getGenomeLocs(), engine.getSampleMetadata()); WindowMaker windowMaker = new WindowMaker(shard, engine.getGenomeLocParser(), getReadIterator(shard), shard.getGenomeLocs(), engine.getSampleMetadata());

View File

@ -57,6 +57,7 @@ public class ShardTraverser implements Callable {
public Object call() { public Object call() {
try { try {
traversalEngine.startTimersIfNecessary();
long startTime = System.currentTimeMillis(); long startTime = System.currentTimeMillis();
Object accumulator = walker.reduceInit(); Object accumulator = walker.reduceInit();

View File

@ -3,7 +3,7 @@ package org.broadinstitute.sting.gatk.refdata;
import net.sf.samtools.util.SequenceUtil; import net.sf.samtools.util.SequenceUtil;
import org.broad.tribble.Feature; import org.broad.tribble.Feature;
import org.broad.tribble.annotation.Strand; import org.broad.tribble.annotation.Strand;
import org.broad.tribble.dbsnp.DbSNPFeature; import org.broad.tribble.dbsnp.OldDbSNPFeature;
import org.broad.tribble.gelitext.GeliTextFeature; import org.broad.tribble.gelitext.GeliTextFeature;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.classloader.PluginManager;
@ -93,27 +93,27 @@ public class VariantContextAdaptors {
// -------------------------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------------------------
private static class DBSnpAdaptor implements VCAdaptor { private static class DBSnpAdaptor implements VCAdaptor {
private static boolean isSNP(DbSNPFeature feature) { private static boolean isSNP(OldDbSNPFeature feature) {
return feature.getVariantType().contains("single") && feature.getLocationType().contains("exact"); return feature.getVariantType().contains("single") && feature.getLocationType().contains("exact");
} }
private static boolean isMNP(DbSNPFeature feature) { private static boolean isMNP(OldDbSNPFeature feature) {
return feature.getVariantType().contains("mnp") && feature.getLocationType().contains("range"); return feature.getVariantType().contains("mnp") && feature.getLocationType().contains("range");
} }
private static boolean isInsertion(DbSNPFeature feature) { private static boolean isInsertion(OldDbSNPFeature feature) {
return feature.getVariantType().contains("insertion"); return feature.getVariantType().contains("insertion");
} }
private static boolean isDeletion(DbSNPFeature feature) { private static boolean isDeletion(OldDbSNPFeature feature) {
return feature.getVariantType().contains("deletion"); return feature.getVariantType().contains("deletion");
} }
private static boolean isIndel(DbSNPFeature feature) { private static boolean isIndel(OldDbSNPFeature feature) {
return isInsertion(feature) || isDeletion(feature) || isComplexIndel(feature); return isInsertion(feature) || isDeletion(feature) || isComplexIndel(feature);
} }
public static boolean isComplexIndel(DbSNPFeature feature) { public static boolean isComplexIndel(OldDbSNPFeature feature) {
return feature.getVariantType().contains("in-del"); return feature.getVariantType().contains("in-del");
} }
@ -125,7 +125,7 @@ public class VariantContextAdaptors {
* *
* @return an alternate allele list * @return an alternate allele list
*/ */
public static List<String> getAlternateAlleleList(DbSNPFeature feature) { public static List<String> getAlternateAlleleList(OldDbSNPFeature feature) {
List<String> ret = new ArrayList<String>(); List<String> ret = new ArrayList<String>();
for (String allele : getAlleleList(feature)) for (String allele : getAlleleList(feature))
if (!allele.equals(String.valueOf(feature.getNCBIRefBase()))) ret.add(allele); if (!allele.equals(String.valueOf(feature.getNCBIRefBase()))) ret.add(allele);
@ -139,7 +139,7 @@ public class VariantContextAdaptors {
* *
* @return an alternate allele list * @return an alternate allele list
*/ */
public static List<String> getAlleleList(DbSNPFeature feature) { public static List<String> getAlleleList(OldDbSNPFeature feature) {
List<String> alleleList = new ArrayList<String>(); List<String> alleleList = new ArrayList<String>();
// add ref first // add ref first
if ( feature.getStrand() == Strand.POSITIVE ) if ( feature.getStrand() == Strand.POSITIVE )
@ -156,14 +156,14 @@ public class VariantContextAdaptors {
/** /**
* Converts non-VCF formatted dbSNP records to VariantContext. * Converts non-VCF formatted dbSNP records to VariantContext.
* @return DbSNPFeature. * @return OldDbSNPFeature.
*/ */
@Override @Override
public Class<? extends Feature> getAdaptableFeatureType() { return DbSNPFeature.class; } public Class<? extends Feature> getAdaptableFeatureType() { return OldDbSNPFeature.class; }
@Override @Override
public VariantContext convert(String name, Object input, ReferenceContext ref) { public VariantContext convert(String name, Object input, ReferenceContext ref) {
DbSNPFeature dbsnp = (DbSNPFeature)input; OldDbSNPFeature dbsnp = (OldDbSNPFeature)input;
if ( ! Allele.acceptableAlleleBases(dbsnp.getNCBIRefBase()) ) if ( ! Allele.acceptableAlleleBases(dbsnp.getNCBIRefBase()) )
return null; return null;
Allele refAllele = Allele.create(dbsnp.getNCBIRefBase(), true); Allele refAllele = Allele.create(dbsnp.getNCBIRefBase(), true);

View File

@ -115,12 +115,13 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
LinkedList<ProcessingHistory> history = new LinkedList<ProcessingHistory>(); LinkedList<ProcessingHistory> history = new LinkedList<ProcessingHistory>();
/** We use the SimpleTimer to time our run */ /** We use the SimpleTimer to time our run */
private SimpleTimer timer = new SimpleTimer("Traversal"); private SimpleTimer timer = null;
// How long can we go without printing some progress info? // How long can we go without printing some progress info?
private static final int PRINT_PROGRESS_CHECK_FREQUENCY_IN_CYCLES = 1000; private static final int PRINT_PROGRESS_CHECK_FREQUENCY_IN_CYCLES = 1000;
private int printProgressCheckCounter = 0; private int printProgressCheckCounter = 0;
private long lastProgressPrintTime = -1; // When was the last time we printed progress log? private long lastProgressPrintTime = -1; // When was the last time we printed progress log?
private long MIN_ELAPSED_TIME_BEFORE_FIRST_PROGRESS = 120 * 1000; // in milliseconds
private long PROGRESS_PRINT_FREQUENCY = 10 * 1000; // in milliseconds private long PROGRESS_PRINT_FREQUENCY = 10 * 1000; // in milliseconds
private final double TWO_HOURS_IN_SECONDS = 2.0 * 60.0 * 60.0; private final double TWO_HOURS_IN_SECONDS = 2.0 * 60.0 * 60.0;
private final double TWELVE_HOURS_IN_SECONDS = 12.0 * 60.0 * 60.0; private final double TWELVE_HOURS_IN_SECONDS = 12.0 * 60.0 * 60.0;
@ -209,11 +210,16 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
} }
} }
/** /**
* Should be called to indicate that we're going to process records and the timer should start ticking * Should be called to indicate that we're going to process records and the timer should start ticking. This
* function should be called right before any traversal work is done, to avoid counting setup costs in the
* processing costs and inflating the estimated runtime.
*/ */
public void startTimers() { public void startTimersIfNecessary() {
timer.start(); if ( timer == null ) {
lastProgressPrintTime = timer.currentTime(); timer = new SimpleTimer("Traversal");
timer.start();
lastProgressPrintTime = timer.currentTime();
}
} }
/** /**
@ -224,7 +230,8 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
* @return true if the maximum interval (in millisecs) has passed since the last printing * @return true if the maximum interval (in millisecs) has passed since the last printing
*/ */
private boolean maxElapsedIntervalForPrinting(final long curTime, long lastPrintTime, long printFreq) { private boolean maxElapsedIntervalForPrinting(final long curTime, long lastPrintTime, long printFreq) {
return (curTime - lastPrintTime) > printFreq; long elapsed = curTime - lastPrintTime;
return elapsed > printFreq && elapsed > MIN_ELAPSED_TIME_BEFORE_FIRST_PROGRESS;
} }
/** /**

View File

@ -79,24 +79,45 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
@ArgumentCollection @ArgumentCollection
protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
/**
* If this argument is present, the original allele frequencies and counts from this vcf are added as annotations ACH,AFH and ANH. at each record present in this vcf
*/
@Input(fullName="comp", shortName = "comp", doc="Comparison VCF file", required=false) @Input(fullName="comp", shortName = "comp", doc="Comparison VCF file", required=false)
public RodBinding<VariantContext> comp; public RodBinding<VariantContext> comp;
/**
* This required argument is used to annotate each site in the vcf INFO field with R2 annotation. Will be NaN if Beagle determined there are no variant samples.
*/
@Input(fullName="beagleR2", shortName = "beagleR2", doc="Beagle-produced .r2 file containing R^2 values for all markers", required=true) @Input(fullName="beagleR2", shortName = "beagleR2", doc="Beagle-produced .r2 file containing R^2 values for all markers", required=true)
public RodBinding<BeagleFeature> beagleR2; public RodBinding<BeagleFeature> beagleR2;
/**
* These values will populate the GL field for each sample and contain the posterior probability of each genotype given the data after phasing and imputation.
*/
@Input(fullName="beagleProbs", shortName = "beagleProbs", doc="Beagle-produced .probs file containing posterior genotype probabilities", required=true) @Input(fullName="beagleProbs", shortName = "beagleProbs", doc="Beagle-produced .probs file containing posterior genotype probabilities", required=true)
public RodBinding<BeagleFeature> beagleProbs; public RodBinding<BeagleFeature> beagleProbs;
/**
* By default, all genotypes will be marked in the VCF as "phased", using the "|" separator after Beagle.
*/
@Input(fullName="beaglePhased", shortName = "beaglePhased", doc="Beagle-produced .phased file containing phased genotypes", required=true) @Input(fullName="beaglePhased", shortName = "beaglePhased", doc="Beagle-produced .phased file containing phased genotypes", required=true)
public RodBinding<BeagleFeature> beaglePhased; public RodBinding<BeagleFeature> beaglePhased;
@Output(doc="VCF File to which variants should be written",required=true) @Output(doc="VCF File to which variants should be written",required=true)
protected VCFWriter vcfWriter = null; protected VCFWriter vcfWriter = null;
/**
* If this argument is absent, and if Beagle determines that there is no sample in a site that has a variant genotype, the site will be marked as filtered (Default behavior).
* If the argument is present, the site won't be marked as filtered under this condition even if there are no variant genotypes.
*/
@Argument(fullName="dont_mark_monomorphic_sites_as_filtered", shortName="keep_monomorphic", doc="If provided, we won't filter sites that beagle tags as monomorphic. Useful for imputing a sample's genotypes from a reference panel" ,required=false) @Argument(fullName="dont_mark_monomorphic_sites_as_filtered", shortName="keep_monomorphic", doc="If provided, we won't filter sites that beagle tags as monomorphic. Useful for imputing a sample's genotypes from a reference panel" ,required=false)
public boolean DONT_FILTER_MONOMORPHIC_SITES = false; public boolean DONT_FILTER_MONOMORPHIC_SITES = false;
/**
* Value between 0 and 1. If the probability of getting a genotype correctly (based on the posterior genotype probabilities and the actual genotype) is below this threshold,
* a genotype will be substitute by a no-call.
*/
@Argument(fullName="no" + @Argument(fullName="no" +
"call_threshold", shortName="ncthr", doc="Threshold of confidence at which a genotype won't be called", required=false) "call_threshold", shortName="ncthr", doc="Threshold of confidence at which a genotype won't be called", required=false)
private double noCallThreshold = 0.0; private double noCallThreshold = 0.0;

View File

@ -112,6 +112,9 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
@Argument(fullName = "bootstrap_vcf",shortName = "bvcf", doc = "Output a VCF with the records used for bootstrapping filtered out", required = false) @Argument(fullName = "bootstrap_vcf",shortName = "bvcf", doc = "Output a VCF with the records used for bootstrapping filtered out", required = false)
VCFWriter bootstrapVCFOutput = null; VCFWriter bootstrapVCFOutput = null;
/**
* If sample gender is known, this flag should be set to true to ensure that Beagle treats male Chr X properly.
*/
@Argument(fullName = "checkIsMaleOnChrX", shortName = "checkIsMaleOnChrX", doc = "Set to true when Beagle-ing chrX and want to ensure male samples don't have heterozygous calls.", required = false) @Argument(fullName = "checkIsMaleOnChrX", shortName = "checkIsMaleOnChrX", doc = "Set to true when Beagle-ing chrX and want to ensure male samples don't have heterozygous calls.", required = false)
public boolean CHECK_IS_MALE_ON_CHR_X = false; public boolean CHECK_IS_MALE_ON_CHR_X = false;

View File

@ -105,10 +105,19 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<DoCOutputType.Partiti
@Multiplex(value=DoCOutputMultiplexer.class,arguments={"partitionTypes","refSeqGeneList","omitDepthOutput","omitIntervals","omitSampleSummary","omitLocusTable"}) @Multiplex(value=DoCOutputMultiplexer.class,arguments={"partitionTypes","refSeqGeneList","omitDepthOutput","omitIntervals","omitSampleSummary","omitLocusTable"})
Map<DoCOutputType,PrintStream> out; Map<DoCOutputType,PrintStream> out;
/**
* Sets the low-coverage cutoff for granular binning. All loci with depth < START are counted in the first bin.
*/
@Argument(fullName = "start", doc = "Starting (left endpoint) for granular binning", required = false) @Argument(fullName = "start", doc = "Starting (left endpoint) for granular binning", required = false)
int start = 1; int start = 1;
/**
* Sets the high-coverage cutoff for granular binning. All loci with depth > END are counted in the last bin.
*/
@Argument(fullName = "stop", doc = "Ending (right endpoint) for granular binning", required = false) @Argument(fullName = "stop", doc = "Ending (right endpoint) for granular binning", required = false)
int stop = 500; int stop = 500;
/**
* Sets the number of bins for granular binning
*/
@Argument(fullName = "nBins", doc = "Number of bins to use for granular binning", required = false) @Argument(fullName = "nBins", doc = "Number of bins to use for granular binning", required = false)
int nBins = 499; int nBins = 499;
@Argument(fullName = "minMappingQuality", shortName = "mmq", doc = "Minimum mapping quality of reads to count towards depth. Defaults to -1.", required = false) @Argument(fullName = "minMappingQuality", shortName = "mmq", doc = "Minimum mapping quality of reads to count towards depth. Defaults to -1.", required = false)
@ -119,28 +128,59 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<DoCOutputType.Partiti
byte minBaseQuality = -1; byte minBaseQuality = -1;
@Argument(fullName = "maxBaseQuality", doc = "Maximum quality of bases to count towards depth. Defaults to 127 (Byte.MAX_VALUE).", required = false) @Argument(fullName = "maxBaseQuality", doc = "Maximum quality of bases to count towards depth. Defaults to 127 (Byte.MAX_VALUE).", required = false)
byte maxBaseQuality = Byte.MAX_VALUE; byte maxBaseQuality = Byte.MAX_VALUE;
/**
* Instead of reporting depth, report the base pileup at each locus
*/
@Argument(fullName = "printBaseCounts", shortName = "baseCounts", doc = "Will add base counts to per-locus output.", required = false) @Argument(fullName = "printBaseCounts", shortName = "baseCounts", doc = "Will add base counts to per-locus output.", required = false)
boolean printBaseCounts = false; boolean printBaseCounts = false;
/**
* Do not tabulate locus statistics (# loci covered by sample by coverage)
*/
@Argument(fullName = "omitLocusTable", shortName = "omitLocusTable", doc = "Will not calculate the per-sample per-depth counts of loci, which should result in speedup", required = false) @Argument(fullName = "omitLocusTable", shortName = "omitLocusTable", doc = "Will not calculate the per-sample per-depth counts of loci, which should result in speedup", required = false)
boolean omitLocusTable = false; boolean omitLocusTable = false;
/**
* Do not tabulate interval statistics (mean, median, quartiles AND # intervals by sample by coverage)
*/
@Argument(fullName = "omitIntervalStatistics", shortName = "omitIntervals", doc = "Will omit the per-interval statistics section, which should result in speedup", required = false) @Argument(fullName = "omitIntervalStatistics", shortName = "omitIntervals", doc = "Will omit the per-interval statistics section, which should result in speedup", required = false)
boolean omitIntervals = false; boolean omitIntervals = false;
/**
* Do not print the total coverage at every base
*/
@Argument(fullName = "omitDepthOutputAtEachBase", shortName = "omitBaseOutput", doc = "Will omit the output of the depth of coverage at each base, which should result in speedup", required = false) @Argument(fullName = "omitDepthOutputAtEachBase", shortName = "omitBaseOutput", doc = "Will omit the output of the depth of coverage at each base, which should result in speedup", required = false)
boolean omitDepthOutput = false; boolean omitDepthOutput = false;
@Argument(fullName = "printBinEndpointsAndExit", doc = "Prints the bin values and exits immediately. Use to calibrate what bins you want before running on data.", required = false) @Argument(fullName = "printBinEndpointsAndExit", doc = "Prints the bin values and exits immediately. Use to calibrate what bins you want before running on data.", required = false)
boolean printBinEndpointsAndExit = false; boolean printBinEndpointsAndExit = false;
/**
* Do not tabulate the sample summary statistics (total, mean, median, quartile coverage per sample)
*/
@Argument(fullName = "omitPerSampleStats", shortName = "omitSampleSummary", doc = "Omits the summary files per-sample. These statistics are still calculated, so this argument will not improve runtime.", required = false) @Argument(fullName = "omitPerSampleStats", shortName = "omitSampleSummary", doc = "Omits the summary files per-sample. These statistics are still calculated, so this argument will not improve runtime.", required = false)
boolean omitSampleSummary = false; boolean omitSampleSummary = false;
/**
* A way of partitioning reads into groups. Can be sample, readgroup, or library.
*/
@Argument(fullName = "partitionType", shortName = "pt", doc = "Partition type for depth of coverage. Defaults to sample. Can be any combination of sample, readgroup, library.", required = false) @Argument(fullName = "partitionType", shortName = "pt", doc = "Partition type for depth of coverage. Defaults to sample. Can be any combination of sample, readgroup, library.", required = false)
Set<DoCOutputType.Partition> partitionTypes = EnumSet.of(DoCOutputType.Partition.sample); Set<DoCOutputType.Partition> partitionTypes = EnumSet.of(DoCOutputType.Partition.sample);
/**
* Consider a spanning deletion as contributing to coverage. Also enables deletion counts in per-base output.
*/
@Argument(fullName = "includeDeletions", shortName = "dels", doc = "Include information on deletions", required = false) @Argument(fullName = "includeDeletions", shortName = "dels", doc = "Include information on deletions", required = false)
boolean includeDeletions = false; boolean includeDeletions = false;
@Argument(fullName = "ignoreDeletionSites", doc = "Ignore sites consisting only of deletions", required = false) @Argument(fullName = "ignoreDeletionSites", doc = "Ignore sites consisting only of deletions", required = false)
boolean ignoreDeletionSites = false; boolean ignoreDeletionSites = false;
/**
* Path to the RefSeq file for use in aggregating coverage statistics over genes
*/
@Argument(fullName = "calculateCoverageOverGenes", shortName = "geneList", doc = "Calculate the coverage statistics over this list of genes. Currently accepts RefSeq.", required = false) @Argument(fullName = "calculateCoverageOverGenes", shortName = "geneList", doc = "Calculate the coverage statistics over this list of genes. Currently accepts RefSeq.", required = false)
File refSeqGeneList = null; File refSeqGeneList = null;
/**
* The format of the output file
*/
@Argument(fullName = "outputFormat", doc = "the format of the output file (e.g. csv, table, rtable); defaults to r-readable table", required = false) @Argument(fullName = "outputFormat", doc = "the format of the output file (e.g. csv, table, rtable); defaults to r-readable table", required = false)
String outputFormat = "rtable"; String outputFormat = "rtable";
/**
* A coverage threshold for summarizing (e.g. % bases >= CT for each sample)
*/
@Argument(fullName = "summaryCoverageThreshold", shortName = "ct", doc = "for summary file outputs, report the % of bases coverd to >= this number. Defaults to 15; can take multiple arguments.", required = false) @Argument(fullName = "summaryCoverageThreshold", shortName = "ct", doc = "for summary file outputs, report the % of bases coverd to >= this number. Defaults to 15; can take multiple arguments.", required = false)
int[] coverageThresholds = {15}; int[] coverageThresholds = {15};

View File

@ -39,10 +39,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.*;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.PrintStream; import java.io.PrintStream;
import java.util.*; import java.util.*;
@ -239,7 +236,8 @@ public class UnifiedGenotyperEngine {
VariantContext vcInput = SNPGenotypeLikelihoodsCalculationModel.getSNPVCFromAllelesRod(tracker, ref, false, logger, UAC.alleles); VariantContext vcInput = SNPGenotypeLikelihoodsCalculationModel.getSNPVCFromAllelesRod(tracker, ref, false, logger, UAC.alleles);
if ( vcInput == null ) if ( vcInput == null )
return null; return null;
vc = new VariantContext("UG_call", vcInput.getChr(), vcInput.getStart(), vcInput.getEnd(), vcInput.getAlleles()); vc = new VariantContext("UG_call", vcInput.getChr(), vcInput.getStart(), vcInput.getEnd(), vcInput.getAlleles(), InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, ref.getBase());
} else { } else {
// deal with bad/non-standard reference bases // deal with bad/non-standard reference bases
if ( !Allele.acceptableAlleleBases(new byte[]{ref.getBase()}) ) if ( !Allele.acceptableAlleleBases(new byte[]{ref.getBase()}) )

View File

@ -77,6 +77,11 @@ import java.util.*;
* if even a weak evidence for the same indel, not necessarily a confident call, exists in the first sample ("Normal"), or as somatic * if even a weak evidence for the same indel, not necessarily a confident call, exists in the first sample ("Normal"), or as somatic
* if first bam has coverage at the site but no indication for an indel. In the --somatic mode, BED output contains * if first bam has coverage at the site but no indication for an indel. In the --somatic mode, BED output contains
* only somatic calls, while --verbose output contains all calls annotated with GERMLINE/SOMATIC keywords. * only somatic calls, while --verbose output contains all calls annotated with GERMLINE/SOMATIC keywords.
*
* <b>If any of the general usage of this tool or any of the command-line arguments for this tool are not clear to you,
* please email asivache at broadinstitute dot org and he will gladly explain everything in more detail.</b>
*
*
*/ */
@ReadFilters({Platform454Filter.class, MappingQualityZeroFilter.class, PlatformUnitFilter.class}) @ReadFilters({Platform454Filter.class, MappingQualityZeroFilter.class, PlatformUnitFilter.class})
public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> { public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {

View File

@ -93,20 +93,30 @@ import java.util.List;
*/ */
@Requires(value={DataSource.REFERENCE}) @Requires(value={DataSource.REFERENCE})
public class ValidationAmplicons extends RodWalker<Integer,Integer> { public class ValidationAmplicons extends RodWalker<Integer,Integer> {
/**
* A Table-formatted file listing amplicon contig, start, stop, and a name for the amplicon (or probe)
*/
@Input(fullName = "ProbeIntervals", doc="A collection of intervals in table format with optional names that represent the "+ @Input(fullName = "ProbeIntervals", doc="A collection of intervals in table format with optional names that represent the "+
"intervals surrounding the probe sites amplicons should be designed for", required=true) "intervals surrounding the probe sites amplicons should be designed for", required=true)
RodBinding<TableFeature> probeIntervals; RodBinding<TableFeature> probeIntervals;
/**
* A VCF file containing the bi-allelic sites for validation. Filtered records will prompt a warning, and will be flagged as filtered in the output fastq.
*/
@Input(fullName = "ValidateAlleles", doc="A VCF containing the sites and alleles you want to validate. Restricted to *BI-Allelic* sites", required=true) @Input(fullName = "ValidateAlleles", doc="A VCF containing the sites and alleles you want to validate. Restricted to *BI-Allelic* sites", required=true)
RodBinding<VariantContext> validateAlleles; RodBinding<VariantContext> validateAlleles;
/**
* A VCF file containing variants to be masked. A mask variant overlapping a validation site will be ignored at the validation site.
*/
@Input(fullName = "MaskAlleles", doc="A VCF containing the sites you want to MASK from the designed amplicon (e.g. by Ns or lower-cased bases)", required=true) @Input(fullName = "MaskAlleles", doc="A VCF containing the sites you want to MASK from the designed amplicon (e.g. by Ns or lower-cased bases)", required=true)
RodBinding<VariantContext> maskAlleles; RodBinding<VariantContext> maskAlleles;
@Argument(doc="Lower case SNPs rather than replacing with 'N'",fullName="lowerCaseSNPs",required=false) @Argument(doc="Lower case SNPs rather than replacing with 'N'",fullName="lowerCaseSNPs",required=false)
boolean lowerCaseSNPs = false; boolean lowerCaseSNPs = false;
/**
* BWA single-end alignment is used as a primer specificity proxy. Low-complexity regions (that don't align back to themselves as a best hit) are lowercased.
* This changes the size of the k-mer used for alignment.
*/
@Argument(doc="Size of the virtual primer to use for lower-casing regions with low specificity",fullName="virtualPrimerSize",required=false) @Argument(doc="Size of the virtual primer to use for lower-casing regions with low specificity",fullName="virtualPrimerSize",required=false)
int virtualPrimerSize = 20; int virtualPrimerSize = 20;

View File

@ -15,6 +15,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.gatk.walkers.Window; import org.broadinstitute.sting.gatk.walkers.Window;
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator; import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator;
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.JexlExpression;
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier; import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.*; import org.broadinstitute.sting.gatk.walkers.varianteval.util.*;
import org.broadinstitute.sting.gatk.walkers.variantrecalibration.Tranche; import org.broadinstitute.sting.gatk.walkers.variantrecalibration.Tranche;
@ -24,6 +25,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
@ -122,9 +124,6 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
@Argument(fullName="doNotUseAllStandardStratifications", shortName="noST", doc="Do not use the standard stratification modules by default (instead, only those that are specified with the -S option)", required=false) @Argument(fullName="doNotUseAllStandardStratifications", shortName="noST", doc="Do not use the standard stratification modules by default (instead, only those that are specified with the -S option)", required=false)
protected Boolean NO_STANDARD_STRATIFICATIONS = false; protected Boolean NO_STANDARD_STRATIFICATIONS = false;
@Argument(fullName="onlyVariantsOfType", shortName="VT", doc="If provided, only variants of these types will be considered during the evaluation, in ", required=false)
protected Set<VariantContext.Type> typesToUse = null;
/** /**
* See the -list argument to view available modules. * See the -list argument to view available modules.
*/ */
@ -150,9 +149,6 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
@Argument(shortName="mvq", fullName="mendelianViolationQualThreshold", doc="Minimum genotype QUAL score for each trio member required to accept a site as a violation", required=false) @Argument(shortName="mvq", fullName="mendelianViolationQualThreshold", doc="Minimum genotype QUAL score for each trio member required to accept a site as a violation", required=false)
protected double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 50; protected double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 50;
@Argument(fullName="tranchesFile", shortName="tf", doc="The input tranches file describing where to cut the data", required=false)
private String TRANCHE_FILENAME = null;
@Argument(fullName="ancestralAlignments", shortName="aa", doc="Fasta file with ancestral alleles", required=false) @Argument(fullName="ancestralAlignments", shortName="aa", doc="Fasta file with ancestral alleles", required=false)
private File ancestralAlignmentsFile = null; private File ancestralAlignmentsFile = null;
@ -233,16 +229,6 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
jexlExpressions.add(sjexl); jexlExpressions.add(sjexl);
} }
// Add select expressions for anything in the tranches file
if ( TRANCHE_FILENAME != null ) {
// we are going to build a few select names automatically from the tranches file
for ( Tranche t : Tranche.readTranches(new File(TRANCHE_FILENAME)) ) {
logger.info("Adding select for all variant above the pCut of : " + t);
SELECT_EXPS.add(String.format(VariantRecalibrator.VQS_LOD_KEY + " >= %.2f", t.minVQSLod));
SELECT_NAMES.add(String.format("TS-%.2f", t.ts));
}
}
// Initialize the set of stratifications and evaluations to use // Initialize the set of stratifications and evaluations to use
stratificationObjects = variantEvalUtils.initializeStratificationObjects(this, NO_STANDARD_STRATIFICATIONS, STRATIFICATIONS_TO_USE); stratificationObjects = variantEvalUtils.initializeStratificationObjects(this, NO_STANDARD_STRATIFICATIONS, STRATIFICATIONS_TO_USE);
Set<Class<? extends VariantEvaluator>> evaluationObjects = variantEvalUtils.initializeEvaluationObjects(NO_STANDARD_MODULES, MODULES_TO_USE); Set<Class<? extends VariantEvaluator>> evaluationObjects = variantEvalUtils.initializeEvaluationObjects(NO_STANDARD_MODULES, MODULES_TO_USE);
@ -317,9 +303,9 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
// find the comp // find the comp
final VariantContext comp = findMatchingComp(eval, compSet); final VariantContext comp = findMatchingComp(eval, compSet);
HashMap<VariantStratifier, ArrayList<String>> stateMap = new HashMap<VariantStratifier, ArrayList<String>>(); HashMap<VariantStratifier, List<String>> stateMap = new HashMap<VariantStratifier, List<String>>();
for ( VariantStratifier vs : stratificationObjects ) { for ( VariantStratifier vs : stratificationObjects ) {
ArrayList<String> states = vs.getRelevantStates(ref, tracker, comp, compRod.getName(), eval, evalRod.getName(), sampleName); List<String> states = vs.getRelevantStates(ref, tracker, comp, compRod.getName(), eval, evalRod.getName(), sampleName);
stateMap.put(vs, states); stateMap.put(vs, states);
} }

View File

@ -75,7 +75,7 @@ public class CompOverlap extends VariantEvaluator implements StandardEval {
} }
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
boolean evalIsGood = eval != null && eval.isVariant(); boolean evalIsGood = eval != null && eval.isPolymorphic();
boolean compIsGood = comp != null && comp.isNotFiltered() && (eval == null || comp.getType() == eval.getType()); boolean compIsGood = comp != null && comp.isNotFiltered() && (eval == null || comp.getType() == eval.getType());
if (compIsGood) nCompVariants++; // count the number of comp events if (compIsGood) nCompVariants++; // count the number of comp events

View File

@ -100,11 +100,12 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
// So in order to maintain consistency with the previous implementation (and the intention of the original author), I've // So in order to maintain consistency with the previous implementation (and the intention of the original author), I've
// added in a proxy check for monomorphic status here. // added in a proxy check for monomorphic status here.
// Protect against case when vc only as no-calls too - can happen if we strafity by sample and sample as a single no-call. // Protect against case when vc only as no-calls too - can happen if we strafity by sample and sample as a single no-call.
if ( !vc1.isVariant() || (vc1.hasGenotypes() && vc1.getHomRefCount() + vc1.getNoCallCount() == vc1.getNSamples()) ) { if ( vc1.isMonomorphic() ) {
nRefLoci++; nRefLoci++;
} else { } else {
switch (vc1.getType()) { switch (vc1.getType()) {
case NO_VARIATION: case NO_VARIATION:
// shouldn't get here
break; break;
case SNP: case SNP:
nVariantLoci++; nVariantLoci++;

View File

@ -90,18 +90,19 @@ public class IndelLengthHistogram extends VariantEvaluator {
public int getComparisonOrder() { return 1; } // need only the evals public int getComparisonOrder() { return 1; } // need only the evals
public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( ! vc1.isBiallelic() && vc1.isIndel() ) {
//veWalker.getLogger().warn("[IndelLengthHistogram] Non-biallelic indel at "+ref.getLocus()+" ignored.");
return vc1.toString(); // biallelic sites are output
}
if ( vc1.isIndel() ) { if ( vc1.isIndel() && vc1.isPolymorphic() ) {
if ( ! vc1.isBiallelic() ) {
//veWalker.getLogger().warn("[IndelLengthHistogram] Non-biallelic indel at "+ref.getLocus()+" ignored.");
return vc1.toString(); // biallelic sites are output
}
// only count simple insertions/deletions, not complex indels
if ( vc1.isSimpleInsertion() ) { if ( vc1.isSimpleInsertion() ) {
indelHistogram.update(vc1.getAlternateAllele(0).length()); indelHistogram.update(vc1.getAlternateAllele(0).length());
} else if ( vc1.isSimpleDeletion() ) { } else if ( vc1.isSimpleDeletion() ) {
indelHistogram.update(-vc1.getReference().length()); indelHistogram.update(-vc1.getReference().length());
} else {
throw new ReviewedStingException("Indel type that is not insertion or deletion.");
} }
} }

View File

@ -270,7 +270,7 @@ public class IndelStatistics extends VariantEvaluator {
public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if (eval != null ) { if (eval != null && eval.isPolymorphic()) {
if ( indelStats == null ) { if ( indelStats == null ) {
indelStats = new IndelStats(eval); indelStats = new IndelStats(eval);
} }

View File

@ -166,7 +166,7 @@ public class SimpleMetricsByAC extends VariantEvaluator implements StandardEval
} }
} }
if ( eval.isSNP() && eval.isBiallelic() && metrics != null ) { if ( eval.isSNP() && eval.isBiallelic() && eval.isPolymorphic() && metrics != null ) {
metrics.incrValue(eval); metrics.incrValue(eval);
} }
} }

View File

@ -37,77 +37,74 @@ public class ThetaVariantEvaluator extends VariantEvaluator {
} }
public String update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { public String update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if (vc == null || !vc.isSNP() || !vc.hasGenotypes()) { if (vc == null || !vc.isSNP() || !vc.hasGenotypes() || vc.isMonomorphic()) {
return null; //no interesting sites return null; //no interesting sites
} }
if (vc.hasGenotypes()) { //this maps allele to a count
ConcurrentMap<String, Integer> alleleCounts = new ConcurrentHashMap<String, Integer>();
//this maps allele to a count int numHetsHere = 0;
ConcurrentMap<String, Integer> alleleCounts = new ConcurrentHashMap<String, Integer>(); float numGenosHere = 0;
int numIndsHere = 0;
int numHetsHere = 0; for (Genotype genotype : vc.getGenotypes().values()) {
float numGenosHere = 0; numIndsHere++;
int numIndsHere = 0; if (!genotype.isNoCall()) {
//increment stats for heterozygosity
if (genotype.isHet()) {
numHetsHere++;
}
for (Genotype genotype : vc.getGenotypes().values()) { numGenosHere++;
numIndsHere++; //increment stats for pairwise mismatches
if (!genotype.isNoCall()) {
//increment stats for heterozygosity
if (genotype.isHet()) {
numHetsHere++;
}
numGenosHere++; for (Allele allele : genotype.getAlleles()) {
//increment stats for pairwise mismatches if (allele.isNonNull() && allele.isCalled()) {
String alleleString = allele.toString();
for (Allele allele : genotype.getAlleles()) { alleleCounts.putIfAbsent(alleleString, 0);
if (allele.isNonNull() && allele.isCalled()) { alleleCounts.put(alleleString, alleleCounts.get(alleleString) + 1);
String alleleString = allele.toString();
alleleCounts.putIfAbsent(alleleString, 0);
alleleCounts.put(alleleString, alleleCounts.get(alleleString) + 1);
}
} }
} }
} }
if (numGenosHere > 0) { }
//only if have one called genotype at least if (numGenosHere > 0) {
this.numSites++; //only if have one called genotype at least
this.numSites++;
this.totalHet += numHetsHere / numGenosHere; this.totalHet += numHetsHere / numGenosHere;
//compute based on num sites //compute based on num sites
float harmonicFactor = 0; float harmonicFactor = 0;
for (int i = 1; i <= numIndsHere; i++) { for (int i = 1; i <= numIndsHere; i++) {
harmonicFactor += 1.0 / i; harmonicFactor += 1.0 / i;
} }
this.thetaRegionNumSites += 1.0 / harmonicFactor; this.thetaRegionNumSites += 1.0 / harmonicFactor;
//now compute pairwise mismatches //now compute pairwise mismatches
float numPairwise = 0; float numPairwise = 0;
float numDiffs = 0; float numDiffs = 0;
for (String allele1 : alleleCounts.keySet()) { for (String allele1 : alleleCounts.keySet()) {
int allele1Count = alleleCounts.get(allele1); int allele1Count = alleleCounts.get(allele1);
for (String allele2 : alleleCounts.keySet()) { for (String allele2 : alleleCounts.keySet()) {
if (allele1.compareTo(allele2) < 0) { if (allele1.compareTo(allele2) < 0) {
continue; continue;
} }
if (allele1 .compareTo(allele2) == 0) { if (allele1 .compareTo(allele2) == 0) {
numPairwise += allele1Count * (allele1Count - 1) * .5; numPairwise += allele1Count * (allele1Count - 1) * .5;
} }
else { else {
int allele2Count = alleleCounts.get(allele2); int allele2Count = alleleCounts.get(allele2);
numPairwise += allele1Count * allele2Count; numPairwise += allele1Count * allele2Count;
numDiffs += allele1Count * allele2Count; numDiffs += allele1Count * allele2Count;
}
} }
} }
}
if (numPairwise > 0) { if (numPairwise > 0) {
this.totalAvgDiffs += numDiffs / numPairwise; this.totalAvgDiffs += numDiffs / numPairwise;
}
} }
} }

View File

@ -40,7 +40,7 @@ public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEv
} }
public void updateTiTv(VariantContext vc, boolean updateStandard) { public void updateTiTv(VariantContext vc, boolean updateStandard) {
if (vc != null && vc.isSNP() && vc.isBiallelic()) { if (vc != null && vc.isSNP() && vc.isBiallelic() && vc.isPolymorphic()) {
if (VariantContextUtils.isTransition(vc)) { if (VariantContextUtils.isTransition(vc)) {
if (updateStandard) nTiInComp++; if (updateStandard) nTiInComp++;
else nTi++; else nTi++;

View File

@ -117,7 +117,8 @@ public class ValidationReport extends VariantEvaluator implements StandardEval {
public SiteStatus calcSiteStatus(VariantContext vc) { public SiteStatus calcSiteStatus(VariantContext vc) {
if ( vc == null ) return SiteStatus.NO_CALL; if ( vc == null ) return SiteStatus.NO_CALL;
if ( vc.isFiltered() ) return SiteStatus.FILTERED; if ( vc.isFiltered() ) return SiteStatus.FILTERED;
if ( ! vc.isVariant() ) return SiteStatus.MONO; if ( vc.isMonomorphic() ) return SiteStatus.MONO;
if ( vc.hasGenotypes() ) return SiteStatus.POLY; // must be polymorphic if isMonomorphic was false and there are genotypes
if ( vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) ) { if ( vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) ) {
int ac = 0; int ac = 0;
@ -132,8 +133,6 @@ public class ValidationReport extends VariantEvaluator implements StandardEval {
else else
ac = vc.getAttributeAsInt(VCFConstants.ALLELE_COUNT_KEY); ac = vc.getAttributeAsInt(VCFConstants.ALLELE_COUNT_KEY);
return ac > 0 ? SiteStatus.POLY : SiteStatus.MONO; return ac > 0 ? SiteStatus.POLY : SiteStatus.MONO;
} else if ( vc.hasGenotypes() ) {
return vc.isPolymorphic() ? SiteStatus.POLY : SiteStatus.MONO;
} else { } else {
return TREAT_ALL_SITES_IN_EVAL_VCF_AS_CALLED ? SiteStatus.POLY : SiteStatus.NO_CALL; // we can't figure out what to do return TREAT_ALL_SITES_IN_EVAL_VCF_AS_CALLED ? SiteStatus.POLY : SiteStatus.NO_CALL; // we can't figure out what to do
//return SiteStatus.NO_CALL; // we can't figure out what to do //return SiteStatus.NO_CALL; // we can't figure out what to do

View File

@ -232,7 +232,7 @@ public class VariantQualityScore extends VariantEvaluator {
public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
final String interesting = null; final String interesting = null;
if( eval != null && eval.isSNP() && eval.isBiallelic() ) { //BUGBUG: only counting biallelic sites (revisit what to do with triallelic sites) if( eval != null && eval.isSNP() && eval.isBiallelic() && eval.isPolymorphic() ) { //BUGBUG: only counting biallelic sites (revisit what to do with triallelic sites)
if( titvStats == null ) { titvStats = new TiTvStats(); } if( titvStats == null ) { titvStats = new TiTvStats(); }
titvStats.incrValue(eval.getPhredScaledQual(), VariantContextUtils.isTransition(eval)); titvStats.incrValue(eval.getPhredScaledQual(), VariantContextUtils.isTransition(eval));

View File

@ -10,10 +10,13 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
/**
* Stratifies the eval RODs by the allele count of the alternate allele
*
* Looks at the AC value in the INFO field, and uses that value if present. If absent,
* computes the AC from the genotypes themselves. For no AC can be computed, 0 is used.
*/
public class AlleleCount extends VariantStratifier { public class AlleleCount extends VariantStratifier {
// needs to know the variant context
private ArrayList<String> states = new ArrayList<String>();
@Override @Override
public void initialize() { public void initialize() {
List<RodBinding<VariantContext>> evals = getVariantEvalWalker().getEvals(); List<RodBinding<VariantContext>> evals = getVariantEvalWalker().getEvals();
@ -35,11 +38,7 @@ public class AlleleCount extends VariantStratifier {
getVariantEvalWalker().getLogger().info("AlleleCount using " + nchrom + " chromosomes"); getVariantEvalWalker().getLogger().info("AlleleCount using " + nchrom + " chromosomes");
} }
public ArrayList<String> getAllStates() { public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
return states;
}
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
ArrayList<String> relevantStates = new ArrayList<String>(1); ArrayList<String> relevantStates = new ArrayList<String>(1);
if (eval != null) { if (eval != null) {

View File

@ -6,11 +6,15 @@ import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List;
/**
* Stratifies the eval RODs by the allele frequency of the alternate allele
*
* Uses a constant 0.005 frequency grid, and projects the AF INFO field value. Requires
* that AF be present in every ROD, otherwise this stratification throws an exception
*/
public class AlleleFrequency extends VariantStratifier { public class AlleleFrequency extends VariantStratifier {
// needs to know the variant context
private ArrayList<String> states;
@Override @Override
public void initialize() { public void initialize() {
states = new ArrayList<String>(); states = new ArrayList<String>();
@ -19,11 +23,7 @@ public class AlleleFrequency extends VariantStratifier {
} }
} }
public ArrayList<String> getAllStates() { public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
return states;
}
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
ArrayList<String> relevantStates = new ArrayList<String>(); ArrayList<String> relevantStates = new ArrayList<String>();
if (eval != null) { if (eval != null) {

View File

@ -6,22 +6,21 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List;
/**
* Required stratification grouping output by each comp ROD
*/
public class CompRod extends VariantStratifier implements RequiredStratification { public class CompRod extends VariantStratifier implements RequiredStratification {
private ArrayList<String> states;
@Override @Override
public void initialize() { public void initialize() {
states = new ArrayList<String>();
for ( RodBinding<VariantContext> rod : getVariantEvalWalker().getComps() ) for ( RodBinding<VariantContext> rod : getVariantEvalWalker().getComps() )
states.add(rod.getName()); states.add(rod.getName());
} }
public ArrayList<String> getAllStates() {
return states;
}
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
ArrayList<String> relevantStates = new ArrayList<String>(); ArrayList<String> relevantStates = new ArrayList<String>();
relevantStates.add(compName); relevantStates.add(compName);

View File

@ -5,23 +5,19 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List;
/**
* Stratifies the evaluation by each contig in the reference sequence
*/
public class Contig extends VariantStratifier { public class Contig extends VariantStratifier {
// needs to know the variant context
private ArrayList<String> states;
@Override @Override
public void initialize() { public void initialize() {
states = new ArrayList<String>();
states.addAll(getVariantEvalWalker().getContigNames()); states.addAll(getVariantEvalWalker().getContigNames());
states.add("all"); states.add("all");
} }
public ArrayList<String> getAllStates() { public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
return states;
}
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
ArrayList<String> relevantStates = new ArrayList<String>(); ArrayList<String> relevantStates = new ArrayList<String>();
if (eval != null) { if (eval != null) {

View File

@ -5,6 +5,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List;
/** /**
* CpG is a stratification module for VariantEval that divides the input data by within/not within a CpG site * CpG is a stratification module for VariantEval that divides the input data by within/not within a CpG site
@ -19,21 +20,14 @@ import java.util.ArrayList;
* A CpG site is defined as a site where the reference base at a locus is a C and the adjacent reference base in the 3' direction is a G. * A CpG site is defined as a site where the reference base at a locus is a C and the adjacent reference base in the 3' direction is a G.
*/ */
public class CpG extends VariantStratifier { public class CpG extends VariantStratifier {
private ArrayList<String> states;
@Override @Override
public void initialize() { public void initialize() {
states = new ArrayList<String>();
states.add("all"); states.add("all");
states.add("CpG"); states.add("CpG");
states.add("non_CpG"); states.add("non_CpG");
} }
public ArrayList<String> getAllStates() { public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
return states;
}
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
boolean isCpG = false; boolean isCpG = false;
if (ref != null && ref.getBases() != null) { if (ref != null && ref.getBases() != null) {
String fwRefBases = new String(ref.getBases()); String fwRefBases = new String(ref.getBases());

View File

@ -7,10 +7,12 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.List;
/**
* Experimental stratification by the degeneracy of an amino acid, according to VCF annotation. Not safe
*/
public class Degeneracy extends VariantStratifier { public class Degeneracy extends VariantStratifier {
private ArrayList<String> states;
private HashMap<String, HashMap<Integer, String>> degeneracies; private HashMap<String, HashMap<Integer, String>> degeneracies;
@Override @Override
@ -77,11 +79,7 @@ public class Degeneracy extends VariantStratifier {
} }
} }
public ArrayList<String> getAllStates() { public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
return states;
}
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
ArrayList<String> relevantStates = new ArrayList<String>(); ArrayList<String> relevantStates = new ArrayList<String>();
relevantStates.add("all"); relevantStates.add("all");

View File

@ -6,10 +6,12 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List;
/**
* Required stratification grouping output by each eval ROD
*/
public class EvalRod extends VariantStratifier implements RequiredStratification { public class EvalRod extends VariantStratifier implements RequiredStratification {
private ArrayList<String> states;
@Override @Override
public void initialize() { public void initialize() {
states = new ArrayList<String>(); states = new ArrayList<String>();
@ -17,11 +19,7 @@ public class EvalRod extends VariantStratifier implements RequiredStratification
states.add(rod.getName()); states.add(rod.getName());
} }
public ArrayList<String> getAllStates() { public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
return states;
}
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
ArrayList<String> relevantStates = new ArrayList<String>(); ArrayList<String> relevantStates = new ArrayList<String>();
relevantStates.add(evalName); relevantStates.add(evalName);

View File

@ -5,24 +5,20 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List;
/**
* Stratifies by the FILTER status (PASS, FAIL) of the eval records
*/
public class Filter extends VariantStratifier { public class Filter extends VariantStratifier {
// needs to know the variant context
private ArrayList<String> states;
@Override @Override
public void initialize() { public void initialize() {
states = new ArrayList<String>();
states.add("called"); states.add("called");
states.add("filtered"); states.add("filtered");
states.add("raw"); states.add("raw");
} }
public ArrayList<String> getAllStates() { public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
return states;
}
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
ArrayList<String> relevantStates = new ArrayList<String>(); ArrayList<String> relevantStates = new ArrayList<String>();
relevantStates.add("raw"); relevantStates.add("raw");

View File

@ -5,25 +5,22 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List;
/**
* Stratifies by nonsense, missense, silent, and all annotations in the input ROD, from the INFO field annotation.
*/
public class FunctionalClass extends VariantStratifier { public class FunctionalClass extends VariantStratifier {
// needs to know the variant context
private ArrayList<String> states;
@Override @Override
public void initialize() { public void initialize() {
states = new ArrayList<String>();
states.add("all"); states.add("all");
states.add("silent"); states.add("silent");
states.add("missense"); states.add("missense");
states.add("nonsense"); states.add("nonsense");
} }
public ArrayList<String> getAllStates() {
return states;
}
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
ArrayList<String> relevantStates = new ArrayList<String>(); ArrayList<String> relevantStates = new ArrayList<String>();
relevantStates.add("all"); relevantStates.add("all");

View File

@ -6,30 +6,30 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.util.SortableJexlVCMatc
import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
import java.util.List;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Set; import java.util.Set;
/**
* Stratifies the eval RODs by user-supplied JEXL expressions
*
* See http://www.broadinstitute.org/gsa/wiki/index.php/Using_JEXL_expressions for more details
*/
public class JexlExpression extends VariantStratifier implements StandardStratification { public class JexlExpression extends VariantStratifier implements StandardStratification {
// needs to know the jexl expressions // needs to know the jexl expressions
private Set<SortableJexlVCMatchExp> jexlExpressions; private Set<SortableJexlVCMatchExp> jexlExpressions;
private ArrayList<String> states;
@Override @Override
public void initialize() { public void initialize() {
jexlExpressions = getVariantEvalWalker().getJexlExpressions(); jexlExpressions = getVariantEvalWalker().getJexlExpressions();
states = new ArrayList<String>();
states.add("none"); states.add("none");
for ( SortableJexlVCMatchExp jexlExpression : jexlExpressions ) { for ( SortableJexlVCMatchExp jexlExpression : jexlExpressions ) {
states.add(jexlExpression.name); states.add(jexlExpression.name);
} }
} }
public ArrayList<String> getAllStates() { public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
return states;
}
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
ArrayList<String> relevantStates = new ArrayList<String>(); ArrayList<String> relevantStates = new ArrayList<String>();
relevantStates.add("none"); relevantStates.add("none");

View File

@ -7,32 +7,31 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.*; import java.util.*;
/**
* Stratifies by whether a site in in the list of known RODs (e.g., dbsnp by default)
*/
public class Novelty extends VariantStratifier implements StandardStratification { public class Novelty extends VariantStratifier implements StandardStratification {
// needs the variant contexts and known names // needs the variant contexts and known names
private List<RodBinding<VariantContext>> knowns; private List<RodBinding<VariantContext>> knowns;
final private ArrayList<String> states = new ArrayList<String>(Arrays.asList("all", "known", "novel"));
@Override @Override
public void initialize() { public void initialize() {
states = new ArrayList<String>(Arrays.asList("all", "known", "novel"));
knowns = getVariantEvalWalker().getKnowns(); knowns = getVariantEvalWalker().getKnowns();
} }
public ArrayList<String> getAllStates() { public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
return states;
}
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
if (tracker != null && eval != null) { if (tracker != null && eval != null) {
final Collection<VariantContext> knownComps = tracker.getValues(knowns, ref.getLocus()); final Collection<VariantContext> knownComps = tracker.getValues(knowns, ref.getLocus());
for ( final VariantContext c : knownComps ) { for ( final VariantContext c : knownComps ) {
// loop over sites, looking for something that matches the type eval // loop over sites, looking for something that matches the type eval
if ( eval.getType() == c.getType() ) { if ( eval.getType() == c.getType() ) {
return new ArrayList<String>(Arrays.asList("all", "known")); return Arrays.asList("all", "known");
} }
} }
} }
return new ArrayList<String>(Arrays.asList("all", "novel")); return Arrays.asList("all", "novel");
} }
} }

View File

@ -4,26 +4,23 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.ArrayList; import java.util.Arrays;
import java.util.List;
/**
* Stratifies the eval RODs by each sample in the eval ROD.
*
* This allows the system to analyze each sample separately. Since many evaluations
* only consider non-reference sites, stratifying by sample results in meaningful
* calculations for CompOverlap
*/
public class Sample extends VariantStratifier { public class Sample extends VariantStratifier {
// needs the sample names
private ArrayList<String> samples;
@Override @Override
public void initialize() { public void initialize() {
samples = new ArrayList<String>(); states.addAll(getVariantEvalWalker().getSampleNamesForStratification());
samples.addAll(getVariantEvalWalker().getSampleNamesForStratification());
} }
public ArrayList<String> getAllStates() { public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
return samples; return Arrays.asList(sampleName);
}
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
ArrayList<String> relevantStates = new ArrayList<String>();
relevantStates.add(sampleName);
return relevantStates;
} }
} }

View File

@ -6,9 +6,12 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
public abstract class VariantStratifier implements Comparable { public abstract class VariantStratifier implements Comparable {
private VariantEvalWalker variantEvalWalker; private VariantEvalWalker variantEvalWalker;
protected ArrayList<String> states = new ArrayList<String>();
/** /**
* @return a reference to the parent VariantEvalWalker running this stratification * @return a reference to the parent VariantEvalWalker running this stratification
@ -27,15 +30,15 @@ public abstract class VariantStratifier implements Comparable {
public abstract void initialize(); public abstract void initialize();
public ArrayList<String> getAllStates() { public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
return new ArrayList<String>();
}
public ArrayList<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
return null; return null;
} }
public int compareTo(Object o1) { public int compareTo(Object o1) {
return this.getClass().getSimpleName().compareTo(o1.getClass().getSimpleName()); return this.getClass().getSimpleName().compareTo(o1.getClass().getSimpleName());
} }
public ArrayList<String> getAllStates() {
return states;
}
} }

View File

@ -0,0 +1,49 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
/**
* Stratifies the eval variants by their type (SNP, INDEL, ETC)
*/
public class VariantType extends VariantStratifier {
@Override
public void initialize() {
for ( VariantContext.Type t : VariantContext.Type.values() ) {
states.add(t.toString());
}
}
public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
return eval == null ? Collections.<String>emptyList() : Arrays.asList(eval.getType().toString());
}
}

View File

@ -266,10 +266,7 @@ public class VariantEvalUtils {
* @return a new VariantContext with just the requested sample * @return a new VariantContext with just the requested sample
*/ */
public VariantContext getSubsetOfVariantContext(VariantContext vc, String sampleName) { public VariantContext getSubsetOfVariantContext(VariantContext vc, String sampleName) {
ArrayList<String> sampleNames = new ArrayList<String>(); return getSubsetOfVariantContext(vc, Arrays.asList(sampleName));
sampleNames.add(sampleName);
return getSubsetOfVariantContext(vc, sampleNames);
} }
/** /**
@ -371,12 +368,12 @@ public class VariantEvalUtils {
* @param stateKeys all the state keys * @param stateKeys all the state keys
* @return a list of state keys * @return a list of state keys
*/ */
public ArrayList<StateKey> initializeStateKeys(HashMap<VariantStratifier, ArrayList<String>> stateMap, Stack<HashMap<VariantStratifier, ArrayList<String>>> stateStack, StateKey stateKey, ArrayList<StateKey> stateKeys) { public ArrayList<StateKey> initializeStateKeys(HashMap<VariantStratifier, List<String>> stateMap, Stack<HashMap<VariantStratifier, List<String>>> stateStack, StateKey stateKey, ArrayList<StateKey> stateKeys) {
if (stateStack == null) { if (stateStack == null) {
stateStack = new Stack<HashMap<VariantStratifier, ArrayList<String>>>(); stateStack = new Stack<HashMap<VariantStratifier, List<String>>>();
for (VariantStratifier vs : stateMap.keySet()) { for (VariantStratifier vs : stateMap.keySet()) {
HashMap<VariantStratifier, ArrayList<String>> oneSetOfStates = new HashMap<VariantStratifier, ArrayList<String>>(); HashMap<VariantStratifier, List<String>> oneSetOfStates = new HashMap<VariantStratifier, List<String>>();
oneSetOfStates.put(vs, stateMap.get(vs)); oneSetOfStates.put(vs, stateMap.get(vs));
stateStack.add(oneSetOfStates); stateStack.add(oneSetOfStates);
@ -384,10 +381,10 @@ public class VariantEvalUtils {
} }
if (!stateStack.isEmpty()) { if (!stateStack.isEmpty()) {
Stack<HashMap<VariantStratifier, ArrayList<String>>> newStateStack = new Stack<HashMap<VariantStratifier, ArrayList<String>>>(); Stack<HashMap<VariantStratifier, List<String>>> newStateStack = new Stack<HashMap<VariantStratifier, List<String>>>();
newStateStack.addAll(stateStack); newStateStack.addAll(stateStack);
HashMap<VariantStratifier, ArrayList<String>> oneSetOfStates = newStateStack.pop(); HashMap<VariantStratifier, List<String>> oneSetOfStates = newStateStack.pop();
VariantStratifier vs = oneSetOfStates.keySet().iterator().next(); VariantStratifier vs = oneSetOfStates.keySet().iterator().next();
for (String state : oneSetOfStates.get(vs)) { for (String state : oneSetOfStates.get(vs)) {

View File

@ -214,7 +214,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
@Argument(fullName="sample_expressions", shortName="se", doc="Regular expression to select many samples from the ROD tracks provided. Can be specified multiple times", required=false) @Argument(fullName="sample_expressions", shortName="se", doc="Regular expression to select many samples from the ROD tracks provided. Can be specified multiple times", required=false)
public Set<String> sampleExpressions ; public Set<String> sampleExpressions ;
@Argument(fullName="sample_file", shortName="sf", doc="File containing a list of samples (one per line) to include. Can be specified multiple times", required=false) @Input(fullName="sample_file", shortName="sf", doc="File containing a list of samples (one per line) to include. Can be specified multiple times", required=false)
public Set<File> sampleFiles; public Set<File> sampleFiles;
/** /**
@ -226,7 +226,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
/** /**
* Note that sample exclusion takes precedence over inclusion, so that if a sample is in both lists it will be excluded. * Note that sample exclusion takes precedence over inclusion, so that if a sample is in both lists it will be excluded.
*/ */
@Argument(fullName="exclude_sample_file", shortName="xl_sf", doc="File containing a list of samples (one per line) to exclude. Can be specified multiple times", required=false) @Input(fullName="exclude_sample_file", shortName="xl_sf", doc="File containing a list of samples (one per line) to exclude. Can be specified multiple times", required=false)
public Set<File> XLsampleFiles = new HashSet<File>(0); public Set<File> XLsampleFiles = new HashSet<File>(0);
/** /**

View File

@ -26,7 +26,6 @@
package org.broadinstitute.sting.gatk.walkers.variantutils; package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.broad.tribble.TribbleException; import org.broad.tribble.TribbleException;
import org.broad.tribble.dbsnp.DbSNPFeature;
import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection; import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
@ -41,7 +40,6 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.File; import java.io.File;
import java.util.Collection; import java.util.Collection;
import java.util.HashSet; import java.util.HashSet;
import java.util.List;
import java.util.Set; import java.util.Set;
@ -168,14 +166,9 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
// get the RS IDs // get the RS IDs
Set<String> rsIDs = null; Set<String> rsIDs = null;
if ( tracker.hasValues(dbsnp.dbsnp) ) { if ( tracker.hasValues(dbsnp.dbsnp) ) {
List<VariantContext> dbsnpList = tracker.getValues(dbsnp.dbsnp, ref.getLocus());
rsIDs = new HashSet<String>(); rsIDs = new HashSet<String>();
for ( Object d : dbsnpList ) { for ( VariantContext rsID : tracker.getValues(dbsnp.dbsnp, ref.getLocus()) )
if (d instanceof DbSNPFeature ) rsIDs.add(rsID.getID());
rsIDs.add(((DbSNPFeature)d).getRsID());
else if (d instanceof VariantContext )
rsIDs.add(((VariantContext)d).getID());
}
} }
try { try {

View File

@ -63,6 +63,10 @@ public class ReadClipper {
if (start < 0 || stop > read.getReadLength() - 1) if (start < 0 || stop > read.getReadLength() - 1)
throw new ReviewedStingException("Trying to clip before the start or after the end of a read"); throw new ReviewedStingException("Trying to clip before the start or after the end of a read");
// TODO add requires statement/check in the Hardclip function
if ( start > stop )
stop = ReadUtils.getReadCoordinateForReferenceCoordinate(read, ReadUtils.getRefCoordSoftUnclippedEnd(read));
//System.out.println("Clipping start/stop: " + start + "/" + stop); //System.out.println("Clipping start/stop: " + start + "/" + stop);
this.addOp(new ClippingOp(start, stop)); this.addOp(new ClippingOp(start, stop));
SAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES); SAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES);

View File

@ -82,7 +82,7 @@ public class PileupElement {
// -------------------------------------------------------------------------- // --------------------------------------------------------------------------
private Integer getReducedReadQualityTagValue() { private Integer getReducedReadQualityTagValue() {
return (Integer)getRead().getAttribute(ReadUtils.REDUCED_READ_QUALITY_TAG); return getRead().getIntegerAttribute(ReadUtils.REDUCED_READ_QUALITY_TAG);
} }
public boolean isReducedRead() { public boolean isReducedRead() {

View File

@ -41,6 +41,10 @@ public class GATKSAMRecord extends SAMRecord {
// because some values can be null, we don't want to duplicate effort // because some values can be null, we don't want to duplicate effort
private boolean retrievedReadGroup = false; private boolean retrievedReadGroup = false;
/** A private cache for the reduced read quality. Null indicates the value hasn't be fetched yet or isn't available */
private boolean lookedUpReducedReadQuality = false;
private Integer reducedReadQuality;
// These temporary attributes were added here to make life easier for // These temporary attributes were added here to make life easier for
// certain algorithms by providing a way to label or attach arbitrary data to // certain algorithms by providing a way to label or attach arbitrary data to
// individual GATKSAMRecords. // individual GATKSAMRecords.
@ -338,7 +342,17 @@ public class GATKSAMRecord extends SAMRecord {
public Object getAttribute(final String tag) { return mRecord.getAttribute(tag); } public Object getAttribute(final String tag) { return mRecord.getAttribute(tag); }
public Integer getIntegerAttribute(final String tag) { return mRecord.getIntegerAttribute(tag); } public Integer getIntegerAttribute(final String tag) {
if ( tag == ReadUtils.REDUCED_READ_QUALITY_TAG ) {
if ( ! lookedUpReducedReadQuality ) {
lookedUpReducedReadQuality = true;
reducedReadQuality = mRecord.getIntegerAttribute(tag);
}
return reducedReadQuality;
} else {
return mRecord.getIntegerAttribute(tag);
}
}
public Short getShortAttribute(final String tag) { return mRecord.getShortAttribute(tag); } public Short getShortAttribute(final String tag) { return mRecord.getShortAttribute(tag); }

View File

@ -983,7 +983,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @return true if it's monomorphic * @return true if it's monomorphic
*/ */
public boolean isMonomorphic() { public boolean isMonomorphic() {
return ! isVariant() || getChromosomeCount(getReference()) == getChromosomeCount(); return ! isVariant() || (hasGenotypes() && getHomRefCount() + getNoCallCount() == getNSamples());
} }
/** /**

View File

@ -127,6 +127,7 @@ public class TraverseReadsUnitTest extends BaseTest {
Object accumulator = countReadWalker.reduceInit(); Object accumulator = countReadWalker.reduceInit();
while (shardStrategy.hasNext()) { while (shardStrategy.hasNext()) {
traversalEngine.startTimersIfNecessary();
Shard shard = shardStrategy.next(); Shard shard = shardStrategy.next();
if (shard == null) { if (shard == null) {

View File

@ -261,10 +261,10 @@ public class VariantEvalIntegrationTest extends WalkerTest {
return String.format("%s -select '%s' -selectName %s", cmd, select, name); return String.format("%s -select '%s' -selectName %s", cmd, select, name);
} }
@Test @Test(enabled = false) // no longer supported in the GATK
public void testTranches() { public void testTranches() {
String extraArgs = "-T VariantEval -R "+ hg18Reference +" --eval " + validationDataLocation + "GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.optimized.vcf -o %s -EV TiTvVariantEvaluator -L chr1 -noEV -ST CpG -tf " + testDir + "tranches.6.txt"; String extraArgs = "-T VariantEval -R "+ hg18Reference +" --eval " + validationDataLocation + "GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.optimized.vcf -o %s -EV TiTvVariantEvaluator -L chr1 -noEV -ST CpG -tf " + testDir + "tranches.6.txt";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("984df6e94a546294fc7e0846cbac2dfe")); WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("6af2b9959aa1778a5b712536de453952"));
executeTestParallel("testTranches",spec); executeTestParallel("testTranches",spec);
} }

View File

@ -23,7 +23,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + b36KGReference + "-R " + b36KGReference +
" --variant:dbsnp " + GATKDataLocation + "Comparisons/Validated/dbSNP/dbsnp_129_b36.rod" + " --variant:OldDbsnp " + GATKDataLocation + "Comparisons/Validated/dbSNP/dbsnp_129_b36.rod" +
" -T VariantsToVCF" + " -T VariantsToVCF" +
" -L 1:1-30,000,000" + " -L 1:1-30,000,000" +
" -o %s" + " -o %s" +

View File

@ -11,7 +11,7 @@ import net.sf.samtools.SAMFileReader
import net.sf.samtools.SAMFileHeader.SortOrder import net.sf.samtools.SAMFileHeader.SortOrder
import org.broadinstitute.sting.queue.util.QScriptUtils import org.broadinstitute.sting.queue.util.QScriptUtils
import org.broadinstitute.sting.queue.function.{CommandLineFunction, ListWriterFunction} import org.broadinstitute.sting.queue.function.ListWriterFunction
class DataProcessingPipeline extends QScript { class DataProcessingPipeline extends QScript {
qscript => qscript =>
@ -31,19 +31,14 @@ class DataProcessingPipeline extends QScript {
var reference: File = _ var reference: File = _
@Input(doc="dbsnp ROD to use (must be in VCF format)", fullName="dbsnp", shortName="D", required=true) @Input(doc="dbsnp ROD to use (must be in VCF format)", fullName="dbsnp", shortName="D", required=true)
var dbSNP: File = _ var dbSNP: List[File] = List()
/**************************************************************************** /****************************************************************************
* Optional Parameters * Optional Parameters
****************************************************************************/ ****************************************************************************/
// @Input(doc="path to Picard's SortSam.jar (if re-aligning a previously processed BAM file)", fullName="path_to_sort_jar", shortName="sort", required=false)
// var sortSamJar: File = _
//
@Input(doc="extra VCF files to use as reference indels for Indel Realignment", fullName="extra_indels", shortName="indels", required=false) @Input(doc="extra VCF files to use as reference indels for Indel Realignment", fullName="extra_indels", shortName="indels", required=false)
var indels: File = _ var indels: List[File] = List()
@Input(doc="The path to the binary of bwa (usually BAM files have already been mapped - but if you want to remap this is the option)", fullName="path_to_bwa", shortName="bwa", required=false) @Input(doc="The path to the binary of bwa (usually BAM files have already been mapped - but if you want to remap this is the option)", fullName="path_to_bwa", shortName="bwa", required=false)
var bwaPath: File = _ var bwaPath: File = _
@ -132,24 +127,6 @@ class DataProcessingPipeline extends QScript {
} }
} }
return sampleTable.toMap return sampleTable.toMap
// println("\n\n*** INPUT FILES ***\n")
// // Creating one file for each sample in the dataset
// val sampleBamFiles = scala.collection.mutable.Map.empty[String, File]
// for ((sample, flist) <- sampleTable) {
//
// println(sample + ":")
// for (f <- flist)
// println (f)
// println()
//
// val sampleFileName = new File(qscript.outputDir + qscript.projectName + "." + sample + ".list")
// sampleBamFiles(sample) = sampleFileName
// //add(writeList(flist, sampleFileName))
// }
// println("*** INPUT FILES ***\n\n")
//
// return sampleBamFiles.toMap
} }
// Rebuilds the Read Group string to give BWA // Rebuilds the Read Group string to give BWA
@ -321,9 +298,9 @@ class DataProcessingPipeline extends QScript {
this.input_file = inBams this.input_file = inBams
this.out = outIntervals this.out = outIntervals
this.mismatchFraction = 0.0 this.mismatchFraction = 0.0
this.known :+= qscript.dbSNP this.known ++= qscript.dbSNP
if (indels != null) if (indels != null)
this.known :+= qscript.indels this.known ++= qscript.indels
this.scatterCount = nContigs this.scatterCount = nContigs
this.analysisName = queueLogDir + outIntervals + ".target" this.analysisName = queueLogDir + outIntervals + ".target"
this.jobName = queueLogDir + outIntervals + ".target" this.jobName = queueLogDir + outIntervals + ".target"
@ -333,9 +310,9 @@ class DataProcessingPipeline extends QScript {
this.input_file = inBams this.input_file = inBams
this.targetIntervals = tIntervals this.targetIntervals = tIntervals
this.out = outBam this.out = outBam
this.known :+= qscript.dbSNP this.known ++= qscript.dbSNP
if (qscript.indels != null) if (qscript.indels != null)
this.known :+= qscript.indels this.known ++= qscript.indels
this.consensusDeterminationModel = cleanModelEnum this.consensusDeterminationModel = cleanModelEnum
this.compress = 0 this.compress = 0
this.scatterCount = nContigs this.scatterCount = nContigs
@ -344,7 +321,7 @@ class DataProcessingPipeline extends QScript {
} }
case class cov (inBam: File, outRecalFile: File) extends CountCovariates with CommandLineGATKArgs { case class cov (inBam: File, outRecalFile: File) extends CountCovariates with CommandLineGATKArgs {
this.knownSites :+= qscript.dbSNP this.knownSites ++= qscript.dbSNP
this.covariate ++= List("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "DinucCovariate") this.covariate ++= List("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "DinucCovariate")
this.input_file :+= inBam this.input_file :+= inBam
this.recal_file = outRecalFile this.recal_file = outRecalFile

View File

@ -1,3 +1,3 @@
<ivy-module version="1.0"> <ivy-module version="1.0">
<info organisation="org.broad" module="tribble" revision="23" status="integration" /> <info organisation="org.broad" module="tribble" revision="24" status="integration" />
</ivy-module> </ivy-module>