Merge branch 'master' of github.com:broadinstitute/gsa-unstable
This commit is contained in:
commit
e47a389b26
|
|
@ -57,6 +57,8 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
||||||
import org.broadinstitute.sting.utils.SampleUtils;
|
import org.broadinstitute.sting.utils.SampleUtils;
|
||||||
|
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||||
|
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
|
||||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||||
import org.broadinstitute.variant.vcf.*;
|
import org.broadinstitute.variant.vcf.*;
|
||||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||||
|
|
@ -180,18 +182,47 @@ public class SelectHeaders extends RodWalker<Integer, Integer> implements TreeRe
|
||||||
headerLines = new LinkedHashSet<VCFHeaderLine>(getSelectedHeaders(headerLines));
|
headerLines = new LinkedHashSet<VCFHeaderLine>(getSelectedHeaders(headerLines));
|
||||||
|
|
||||||
// Optionally add in the intervals.
|
// Optionally add in the intervals.
|
||||||
if (includeIntervals && getToolkit().getArguments().intervals != null) {
|
if (includeIntervals) {
|
||||||
for (IntervalBinding<Feature> intervalBinding : getToolkit().getArguments().intervals) {
|
IntervalArgumentCollection intervalArguments = getToolkit().getArguments().intervalArguments;
|
||||||
String source = intervalBinding.getSource();
|
if (intervalArguments.intervals != null) {
|
||||||
if (source == null)
|
for (IntervalBinding<Feature> intervalBinding : intervalArguments.intervals) {
|
||||||
continue;
|
String source = intervalBinding.getSource();
|
||||||
File file = new File(source);
|
if (source == null)
|
||||||
if (file.exists()) {
|
continue;
|
||||||
headerLines.add(new VCFHeaderLine(VCFHeader.INTERVALS_KEY, FilenameUtils.getBaseName(file.getName())));
|
File file = new File(source);
|
||||||
} else {
|
if (file.exists()) {
|
||||||
headerLines.add(new VCFHeaderLine(VCFHeader.INTERVALS_KEY, source));
|
headerLines.add(new VCFHeaderLine(VCFHeader.INTERVALS_KEY, FilenameUtils.getBaseName(file.getName())));
|
||||||
|
} else {
|
||||||
|
headerLines.add(new VCFHeaderLine(VCFHeader.INTERVALS_KEY, source));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (intervalArguments.excludeIntervals != null) {
|
||||||
|
for (IntervalBinding<Feature> intervalBinding : intervalArguments.excludeIntervals) {
|
||||||
|
String source = intervalBinding.getSource();
|
||||||
|
if (source == null)
|
||||||
|
continue;
|
||||||
|
File file = new File(source);
|
||||||
|
if (file.exists()) {
|
||||||
|
headerLines.add(new VCFHeaderLine(VCFHeader.EXCLUDE_INTERVALS_KEY, FilenameUtils.getBaseName(file.getName())));
|
||||||
|
} else {
|
||||||
|
headerLines.add(new VCFHeaderLine(VCFHeader.EXCLUDE_INTERVALS_KEY, source));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (intervalArguments.intervalMerging != IntervalMergingRule.ALL) {
|
||||||
|
headerLines.add(new VCFHeaderLine(VCFHeader.INTERVAL_MERGING_KEY, String.valueOf(intervalArguments.intervalMerging)));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (intervalArguments.intervalSetRule != IntervalSetRule.UNION) {
|
||||||
|
headerLines.add(new VCFHeaderLine(VCFHeader.INTERVAL_SET_RULE_KEY, String.valueOf(intervalArguments.intervalSetRule)));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (intervalArguments.intervalPadding != 0) {
|
||||||
|
headerLines.add(new VCFHeaderLine(VCFHeader.INTERVAL_PADDING_KEY, String.valueOf(intervalArguments.intervalPadding)));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TreeSet<String> vcfSamples = new TreeSet<String>(SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE));
|
TreeSet<String> vcfSamples = new TreeSet<String>(SampleUtils.getSampleList(vcfRods, VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE));
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,70 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2012 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||||
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.commandline;
|
||||||
|
|
||||||
|
import org.broad.tribble.Feature;
|
||||||
|
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||||
|
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class IntervalArgumentCollection {
|
||||||
|
/**
|
||||||
|
* Using this option one can instruct the GATK engine to traverse over only part of the genome. This argument can be specified multiple times.
|
||||||
|
* One may use samtools-style intervals either explicitly (e.g. -L chr1 or -L chr1:100-200) or listed in a file (e.g. -L myFile.intervals).
|
||||||
|
* Additionally, one may specify a rod file to traverse over the positions for which there is a record in the file (e.g. -L file.vcf).
|
||||||
|
* To specify the completely unmapped reads in the BAM file (i.e. those without a reference contig) use -L unmapped.
|
||||||
|
*/
|
||||||
|
@Input(fullName = "intervals", shortName = "L", doc = "One or more genomic intervals over which to operate. Can be explicitly specified on the command line or in a file (including a rod file)", required = false)
|
||||||
|
public List<IntervalBinding<Feature>> intervals = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Using this option one can instruct the GATK engine NOT to traverse over certain parts of the genome. This argument can be specified multiple times.
|
||||||
|
* One may use samtools-style intervals either explicitly (e.g. -XL chr1 or -XL chr1:100-200) or listed in a file (e.g. -XL myFile.intervals).
|
||||||
|
* Additionally, one may specify a rod file to skip over the positions for which there is a record in the file (e.g. -XL file.vcf).
|
||||||
|
*/
|
||||||
|
@Input(fullName = "excludeIntervals", shortName = "XL", doc = "One or more genomic intervals to exclude from processing. Can be explicitly specified on the command line or in a file (including a rod file)", required = false)
|
||||||
|
public List<IntervalBinding<Feature>> excludeIntervals = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* How should the intervals specified by multiple -L or -XL arguments be combined? Using this argument one can, for example, traverse over all of the positions
|
||||||
|
* for which there is a record in a VCF but just in chromosome 20 (-L chr20 -L file.vcf -isr INTERSECTION).
|
||||||
|
*/
|
||||||
|
@Argument(fullName = "interval_set_rule", shortName = "isr", doc = "Indicates the set merging approach the interval parser should use to combine the various -L or -XL inputs", required = false)
|
||||||
|
public IntervalSetRule intervalSetRule = IntervalSetRule.UNION;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Should abutting (but not overlapping) intervals be treated as separate intervals?
|
||||||
|
*/
|
||||||
|
@Argument(fullName = "interval_merging", shortName = "im", doc = "Indicates the interval merging rule we should use for abutting intervals", required = false)
|
||||||
|
public IntervalMergingRule intervalMerging = IntervalMergingRule.ALL;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* For example, '-L chr1:100' with a padding value of 20 would turn into '-L chr1:80-120'.
|
||||||
|
*/
|
||||||
|
@Argument(fullName = "interval_padding", shortName = "ip", doc = "Indicates how many basepairs of padding to include around each of the intervals specified with the -L/--intervals argument", required = false)
|
||||||
|
public int intervalPadding = 0;
|
||||||
|
}
|
||||||
|
|
@ -55,7 +55,6 @@ import org.broadinstitute.sting.gatk.samples.SampleDBBuilder;
|
||||||
import org.broadinstitute.sting.gatk.walkers.*;
|
import org.broadinstitute.sting.gatk.walkers.*;
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||||
|
|
@ -361,7 +360,6 @@ public class GenomeAnalysisEngine {
|
||||||
* Returns a list of active, initialized read transformers
|
* Returns a list of active, initialized read transformers
|
||||||
*
|
*
|
||||||
* @param walker the walker we need to apply read transformers to
|
* @param walker the walker we need to apply read transformers to
|
||||||
* @return a non-null list of read transformers
|
|
||||||
*/
|
*/
|
||||||
public void initializeReadTransformers(final Walker walker) {
|
public void initializeReadTransformers(final Walker walker) {
|
||||||
final List<ReadTransformer> activeTransformers = new ArrayList<ReadTransformer>();
|
final List<ReadTransformer> activeTransformers = new ArrayList<ReadTransformer>();
|
||||||
|
|
@ -672,41 +670,7 @@ public class GenomeAnalysisEngine {
|
||||||
* Setup the intervals to be processed
|
* Setup the intervals to be processed
|
||||||
*/
|
*/
|
||||||
protected void initializeIntervals() {
|
protected void initializeIntervals() {
|
||||||
// return if no interval arguments at all
|
intervals = IntervalUtils.parseIntervalArguments(this.referenceDataSource, argCollection.intervalArguments);
|
||||||
if ( argCollection.intervals == null && argCollection.excludeIntervals == null )
|
|
||||||
return;
|
|
||||||
|
|
||||||
// Note that the use of '-L all' is no longer supported.
|
|
||||||
|
|
||||||
// if include argument isn't given, create new set of all possible intervals
|
|
||||||
|
|
||||||
final Pair<GenomeLocSortedSet, GenomeLocSortedSet> includeExcludePair = IntervalUtils.parseIntervalBindingsPair(
|
|
||||||
this.referenceDataSource,
|
|
||||||
argCollection.intervals,
|
|
||||||
argCollection.intervalSetRule, argCollection.intervalMerging, argCollection.intervalPadding,
|
|
||||||
argCollection.excludeIntervals);
|
|
||||||
|
|
||||||
final GenomeLocSortedSet includeSortedSet = includeExcludePair.getFirst();
|
|
||||||
final GenomeLocSortedSet excludeSortedSet = includeExcludePair.getSecond();
|
|
||||||
|
|
||||||
// if no exclude arguments, can return parseIntervalArguments directly
|
|
||||||
if ( excludeSortedSet == null )
|
|
||||||
intervals = includeSortedSet;
|
|
||||||
|
|
||||||
// otherwise there are exclude arguments => must merge include and exclude GenomeLocSortedSets
|
|
||||||
else {
|
|
||||||
intervals = includeSortedSet.subtractRegions(excludeSortedSet);
|
|
||||||
|
|
||||||
// logging messages only printed when exclude (-XL) arguments are given
|
|
||||||
final long toPruneSize = includeSortedSet.coveredSize();
|
|
||||||
final long toExcludeSize = excludeSortedSet.coveredSize();
|
|
||||||
final long intervalSize = intervals.coveredSize();
|
|
||||||
logger.info(String.format("Initial include intervals span %d loci; exclude intervals span %d loci", toPruneSize, toExcludeSize));
|
|
||||||
logger.info(String.format("Excluding %d loci from original intervals (%.2f%% reduction)",
|
|
||||||
toPruneSize - intervalSize, (toPruneSize - intervalSize) / (0.01 * toPruneSize)));
|
|
||||||
}
|
|
||||||
|
|
||||||
logger.info(String.format("Processing %d bp from intervals", intervals.coveredSize()));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -26,11 +26,7 @@
|
||||||
package org.broadinstitute.sting.gatk.arguments;
|
package org.broadinstitute.sting.gatk.arguments;
|
||||||
|
|
||||||
import net.sf.samtools.SAMFileReader;
|
import net.sf.samtools.SAMFileReader;
|
||||||
import org.broad.tribble.Feature;
|
import org.broadinstitute.sting.commandline.*;
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
|
||||||
import org.broadinstitute.sting.commandline.Hidden;
|
|
||||||
import org.broadinstitute.sting.commandline.Input;
|
|
||||||
import org.broadinstitute.sting.commandline.IntervalBinding;
|
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
|
import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
|
||||||
import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
|
import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
|
||||||
|
|
@ -38,8 +34,6 @@ import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
|
||||||
import org.broadinstitute.sting.gatk.samples.PedigreeValidationType;
|
import org.broadinstitute.sting.gatk.samples.PedigreeValidationType;
|
||||||
import org.broadinstitute.sting.utils.QualityUtils;
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
|
||||||
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
|
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
|
@ -100,41 +94,8 @@ public class GATKArgumentCollection {
|
||||||
@Argument(fullName = "read_filter", shortName = "rf", doc = "Specify filtration criteria to apply to each read individually", required = false)
|
@Argument(fullName = "read_filter", shortName = "rf", doc = "Specify filtration criteria to apply to each read individually", required = false)
|
||||||
public List<String> readFilters = new ArrayList<String>();
|
public List<String> readFilters = new ArrayList<String>();
|
||||||
|
|
||||||
/**
|
@ArgumentCollection
|
||||||
* Using this option one can instruct the GATK engine to traverse over only part of the genome. This argument can be specified multiple times.
|
public IntervalArgumentCollection intervalArguments = new IntervalArgumentCollection();
|
||||||
* One may use samtools-style intervals either explicitly (e.g. -L chr1 or -L chr1:100-200) or listed in a file (e.g. -L myFile.intervals).
|
|
||||||
* Additionally, one may specify a rod file to traverse over the positions for which there is a record in the file (e.g. -L file.vcf).
|
|
||||||
* To specify the completely unmapped reads in the BAM file (i.e. those without a reference contig) use -L unmapped.
|
|
||||||
*/
|
|
||||||
@Input(fullName = "intervals", shortName = "L", doc = "One or more genomic intervals over which to operate. Can be explicitly specified on the command line or in a file (including a rod file)", required = false)
|
|
||||||
public List<IntervalBinding<Feature>> intervals = null;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Using this option one can instruct the GATK engine NOT to traverse over certain parts of the genome. This argument can be specified multiple times.
|
|
||||||
* One may use samtools-style intervals either explicitly (e.g. -XL chr1 or -XL chr1:100-200) or listed in a file (e.g. -XL myFile.intervals).
|
|
||||||
* Additionally, one may specify a rod file to skip over the positions for which there is a record in the file (e.g. -XL file.vcf).
|
|
||||||
*/
|
|
||||||
@Input(fullName = "excludeIntervals", shortName = "XL", doc = "One or more genomic intervals to exclude from processing. Can be explicitly specified on the command line or in a file (including a rod file)", required = false)
|
|
||||||
public List<IntervalBinding<Feature>> excludeIntervals = null;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* How should the intervals specified by multiple -L or -XL arguments be combined? Using this argument one can, for example, traverse over all of the positions
|
|
||||||
* for which there is a record in a VCF but just in chromosome 20 (-L chr20 -L file.vcf -isr INTERSECTION).
|
|
||||||
*/
|
|
||||||
@Argument(fullName = "interval_set_rule", shortName = "isr", doc = "Indicates the set merging approach the interval parser should use to combine the various -L or -XL inputs", required = false)
|
|
||||||
public IntervalSetRule intervalSetRule = IntervalSetRule.UNION;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Should abutting (but not overlapping) intervals be treated as separate intervals?
|
|
||||||
*/
|
|
||||||
@Argument(fullName = "interval_merging", shortName = "im", doc = "Indicates the interval merging rule we should use for abutting intervals", required = false)
|
|
||||||
public IntervalMergingRule intervalMerging = IntervalMergingRule.ALL;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* For example, '-L chr1:100' with a padding value of 20 would turn into '-L chr1:80-120'.
|
|
||||||
*/
|
|
||||||
@Argument(fullName = "interval_padding", shortName = "ip", doc = "Indicates how many basepairs of padding to include around each of the intervals specified with the -L/--intervals argument", required = false)
|
|
||||||
public int intervalPadding = 0;
|
|
||||||
|
|
||||||
@Input(fullName = "reference_sequence", shortName = "R", doc = "Reference sequence file", required = false)
|
@Input(fullName = "reference_sequence", shortName = "R", doc = "Reference sequence file", required = false)
|
||||||
public File referenceFile = null;
|
public File referenceFile = null;
|
||||||
|
|
|
||||||
|
|
@ -32,6 +32,7 @@ import net.sf.picard.util.IntervalList;
|
||||||
import net.sf.samtools.SAMFileHeader;
|
import net.sf.samtools.SAMFileHeader;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broad.tribble.Feature;
|
import org.broad.tribble.Feature;
|
||||||
|
import org.broadinstitute.sting.commandline.IntervalArgumentCollection;
|
||||||
import org.broadinstitute.sting.commandline.IntervalBinding;
|
import org.broadinstitute.sting.commandline.IntervalBinding;
|
||||||
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
|
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
|
@ -534,6 +535,47 @@ public class IntervalUtils {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static GenomeLocSortedSet parseIntervalArguments(final ReferenceDataSource referenceDataSource, IntervalArgumentCollection argCollection) {
|
||||||
|
GenomeLocSortedSet intervals = null;
|
||||||
|
|
||||||
|
// return if no interval arguments at all
|
||||||
|
if ( argCollection.intervals == null && argCollection.excludeIntervals == null )
|
||||||
|
return intervals;
|
||||||
|
|
||||||
|
// Note that the use of '-L all' is no longer supported.
|
||||||
|
|
||||||
|
// if include argument isn't given, create new set of all possible intervals
|
||||||
|
|
||||||
|
final Pair<GenomeLocSortedSet, GenomeLocSortedSet> includeExcludePair = IntervalUtils.parseIntervalBindingsPair(
|
||||||
|
referenceDataSource,
|
||||||
|
argCollection.intervals,
|
||||||
|
argCollection.intervalSetRule, argCollection.intervalMerging, argCollection.intervalPadding,
|
||||||
|
argCollection.excludeIntervals);
|
||||||
|
|
||||||
|
final GenomeLocSortedSet includeSortedSet = includeExcludePair.getFirst();
|
||||||
|
final GenomeLocSortedSet excludeSortedSet = includeExcludePair.getSecond();
|
||||||
|
|
||||||
|
// if no exclude arguments, the include set is the final result
|
||||||
|
if ( excludeSortedSet == null )
|
||||||
|
intervals = includeSortedSet;
|
||||||
|
|
||||||
|
// otherwise there are exclude arguments => must merge include and exclude GenomeLocSortedSets
|
||||||
|
else {
|
||||||
|
intervals = includeSortedSet.subtractRegions(excludeSortedSet);
|
||||||
|
|
||||||
|
// logging messages only printed when exclude (-XL) arguments are given
|
||||||
|
final long toPruneSize = includeSortedSet.coveredSize();
|
||||||
|
final long toExcludeSize = excludeSortedSet.coveredSize();
|
||||||
|
final long intervalSize = intervals.coveredSize();
|
||||||
|
logger.info(String.format("Initial include intervals span %d loci; exclude intervals span %d loci", toPruneSize, toExcludeSize));
|
||||||
|
logger.info(String.format("Excluding %d loci from original intervals (%.2f%% reduction)",
|
||||||
|
toPruneSize - intervalSize, (toPruneSize - intervalSize) / (0.01 * toPruneSize)));
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.info(String.format("Processing %d bp from intervals", intervals.coveredSize()));
|
||||||
|
return intervals;
|
||||||
|
}
|
||||||
|
|
||||||
public static Pair<GenomeLocSortedSet, GenomeLocSortedSet> parseIntervalBindingsPair(
|
public static Pair<GenomeLocSortedSet, GenomeLocSortedSet> parseIntervalBindingsPair(
|
||||||
final ReferenceDataSource referenceDataSource,
|
final ReferenceDataSource referenceDataSource,
|
||||||
final List<IntervalBinding<Feature>> intervals,
|
final List<IntervalBinding<Feature>> intervals,
|
||||||
|
|
|
||||||
|
|
@ -73,6 +73,10 @@ public class VCFHeader {
|
||||||
public static final String REFERENCE_KEY = "reference";
|
public static final String REFERENCE_KEY = "reference";
|
||||||
public static final String CONTIG_KEY = "contig";
|
public static final String CONTIG_KEY = "contig";
|
||||||
public static final String INTERVALS_KEY = "intervals";
|
public static final String INTERVALS_KEY = "intervals";
|
||||||
|
public static final String EXCLUDE_INTERVALS_KEY = "excludeIntervals";
|
||||||
|
public static final String INTERVAL_MERGING_KEY = "interval_merging";
|
||||||
|
public static final String INTERVAL_SET_RULE_KEY = "interval_set_rule";
|
||||||
|
public static final String INTERVAL_PADDING_KEY = "interval_padding";
|
||||||
|
|
||||||
// were the input samples sorted originally (or are we sorting them)?
|
// were the input samples sorted originally (or are we sorting them)?
|
||||||
private boolean samplesWereAlreadySorted = true;
|
private boolean samplesWereAlreadySorted = true;
|
||||||
|
|
|
||||||
|
|
@ -1068,7 +1068,7 @@ public class IntervalUtilsUnitTest extends BaseTest {
|
||||||
List<IntervalBinding<Feature>> intervalArgs = new ArrayList<IntervalBinding<Feature>>(1);
|
List<IntervalBinding<Feature>> intervalArgs = new ArrayList<IntervalBinding<Feature>>(1);
|
||||||
intervalArgs.add(new IntervalBinding<Feature>(picardIntervalFile.getAbsolutePath()));
|
intervalArgs.add(new IntervalBinding<Feature>(picardIntervalFile.getAbsolutePath()));
|
||||||
|
|
||||||
IntervalUtils.loadIntervals(intervalArgs, argCollection.intervalSetRule, argCollection.intervalMerging, argCollection.intervalPadding, genomeLocParser);
|
IntervalUtils.loadIntervals(intervalArgs, argCollection.intervalArguments.intervalSetRule, argCollection.intervalArguments.intervalMerging, argCollection.intervalArguments.intervalPadding, genomeLocParser);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(expectedExceptions=UserException.class, dataProvider="invalidIntervalTestData")
|
@Test(expectedExceptions=UserException.class, dataProvider="invalidIntervalTestData")
|
||||||
|
|
@ -1081,7 +1081,7 @@ public class IntervalUtilsUnitTest extends BaseTest {
|
||||||
List<IntervalBinding<Feature>> intervalArgs = new ArrayList<IntervalBinding<Feature>>(1);
|
List<IntervalBinding<Feature>> intervalArgs = new ArrayList<IntervalBinding<Feature>>(1);
|
||||||
intervalArgs.add(new IntervalBinding<Feature>(gatkIntervalFile.getAbsolutePath()));
|
intervalArgs.add(new IntervalBinding<Feature>(gatkIntervalFile.getAbsolutePath()));
|
||||||
|
|
||||||
IntervalUtils.loadIntervals(intervalArgs, argCollection.intervalSetRule, argCollection.intervalMerging, argCollection.intervalPadding, genomeLocParser);
|
IntervalUtils.loadIntervals(intervalArgs, argCollection.intervalArguments.intervalSetRule, argCollection.intervalArguments.intervalMerging, argCollection.intervalArguments.intervalPadding, genomeLocParser);
|
||||||
}
|
}
|
||||||
|
|
||||||
private File createTempFile( String tempFilePrefix, String tempFileExtension, String... lines ) throws Exception {
|
private File createTempFile( String tempFilePrefix, String tempFileExtension, String... lines ) throws Exception {
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,8 @@ package org.broadinstitute.sting.queue.util
|
||||||
|
|
||||||
import java.io.File
|
import java.io.File
|
||||||
import org.broadinstitute.sting.utils.io.FileExtension
|
import org.broadinstitute.sting.utils.io.FileExtension
|
||||||
|
import java.util.Date
|
||||||
|
import java.net.URL
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* An extension of java.io.File that can be pulled from or pushed to a remote location.
|
* An extension of java.io.File that can be pulled from or pushed to a remote location.
|
||||||
|
|
@ -35,5 +37,6 @@ trait RemoteFile extends File with FileExtension {
|
||||||
def pullToLocal()
|
def pullToLocal()
|
||||||
def pushToRemote()
|
def pushToRemote()
|
||||||
def deleteRemote()
|
def deleteRemote()
|
||||||
|
def createUrl(expiration: Date): URL
|
||||||
def remoteDescription: String
|
def remoteDescription: String
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue