Merge branch 'master' of ssh://gsa2.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
769e190202
|
|
@ -29,17 +29,14 @@ import org.broad.tribble.AbstractFeatureReader;
|
|||
import org.broad.tribble.Feature;
|
||||
import org.broad.tribble.FeatureCodec;
|
||||
import org.broad.tribble.FeatureReader;
|
||||
import org.broad.tribble.readers.AsciiLineReader;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
|
|
@ -73,7 +70,11 @@ public final class IntervalBinding<T extends Feature> {
|
|||
return stringIntervals;
|
||||
}
|
||||
|
||||
public List<GenomeLoc> getIntervals(GenomeAnalysisEngine toolkit) {
|
||||
public List<GenomeLoc> getIntervals(final GenomeAnalysisEngine toolkit) {
|
||||
return getIntervals(toolkit.getGenomeLocParser());
|
||||
}
|
||||
|
||||
public List<GenomeLoc> getIntervals(final GenomeLocParser genomeLocParser) {
|
||||
List<GenomeLoc> intervals;
|
||||
|
||||
if ( featureIntervals != null ) {
|
||||
|
|
@ -83,17 +84,17 @@ public final class IntervalBinding<T extends Feature> {
|
|||
|
||||
final FeatureCodec codec = new FeatureManager().getByName(featureIntervals.getTribbleType()).getCodec();
|
||||
if ( codec instanceof ReferenceDependentFeatureCodec )
|
||||
((ReferenceDependentFeatureCodec)codec).setGenomeLocParser(toolkit.getGenomeLocParser());
|
||||
((ReferenceDependentFeatureCodec)codec).setGenomeLocParser(genomeLocParser);
|
||||
try {
|
||||
FeatureReader<Feature> reader = AbstractFeatureReader.getFeatureReader(featureIntervals.getSource(), codec, false);
|
||||
for ( Feature feature : reader.iterator() )
|
||||
intervals.add(toolkit.getGenomeLocParser().createGenomeLoc(feature));
|
||||
intervals.add(genomeLocParser.createGenomeLoc(feature));
|
||||
} catch (Exception e) {
|
||||
throw new UserException.MalformedFile(featureIntervals.getSource(), "Problem reading the interval file", e);
|
||||
}
|
||||
|
||||
} else {
|
||||
intervals = IntervalUtils.parseIntervalArguments(toolkit.getGenomeLocParser(), stringIntervals);
|
||||
intervals = IntervalUtils.parseIntervalArguments(genomeLocParser, stringIntervals);
|
||||
}
|
||||
|
||||
return intervals;
|
||||
|
|
|
|||
|
|
@ -592,7 +592,7 @@ public class GenomeAnalysisEngine {
|
|||
// if include argument isn't given, create new set of all possible intervals
|
||||
GenomeLocSortedSet includeSortedSet = (argCollection.intervals == null ?
|
||||
GenomeLocSortedSet.createSetFromSequenceDictionary(this.referenceDataSource.getReference().getSequenceDictionary()) :
|
||||
loadIntervals(argCollection.intervals, argCollection.intervalSetRule));
|
||||
loadIntervals(argCollection.intervals, argCollection.intervalSetRule, argCollection.intervalPadding));
|
||||
|
||||
// if no exclude arguments, can return parseIntervalArguments directly
|
||||
if ( argCollection.excludeIntervals == null )
|
||||
|
|
@ -619,16 +619,31 @@ public class GenomeAnalysisEngine {
|
|||
* @param rule interval merging rule
|
||||
* @return A sorted, merged list of all intervals specified in this arg list.
|
||||
*/
|
||||
protected GenomeLocSortedSet loadIntervals( List<IntervalBinding<Feature>> argList, IntervalSetRule rule ) {
|
||||
protected GenomeLocSortedSet loadIntervals( final List<IntervalBinding<Feature>> argList, final IntervalSetRule rule ) {
|
||||
return loadIntervals(argList, rule, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads the intervals relevant to the current execution
|
||||
* @param argList argument bindings; might include filenames, intervals in samtools notation, or a combination of the above
|
||||
* @param rule interval merging rule
|
||||
* @param padding how much to pad the intervals
|
||||
* @return A sorted, merged list of all intervals specified in this arg list.
|
||||
*/
|
||||
protected GenomeLocSortedSet loadIntervals( final List<IntervalBinding<Feature>> argList, final IntervalSetRule rule, final int padding ) {
|
||||
|
||||
List<GenomeLoc> allIntervals = new ArrayList<GenomeLoc>();
|
||||
for ( IntervalBinding intervalBinding : argList ) {
|
||||
List<GenomeLoc> intervals = intervalBinding.getIntervals(this);
|
||||
List<GenomeLoc> intervals = intervalBinding.getIntervals(this.getGenomeLocParser());
|
||||
|
||||
if ( intervals.isEmpty() ) {
|
||||
logger.warn("The interval file " + intervalBinding.getSource() + " contains no intervals that could be parsed.");
|
||||
}
|
||||
|
||||
if ( padding > 0 ) {
|
||||
intervals = IntervalUtils.getIntervalsWithFlanks(this.getGenomeLocParser(), intervals, padding);
|
||||
}
|
||||
|
||||
allIntervals = IntervalUtils.mergeListsBySetOperator(intervals, allIntervals, rule);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -104,6 +104,12 @@ public class GATKArgumentCollection {
|
|||
@Argument(fullName = "interval_merging", shortName = "im", doc = "Indicates the interval merging rule we should use for abutting intervals", required = false)
|
||||
public IntervalMergingRule intervalMerging = IntervalMergingRule.ALL;
|
||||
|
||||
/**
|
||||
* For example, '-L chr1:100' with a padding value of 20 would turn into '-L chr1:80-120'.
|
||||
*/
|
||||
@Argument(fullName = "interval_padding", shortName = "ip", doc = "Indicates how many basepairs of padding to include around each of the intervals specified with the -L/--intervals argument", required = false)
|
||||
public int intervalPadding = 0;
|
||||
|
||||
@Input(fullName = "reference_sequence", shortName = "R", doc = "Reference sequence file", required = false)
|
||||
public File referenceFile = null;
|
||||
|
||||
|
|
|
|||
|
|
@ -283,6 +283,41 @@ public class GATKReport {
|
|||
return output;
|
||||
}
|
||||
|
||||
/**
|
||||
* The constructor for a simplified GATK Report. Simplified GATK report are designed for reports that do not need
|
||||
* the advanced functionality of a full GATK Report.
|
||||
* <p/>
|
||||
* A simple GATK Report consists of:
|
||||
* <p/>
|
||||
* - A single table
|
||||
* - No primary key ( it is hidden )
|
||||
* <p/>
|
||||
* Optional:
|
||||
* - Only untyped columns. As long as the data is an Object, it will be accepted.
|
||||
* - Default column values being empty strings.
|
||||
* <p/>
|
||||
* Limitations:
|
||||
* <p/>
|
||||
* - A simple GATK report cannot contain multiple tables.
|
||||
* - It cannot contain typed columns, which prevents arithmetic gathering.
|
||||
*
|
||||
* @param tableName The name of your simple GATK report table
|
||||
* @param columns The names of the columns in your table
|
||||
* @return a simplified GATK report
|
||||
*/
|
||||
public static GATKReport newSimpleReport(final String tableName, final List<String> columns) {
|
||||
GATKReportTable table = new GATKReportTable(tableName, "A simplified GATK table report", columns.size());
|
||||
|
||||
for (String column : columns) {
|
||||
table.addColumn(column, "");
|
||||
}
|
||||
|
||||
GATKReport output = new GATKReport();
|
||||
output.addTable(table);
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method provides an efficient way to populate a simplified GATK report. This method will only work on reports
|
||||
* that qualify as simplified GATK reports. See the newSimpleReport() constructor for more information.
|
||||
|
|
@ -303,4 +338,27 @@ public class GATKReport {
|
|||
for ( int i = 0; i < values.length; i++ )
|
||||
table.set(rowIndex, i, values[i]);
|
||||
}
|
||||
|
||||
/**
|
||||
* This method provides an efficient way to populate a simplified GATK report. This method will only work on reports
|
||||
* that qualify as simplified GATK reports. See the newSimpleReport() constructor for more information.
|
||||
*
|
||||
* @param values the row of data to be added to the table.
|
||||
* Note: the number of arguments must match the columns in the table.
|
||||
*/
|
||||
public void addRowList(final List<Object> values) {
|
||||
if ( tables.size() != 1 )
|
||||
throw new ReviewedStingException("Cannot write a row to a complex GATK Report");
|
||||
|
||||
GATKReportTable table = tables.firstEntry().getValue();
|
||||
if ( table.getNumColumns() != values.size() )
|
||||
throw new ReviewedStingException("The number of arguments in writeRow() must match the number of columns in the table");
|
||||
|
||||
final int rowIndex = table.getNumRows();
|
||||
int idx = 0;
|
||||
for ( Object value : values ) {
|
||||
table.set(rowIndex,idx,value);
|
||||
idx++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -405,15 +405,19 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<DoCOutputType.Partiti
|
|||
printGeneStats(statsByInterval);
|
||||
}
|
||||
|
||||
for(DoCOutputType.Partition partition: partitionTypes) {
|
||||
if ( checkType(statsByInterval.get(0).getSecond().getCoverageByAggregationType(partition) ,partition) ) {
|
||||
printIntervalStats(statsByInterval,
|
||||
getCorrectStream(partition, DoCOutputType.Aggregation.interval, DoCOutputType.FileType.summary),
|
||||
getCorrectStream(partition, DoCOutputType.Aggregation.interval, DoCOutputType.FileType.statistics),
|
||||
partition);
|
||||
} else {
|
||||
throw new ReviewedStingException("Partition type "+partition.toString()+" had no entries. Please check that your .bam header has all appropriate partition types.");
|
||||
if ( statsByInterval.size() > 0 ) {
|
||||
for(DoCOutputType.Partition partition: partitionTypes) {
|
||||
if ( checkType(statsByInterval.get(0).getSecond().getCoverageByAggregationType(partition) ,partition) ) {
|
||||
printIntervalStats(statsByInterval,
|
||||
getCorrectStream(partition, DoCOutputType.Aggregation.interval, DoCOutputType.FileType.summary),
|
||||
getCorrectStream(partition, DoCOutputType.Aggregation.interval, DoCOutputType.FileType.statistics),
|
||||
partition);
|
||||
} else {
|
||||
throw new ReviewedStingException("Partition type "+partition.toString()+" had no entries. Please check that your .bam header has all appropriate partition types.");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
throw new UserException.CommandLineException("Cannot reduce by interval without interval list provided. Please provide a -L argument.");
|
||||
}
|
||||
|
||||
onTraversalDone(mergeAll(statsByInterval));
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
|
||||
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantSummary;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
|
@ -14,8 +14,9 @@ import java.util.*;
|
|||
/**
|
||||
* Stratifies the eval RODs by the allele count of the alternate allele
|
||||
*
|
||||
* Looks at the AC value in the INFO field, and uses that value if present. If absent,
|
||||
* computes the AC from the genotypes themselves. For no AC can be computed, 0 is used.
|
||||
* Looks first at the MLEAC value in the INFO field, and uses that value if present.
|
||||
* If not present, it then looks for the AC value in the INFO field. If both are absent,
|
||||
* it computes the AC from the genotypes themselves. If no AC can be computed, 0 is used.
|
||||
*/
|
||||
public class AlleleCount extends VariantStratifier {
|
||||
@Override
|
||||
|
|
@ -41,8 +42,10 @@ public class AlleleCount extends VariantStratifier {
|
|||
if (eval != null) {
|
||||
int AC = 0; // by default, the site is considered monomorphic
|
||||
|
||||
if ( eval.hasAttribute("AC") && eval.getAttribute("AC") instanceof Integer ) {
|
||||
AC = eval.getAttributeAsInt("AC", 0);
|
||||
if ( eval.hasAttribute(VCFConstants.MLE_ALLELE_COUNT_KEY) && eval.isBiallelic() ) {
|
||||
AC = eval.getAttributeAsInt(VCFConstants.MLE_ALLELE_COUNT_KEY, 0);
|
||||
} else if ( eval.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) && eval.isBiallelic() ) {
|
||||
AC = eval.getAttributeAsInt(VCFConstants.ALLELE_COUNT_KEY, 0);
|
||||
} else if ( eval.isVariant() ) {
|
||||
for (Allele allele : eval.getAlternateAlleles())
|
||||
AC = Math.max(AC, eval.getCalledChrCount(allele));
|
||||
|
|
|
|||
|
|
@ -587,6 +587,27 @@ public final class GenomeLocParser {
|
|||
return createGenomeLoc(contigName, contigIndex, start, stop, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a loc padded in both directions by maxBasePairs size (if possible).
|
||||
* @param loc The original loc
|
||||
* @param padding The number of base pairs to pad on either end
|
||||
* @return The contiguous loc of length up to the original length + 2*padding (depending on the start/end of the contig).
|
||||
*/
|
||||
@Requires({"loc != null", "padding > 0"})
|
||||
public GenomeLoc createPaddedGenomeLoc(final GenomeLoc loc, final int padding) {
|
||||
if (GenomeLoc.isUnmapped(loc))
|
||||
return loc;
|
||||
final String contigName = loc.getContig();
|
||||
final SAMSequenceRecord contig = contigInfo.getSequence(contigName);
|
||||
final int contigIndex = contig.getSequenceIndex();
|
||||
final int contigLength = contig.getSequenceLength();
|
||||
|
||||
final int start = Math.max(1, loc.getStart() - padding);
|
||||
final int stop = Math.min(contigLength, loc.getStop() + padding);
|
||||
|
||||
return createGenomeLoc(contigName, contigIndex, start, stop, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a loc to the right (starting at the loc stop + 1) of maxBasePairs size.
|
||||
* @param loc The original loc
|
||||
|
|
|
|||
|
|
@ -648,6 +648,26 @@ public class IntervalUtils {
|
|||
return expanded;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a list of intervals between the passed int locs. Does not extend UNMAPPED locs.
|
||||
* @param parser A genome loc parser for creating the new intervals
|
||||
* @param locs Original genome locs
|
||||
* @param basePairs Number of base pairs on each side of loc
|
||||
* @return The list of intervals between the locs
|
||||
*/
|
||||
public static List<GenomeLoc> getIntervalsWithFlanks(final GenomeLocParser parser, final List<GenomeLoc> locs, final int basePairs) {
|
||||
|
||||
if (locs.size() == 0)
|
||||
return Collections.emptyList();
|
||||
|
||||
final List<GenomeLoc> expanded = new ArrayList<GenomeLoc>();
|
||||
for ( final GenomeLoc loc : locs ) {
|
||||
expanded.add(parser.createPaddedGenomeLoc(loc, basePairs));
|
||||
}
|
||||
|
||||
return sortAndMergeIntervals(parser, expanded, IntervalMergingRule.ALL).toList();
|
||||
}
|
||||
|
||||
private static LinkedHashMap<String, List<GenomeLoc>> splitByContig(List<GenomeLoc> sorted) {
|
||||
LinkedHashMap<String, List<GenomeLoc>> splits = new LinkedHashMap<String, List<GenomeLoc>>();
|
||||
GenomeLoc last = null;
|
||||
|
|
|
|||
|
|
@ -32,11 +32,12 @@ import java.util.Arrays;
|
|||
|
||||
public class VariantEvalIntegrationTest extends WalkerTest {
|
||||
private static String variantEvalTestDataRoot = validationDataLocation + "VariantEval/";
|
||||
private static String fundamentalTestVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.snps_and_indels.vcf";
|
||||
private static String fundamentalTestSNPsVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.vcf";
|
||||
private static String fundamentalTestSNPsSplit1of2VCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.split_1_of_2.vcf";
|
||||
private static String fundamentalTestSNPsSplit2of2VCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.split_2_of_2.vcf";
|
||||
private static String fundamentalTestSNPsOneSampleVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.NA12045.vcf";
|
||||
private static String fundamentalTestVCF = variantEvalTestDataRoot + "FundamentalsTest.annotated.db.subset.snps_and_indels.vcf";
|
||||
private static String fundamentalTestSNPsVCF = variantEvalTestDataRoot + "FundamentalsTest.annotated.db.subset.final.vcf";
|
||||
private static String fundamentalTestSNPsWithMLEVCF = variantEvalTestDataRoot + "FundamentalsTest.annotated.db.subset.final.withMLE.vcf";
|
||||
private static String fundamentalTestSNPsSplit1of2VCF = variantEvalTestDataRoot + "FundamentalsTest.annotated.db.subset.final.split_1_of_2.vcf";
|
||||
private static String fundamentalTestSNPsSplit2of2VCF = variantEvalTestDataRoot + "FundamentalsTest.annotated.db.subset.final.split_2_of_2.vcf";
|
||||
private static String fundamentalTestSNPsOneSampleVCF = variantEvalTestDataRoot + "FundamentalsTest.annotated.db.subset.final.NA12045.vcf";
|
||||
|
||||
private static String cmdRoot = "-T VariantEval" +
|
||||
" -R " + b36KGReference;
|
||||
|
|
@ -457,6 +458,27 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
executeTest("testAlleleCountStrat", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAlleleCountStratWithMLE() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
buildCommandLine(
|
||||
"-T VariantEval",
|
||||
"-R " + b37KGReference,
|
||||
"--dbsnp " + b37dbSNP132,
|
||||
"--eval " + fundamentalTestSNPsWithMLEVCF,
|
||||
"-noEV",
|
||||
"-EV CountVariants",
|
||||
"-noST",
|
||||
"-ST AlleleCount",
|
||||
"-L " + fundamentalTestSNPsWithMLEVCF,
|
||||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("cadd4e32ab82f7e43d0e510dfe5e6dda")
|
||||
);
|
||||
executeTest("testAlleleCountStratWithMLE", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultipleEvalTracksAlleleCountWithMerge() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
|
|
|
|||
|
|
@ -128,7 +128,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + b36KGReference + " -regenotype -sn NA12892 --variant " + testFile + " -o %s --no_cmdline_in_header",
|
||||
1,
|
||||
Arrays.asList("5bf9663274ceb552f5469f8c1dfc22ed")
|
||||
Arrays.asList("be38bdc7bd88f5d09cf1a9d55cfecb0b")
|
||||
);
|
||||
|
||||
executeTest("testRegenotype--" + testFile, spec);
|
||||
|
|
|
|||
|
|
@ -207,11 +207,26 @@ public class IntervalIntegrationTest extends WalkerTest {
|
|||
" -R " + hg18Reference +
|
||||
" -o %s" +
|
||||
" -L " + validationDataLocation + "intervalTest.3.vcf",
|
||||
1, // just one output file
|
||||
Arrays.asList(md5));
|
||||
1, // just one output file
|
||||
Arrays.asList(md5));
|
||||
executeTest("testComplexVCF", spec);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testComplexVCFWithPadding() {
|
||||
String md5 = "649ee93d50739c656e94ec88a32c7ffe";
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T CountLoci" +
|
||||
" --interval_padding 2" +
|
||||
" -I " + validationDataLocation + "OV-0930.normal.chunk.bam" +
|
||||
" -R " + hg18Reference +
|
||||
" -o %s" +
|
||||
" -L " + validationDataLocation + "intervalTest.3.vcf",
|
||||
1, // just one output file
|
||||
Arrays.asList(md5));
|
||||
executeTest("testComplexVCFWithPadding", spec);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testMergingWithComplexVCF() {
|
||||
String md5 = "6d7fce9fee471194aa8b5b6e47267f03";
|
||||
|
|
|
|||
Loading…
Reference in New Issue