Merge branch 'master' of ssh://gsa4.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
67c0569f9c
|
|
@ -110,7 +110,7 @@ public class BQSRKeyManager {
|
|||
* @return one key in long representation per covariate
|
||||
*/
|
||||
public Long[] longsFromAllKeys(final Long[] allKeys, final EventType eventType) {
|
||||
final Long[] allFinalKeys = new Long[optionalCovariatesInfo.length > 0 ? optionalCovariatesInfo.length : 1]; // Generate one key per optional covariate
|
||||
final ArrayList<Long> allFinalKeys = new ArrayList<Long>(); // Generate one key per optional covariate
|
||||
|
||||
int covariateIndex = 0;
|
||||
long masterKey = 0L; // This will be a master key holding all the required keys, to replicate later on
|
||||
|
|
@ -128,13 +128,13 @@ public class BQSRKeyManager {
|
|||
long newKey = masterKey | (covariateKey << optionalCovariateOffset);
|
||||
newKey |= (optionalCovariatesInfo[i].covariateID << optionalCovariateIDOffset);
|
||||
|
||||
allFinalKeys[i] = newKey; // add this key to the list of keys
|
||||
allFinalKeys.add(newKey); // add this key to the list of keys
|
||||
}
|
||||
|
||||
if (optionalCovariatesInfo.length == 0) // special case when we have no optional covariates
|
||||
allFinalKeys[0] = masterKey;
|
||||
allFinalKeys.add(masterKey);
|
||||
|
||||
return allFinalKeys;
|
||||
return allFinalKeys.toArray(new Long[allFinalKeys.size()]);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -33,7 +33,8 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
|
|
@ -147,7 +148,7 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
|||
intervalListIterator = new PeekableIterator<GenomeLoc>(getToolkit().getIntervals().iterator());
|
||||
|
||||
samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()); // get all of the unique sample names for the VCF Header
|
||||
vcfWriter.writeHeader(new VCFHeader(getHeaderInfo(), samples)); // initialize the VCF header
|
||||
vcfWriter.writeHeader(new VCFHeader(ThresHolder.getHeaderInfo(), samples)); // initialize the VCF header
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
@ -249,6 +250,7 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
|||
private void outputStatsToVCF(IntervalStatistics stats, Allele refAllele) {
|
||||
GenomeLoc interval = stats.getInterval();
|
||||
|
||||
|
||||
List<Allele> alleles = new ArrayList<Allele>();
|
||||
Map<String, Object> attributes = new HashMap<String, Object>();
|
||||
ArrayList<Genotype> genotypes = new ArrayList<Genotype>();
|
||||
|
|
@ -264,7 +266,9 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
|||
attributes.put(VCFConstants.DEPTH_KEY, stats.averageCoverage());
|
||||
|
||||
vcb = vcb.attributes(attributes);
|
||||
|
||||
if (debug) {
|
||||
System.out.printf("Output -- Interval: %s, Coverage: %.2f%n", stats.getInterval(), stats.averageCoverage());
|
||||
}
|
||||
for (String sample : samples) {
|
||||
Map<String, Object> infos = new HashMap<String, Object>();
|
||||
SampleStatistics sampleStat = stats.getSample(sample);
|
||||
|
|
@ -276,47 +280,19 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
|||
Set<String> filters = new HashSet<String>();
|
||||
filters.addAll(statusesToStrings(stats.getSample(sample).getCallableStatuses(thresholds)));
|
||||
|
||||
if (debug) {
|
||||
System.out.printf("Found %d bad mates out of %d reads %n", sampleStat.getnBadMates(), sampleStat.getnReads());
|
||||
}
|
||||
|
||||
genotypes.add(new Genotype(sample, null, VariantContext.NO_LOG10_PERROR, filters, infos, false));
|
||||
}
|
||||
vcb = vcb.genotypes(genotypes);
|
||||
|
||||
if (debug) {
|
||||
System.out.printf("Output -- Interval: %s, Coverage: %.2f%n", stats.getInterval(), stats.averageCoverage());
|
||||
}
|
||||
|
||||
vcfWriter.add(vcb.make());
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the header lines for the VCF writer
|
||||
*
|
||||
* @return A set of VCF header lines
|
||||
*/
|
||||
private static Set<VCFHeaderLine> getHeaderInfo() {
|
||||
Set<VCFHeaderLine> headerLines = new HashSet<VCFHeaderLine>();
|
||||
|
||||
// INFO fields for overall data
|
||||
headerLines.add(new VCFInfoHeaderLine(VCFConstants.END_KEY, 1, VCFHeaderLineType.Integer, "Stop position of the interval"));
|
||||
headerLines.add(new VCFInfoHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a lci divided by interval size."));
|
||||
headerLines.add(new VCFInfoHeaderLine("Diagnose Targets", 0, VCFHeaderLineType.Flag, "DiagnoseTargets mode"));
|
||||
|
||||
// FORMAT fields for each genotype
|
||||
// todo -- find the appropriate VCF constants
|
||||
headerLines.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a lci divided by interval size."));
|
||||
headerLines.add(new VCFFormatHeaderLine("Q1", 1, VCFHeaderLineType.Float, "Lower Quartile of depth distribution."));
|
||||
headerLines.add(new VCFFormatHeaderLine("MED", 1, VCFHeaderLineType.Float, "Median of depth distribution."));
|
||||
headerLines.add(new VCFFormatHeaderLine("Q3", 1, VCFHeaderLineType.Float, "Upper Quartile of depth Distribution."));
|
||||
|
||||
|
||||
// FILTER fields
|
||||
for (CallableStatus stat : CallableStatus.values())
|
||||
headerLines.add(new VCFFilterHeaderLine(stat.name(), stat.description));
|
||||
|
||||
return headerLines;
|
||||
}
|
||||
|
||||
/**
|
||||
* Function that process a set of statuses into strings
|
||||
*
|
||||
|
|
@ -333,6 +309,6 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
|||
}
|
||||
|
||||
private IntervalStatistics createIntervalStatistic(GenomeLoc interval) {
|
||||
return new IntervalStatistics(samples, interval /*, minimumCoverage, maximumCoverage, minimumMappingQuality, minimumBaseQuality*/);
|
||||
return new IntervalStatistics(samples, interval);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,84 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
|
||||
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ActiveRegionExtension;
|
||||
import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.PartitionBy;
|
||||
import org.broadinstitute.sting.gatk.walkers.PartitionType;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
||||
@PartitionBy(PartitionType.CONTIG)
|
||||
@ActiveRegionExtension(extension = 0, maxRegion = 50000)
|
||||
public class FindCoveredIntervals extends ActiveRegionWalker<GenomeLoc, Long> {
|
||||
@Output(required = true)
|
||||
private PrintStream out;
|
||||
|
||||
@Override
|
||||
// Look to see if the region has sufficient coverage
|
||||
public double isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) {
|
||||
|
||||
int depth = ThresHolder.DEFAULTS.getFilteredCoverage(context.getBasePileup());
|
||||
|
||||
// note the linear probability scale
|
||||
int coverageThreshold = 20;
|
||||
return Math.min((double) depth / coverageThreshold, 1);
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public GenomeLoc map(final ActiveRegion activeRegion, final RefMetaDataTracker tracker) {
|
||||
if (activeRegion.isActive)
|
||||
return activeRegion.getLocation();
|
||||
else
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Long reduceInit() {
|
||||
return 0L;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Long reduce(final GenomeLoc value, Long reduce) {
|
||||
if (value != null) {
|
||||
out.println(value.toString());
|
||||
return reduce++;
|
||||
} else
|
||||
return reduce;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onTraversalDone(Long reduce) {
|
||||
logger.info(String.format("Found %d intervals", reduce));
|
||||
}
|
||||
}
|
||||
|
|
@ -104,19 +104,19 @@ class SampleStatistics {
|
|||
|
||||
double intervalSize = interval.size();
|
||||
|
||||
if ((nBadMates / nReads) > thresholds.getBadMateStatusThreshold())
|
||||
if (((double) nBadMates / nReads) >= thresholds.getBadMateStatusThreshold())
|
||||
output.add(CallableStatus.BAD_MATE);
|
||||
|
||||
if ((totals.get(CallableStatus.COVERAGE_GAPS) / intervalSize) > thresholds.getCoverageStatusThreshold())
|
||||
if ((totals.get(CallableStatus.COVERAGE_GAPS) / intervalSize) >= thresholds.getCoverageStatusThreshold())
|
||||
output.add(CallableStatus.COVERAGE_GAPS);
|
||||
|
||||
if ((totals.get(CallableStatus.LOW_COVERAGE) / intervalSize) > thresholds.getCoverageStatusThreshold())
|
||||
if ((totals.get(CallableStatus.LOW_COVERAGE) / intervalSize) >= thresholds.getCoverageStatusThreshold())
|
||||
output.add(CallableStatus.LOW_COVERAGE);
|
||||
|
||||
if ((totals.get(CallableStatus.EXCESSIVE_COVERAGE) / intervalSize) > thresholds.getExcessiveCoverageThreshold())
|
||||
if ((totals.get(CallableStatus.EXCESSIVE_COVERAGE) / intervalSize) >= thresholds.getExcessiveCoverageThreshold())
|
||||
output.add(CallableStatus.EXCESSIVE_COVERAGE);
|
||||
|
||||
if ((totals.get(CallableStatus.POOR_QUALITY) / intervalSize) > thresholds.getQualityStatusThreshold())
|
||||
if ((totals.get(CallableStatus.POOR_QUALITY) / intervalSize) >= thresholds.getQualityStatusThreshold())
|
||||
output.add(CallableStatus.POOR_QUALITY);
|
||||
|
||||
if (totals.get(CallableStatus.REF_N) > 0)
|
||||
|
|
@ -146,7 +146,7 @@ class SampleStatistics {
|
|||
int locusIndex = locus.getStart() - interval.getStart();
|
||||
|
||||
int rawCoverage = pileup.depthOfCoverage();
|
||||
int coverage = pileup.getBaseAndMappingFilteredPileup(thresholds.getMinimumBaseQuality(), thresholds.getMinimumMappingQuality()).depthOfCoverage();
|
||||
int coverage = thresholds.getFilteredCoverage(pileup);
|
||||
|
||||
LocusStatistics locusData = new LocusStatistics(coverage, rawCoverage);
|
||||
|
||||
|
|
@ -161,7 +161,7 @@ class SampleStatistics {
|
|||
// Was this read already processed?
|
||||
if (read.getTemporaryAttribute("checkedBadMate") == null) {
|
||||
nReads++;
|
||||
if (hasValidMate(read, thresholds))
|
||||
if (!hasValidMate(read, thresholds))
|
||||
nBadMates++;
|
||||
read.setTemporaryAttribute("checkedBadMate", true);
|
||||
}
|
||||
|
|
@ -254,7 +254,7 @@ class SampleStatistics {
|
|||
* reasonable insert size?
|
||||
* inverted?
|
||||
* same orientation?
|
||||
* todo - same contig?
|
||||
* same contig?
|
||||
* is pair mapped?
|
||||
* todo - is forced mate?
|
||||
*
|
||||
|
|
@ -264,6 +264,10 @@ class SampleStatistics {
|
|||
if (!read.getReadPairedFlag())
|
||||
return false;
|
||||
|
||||
// different contigs
|
||||
if (read.getMateReferenceIndex() != read.getReferenceIndex())
|
||||
return false;
|
||||
|
||||
// unmapped
|
||||
if (read.getMateUnmappedFlag() || read.getReadUnmappedFlag())
|
||||
return false;
|
||||
|
|
@ -277,10 +281,19 @@ class SampleStatistics {
|
|||
read.getAlignmentStart() < read.getMateAlignmentStart())
|
||||
return false;
|
||||
|
||||
// TODO note: IGV uses a different alorithm for insert size, there should be a common util class that does this for you
|
||||
// mates are too far apart
|
||||
if (Math.abs(read.getAlignmentStart() - read.getMateAlignmentStart()) > thresholds.getMaximumInsertSize())
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
public int getnReads() {
|
||||
return nReads;
|
||||
}
|
||||
|
||||
public int getnBadMates() {
|
||||
return nBadMates;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,6 +24,12 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
|
||||
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
class ThresHolder {
|
||||
public static final ThresHolder DEFAULTS = new ThresHolder(20, 20, 5, 700, 20, 50, 0.5, 0.2, 0.5, 0.2, 0.2, 0.5);
|
||||
|
||||
|
|
@ -69,14 +75,6 @@ class ThresHolder {
|
|||
this.qualityStatusThreshold = qualityStatusThreshold;
|
||||
}
|
||||
|
||||
public int getMinimumBaseQuality() {
|
||||
return minimumBaseQuality;
|
||||
}
|
||||
|
||||
public int getMinimumMappingQuality() {
|
||||
return minimumMappingQuality;
|
||||
}
|
||||
|
||||
public int getMinimumCoverage() {
|
||||
return minimumCoverage;
|
||||
}
|
||||
|
|
@ -116,4 +114,37 @@ class ThresHolder {
|
|||
public double getQualityStatusThreshold() {
|
||||
return qualityStatusThreshold;
|
||||
}
|
||||
|
||||
public int getFilteredCoverage(ReadBackedPileup pileup) {
|
||||
return pileup.getBaseAndMappingFilteredPileup(minimumBaseQuality, minimumMappingQuality).depthOfCoverage();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the header lines for the VCF writer
|
||||
*
|
||||
* @return A set of VCF header lines
|
||||
*/
|
||||
public static Set<VCFHeaderLine> getHeaderInfo() {
|
||||
Set<VCFHeaderLine> headerLines = new HashSet<VCFHeaderLine>();
|
||||
|
||||
// INFO fields for overall data
|
||||
headerLines.add(new VCFInfoHeaderLine(VCFConstants.END_KEY, 1, VCFHeaderLineType.Integer, "Stop position of the interval"));
|
||||
headerLines.add(new VCFInfoHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a loci divided by interval size."));
|
||||
headerLines.add(new VCFInfoHeaderLine("Diagnose Targets", 0, VCFHeaderLineType.Flag, "DiagnoseTargets mode"));
|
||||
|
||||
// FORMAT fields for each genotype
|
||||
// todo -- find the appropriate VCF constants
|
||||
headerLines.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a loci divided by interval size."));
|
||||
headerLines.add(new VCFFormatHeaderLine("Q1", 1, VCFHeaderLineType.Float, "Lower Quartile of depth distribution."));
|
||||
headerLines.add(new VCFFormatHeaderLine("MED", 1, VCFHeaderLineType.Float, "Median of depth distribution."));
|
||||
headerLines.add(new VCFFormatHeaderLine("Q3", 1, VCFHeaderLineType.Float, "Upper Quartile of depth Distribution."));
|
||||
|
||||
|
||||
// FILTER fields
|
||||
for (CallableStatus stat : CallableStatus.values())
|
||||
headerLines.add(new VCFFilterHeaderLine(stat.name(), stat.description));
|
||||
|
||||
return headerLines;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue