Resolving merge conflict.
This commit is contained in:
commit
e3cc7cc59c
|
|
@ -51,11 +51,10 @@ import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
|||
import org.broadinstitute.sting.utils.sam.GATKSamRecordFactory;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.lang.reflect.Method;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.*;
|
||||
import java.util.concurrent.Callable;
|
||||
|
||||
/**
|
||||
* User: aaron
|
||||
|
|
@ -466,7 +465,6 @@ public class SAMDataSource {
|
|||
/**
|
||||
* Fill the given buffering shard with reads.
|
||||
* @param shard Shard to fill.
|
||||
* @return true if at the end of the stream. False otherwise.
|
||||
*/
|
||||
public void fillShard(Shard shard) {
|
||||
if(!shard.buffersReads())
|
||||
|
|
@ -561,15 +559,19 @@ public class SAMDataSource {
|
|||
if(shard.getFileSpans().get(id) == null)
|
||||
throw new ReviewedStingException("SAMDataSource: received null location for reader " + id + ", but null locations are no longer supported.");
|
||||
|
||||
if(threadAllocation.getNumIOThreads() > 0) {
|
||||
BlockInputStream inputStream = readers.getInputStream(id);
|
||||
inputStream.submitAccessPlan(new BAMAccessPlan(id, inputStream, (GATKBAMFileSpan) shard.getFileSpans().get(id)));
|
||||
BAMRecordCodec codec = new BAMRecordCodec(getHeader(id),factory);
|
||||
codec.setInputStream(inputStream);
|
||||
iterator = new BAMCodecIterator(inputStream,readers.getReader(id),codec);
|
||||
}
|
||||
else {
|
||||
iterator = readers.getReader(id).iterator(shard.getFileSpans().get(id));
|
||||
try {
|
||||
if(threadAllocation.getNumIOThreads() > 0) {
|
||||
BlockInputStream inputStream = readers.getInputStream(id);
|
||||
inputStream.submitAccessPlan(new BAMAccessPlan(id, inputStream, (GATKBAMFileSpan) shard.getFileSpans().get(id)));
|
||||
BAMRecordCodec codec = new BAMRecordCodec(getHeader(id),factory);
|
||||
codec.setInputStream(inputStream);
|
||||
iterator = new BAMCodecIterator(inputStream,readers.getReader(id),codec);
|
||||
}
|
||||
else {
|
||||
iterator = readers.getReader(id).iterator(shard.getFileSpans().get(id));
|
||||
}
|
||||
} catch ( RuntimeException e ) { // we need to catch RuntimeExceptions here because the Picard code is throwing them (among SAMFormatExceptions) sometimes
|
||||
throw new UserException.MalformedBAM(id.samFile, e.getMessage());
|
||||
}
|
||||
|
||||
iterator = new MalformedBAMErrorReformatingIterator(id.samFile, iterator);
|
||||
|
|
@ -924,10 +926,7 @@ public class SAMDataSource {
|
|||
blockInputStream = new BlockInputStream(dispatcher,readerID,false);
|
||||
reader = new SAMFileReader(readerID.samFile,indexFile,false);
|
||||
} catch ( RuntimeIOException e ) {
|
||||
if ( e.getCause() != null && e.getCause() instanceof FileNotFoundException )
|
||||
throw new UserException.CouldNotReadInputFile(readerID.samFile, e);
|
||||
else
|
||||
throw e;
|
||||
throw new UserException.CouldNotReadInputFile(readerID.samFile, e);
|
||||
} catch ( SAMFormatException e ) {
|
||||
throw new UserException.MalformedBAM(readerID.samFile, e.getMessage());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,23 +1,25 @@
|
|||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* * OTHER DEALINGS IN THE SOFTWARE.
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.coverage;
|
||||
|
|
@ -42,40 +44,40 @@ import java.io.PrintStream;
|
|||
|
||||
/**
|
||||
* Emits a data file containing information about callable, uncallable, poorly mapped, and other parts of the genome
|
||||
*
|
||||
* <p/>
|
||||
* <p>
|
||||
* A very common question about a NGS set of reads is what areas of the genome are considered callable. The system
|
||||
* considers the coverage at each locus and emits either a per base state or a summary interval BED file that
|
||||
* partitions the genomic intervals into the following callable states:
|
||||
* <dl>
|
||||
* <dt>REF_N</dt>
|
||||
* <dd>the reference base was an N, which is not considered callable the GATK</dd>
|
||||
* <dt>CALLABLE</dt>
|
||||
* <dd>the base satisfied the min. depth for calling but had less than maxDepth to avoid having EXCESSIVE_COVERAGE</dd>
|
||||
* <dt>NO_COVERAGE</dt>
|
||||
* <dd>absolutely no reads were seen at this locus, regardless of the filtering parameters</dd>
|
||||
* <dt>LOW_COVERAGE</dt>
|
||||
* <dd>there were less than min. depth bases at the locus, after applying filters</dd>
|
||||
* <dt>EXCESSIVE_COVERAGE</dt>
|
||||
* <dd>more than -maxDepth read at the locus, indicating some sort of mapping problem</dd>
|
||||
* <dt>POOR_MAPPING_QUALITY</dt>
|
||||
* <dd>more than --maxFractionOfReadsWithLowMAPQ at the locus, indicating a poor mapping quality of the reads</dd>
|
||||
* <dt>REF_N</dt>
|
||||
* <dd>the reference base was an N, which is not considered callable the GATK</dd>
|
||||
* <dt>PASS</dt>
|
||||
* <dd>the base satisfied the min. depth for calling but had less than maxDepth to avoid having EXCESSIVE_COVERAGE</dd>
|
||||
* <dt>NO_COVERAGE</dt>
|
||||
* <dd>absolutely no reads were seen at this locus, regardless of the filtering parameters</dd>
|
||||
* <dt>LOW_COVERAGE</dt>
|
||||
* <dd>there were less than min. depth bases at the locus, after applying filters</dd>
|
||||
* <dt>EXCESSIVE_COVERAGE</dt>
|
||||
* <dd>more than -maxDepth read at the locus, indicating some sort of mapping problem</dd>
|
||||
* <dt>POOR_MAPPING_QUALITY</dt>
|
||||
* <dd>more than --maxFractionOfReadsWithLowMAPQ at the locus, indicating a poor mapping quality of the reads</dd>
|
||||
* </dl>
|
||||
* </p>
|
||||
*
|
||||
* <p/>
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* A BAM file containing <b>exactly one sample</b>.
|
||||
* A BAM file containing <b>exactly one sample</b>.
|
||||
* </p>
|
||||
*
|
||||
* <p/>
|
||||
* <h2>Output</h2>
|
||||
* <p>
|
||||
* <ul>
|
||||
* <li>-o: a OutputFormatted (recommended BED) file with the callable status covering each base</li>
|
||||
* <li>-summary: a table of callable status x count of all examined bases</li>
|
||||
* <li>-o: a OutputFormatted (recommended BED) file with the callable status covering each base</li>
|
||||
* <li>-summary: a table of callable status x count of all examined bases</li>
|
||||
* </ul>
|
||||
* </p>
|
||||
*
|
||||
* <p/>
|
||||
* <h2>Examples</h2>
|
||||
* <pre>
|
||||
* -T CallableLociWalker \
|
||||
|
|
@ -83,31 +85,31 @@ import java.io.PrintStream;
|
|||
* -summary my.summary \
|
||||
* -o my.bed
|
||||
* </pre>
|
||||
*
|
||||
* <p/>
|
||||
* would produce a BED file (my.bed) that looks like:
|
||||
*
|
||||
* <p/>
|
||||
* <pre>
|
||||
* 20 10000000 10000864 CALLABLE
|
||||
* 20 10000000 10000864 PASS
|
||||
* 20 10000865 10000985 POOR_MAPPING_QUALITY
|
||||
* 20 10000986 10001138 CALLABLE
|
||||
* 20 10000986 10001138 PASS
|
||||
* 20 10001139 10001254 POOR_MAPPING_QUALITY
|
||||
* 20 10001255 10012255 CALLABLE
|
||||
* 20 10001255 10012255 PASS
|
||||
* 20 10012256 10012259 POOR_MAPPING_QUALITY
|
||||
* 20 10012260 10012263 CALLABLE
|
||||
* 20 10012260 10012263 PASS
|
||||
* 20 10012264 10012328 POOR_MAPPING_QUALITY
|
||||
* 20 10012329 10012550 CALLABLE
|
||||
* 20 10012329 10012550 PASS
|
||||
* 20 10012551 10012551 LOW_COVERAGE
|
||||
* 20 10012552 10012554 CALLABLE
|
||||
* 20 10012552 10012554 PASS
|
||||
* 20 10012555 10012557 LOW_COVERAGE
|
||||
* 20 10012558 10012558 CALLABLE
|
||||
* 20 10012558 10012558 PASS
|
||||
* et cetera...
|
||||
* </pre>
|
||||
* as well as a summary table that looks like:
|
||||
*
|
||||
* <p/>
|
||||
* <pre>
|
||||
* state nBases
|
||||
* REF_N 0
|
||||
* CALLABLE 996046
|
||||
* PASS 996046
|
||||
* NO_COVERAGE 121
|
||||
* LOW_COVERAGE 928
|
||||
* EXCESSIVE_COVERAGE 0
|
||||
|
|
@ -139,21 +141,21 @@ public class CallableLociWalker extends LocusWalker<CallableLociWalker.CallableB
|
|||
byte maxLowMAPQ = 1;
|
||||
|
||||
/**
|
||||
* Reads with MAPQ > minMappingQuality are treated as usable for variation detection, contributing to the CALLABLE
|
||||
* Reads with MAPQ > minMappingQuality are treated as usable for variation detection, contributing to the PASS
|
||||
* state.
|
||||
*/
|
||||
@Argument(fullName = "minMappingQuality", shortName = "mmq", doc = "Minimum mapping quality of reads to count towards depth.", required = false)
|
||||
byte minMappingQuality = 10;
|
||||
|
||||
/**
|
||||
* Bases with less than minBaseQuality are viewed as not sufficiently high quality to contribute to the CALLABLE state
|
||||
* Bases with less than minBaseQuality are viewed as not sufficiently high quality to contribute to the PASS state
|
||||
*/
|
||||
@Argument(fullName = "minBaseQuality", shortName = "mbq", doc = "Minimum quality of bases to count towards depth.", required = false)
|
||||
byte minBaseQuality = 20;
|
||||
|
||||
/**
|
||||
* If the number of QC+ bases (on reads with MAPQ > minMappingQuality and with base quality > minBaseQuality) exceeds this
|
||||
* value and is less than maxDepth the site is considered CALLABLE.
|
||||
* value and is less than maxDepth the site is considered PASS.
|
||||
*/
|
||||
@Advanced
|
||||
@Argument(fullName = "minDepth", shortName = "minDepth", doc = "Minimum QC+ read depth before a locus is considered callable", required = false)
|
||||
|
|
@ -191,7 +193,7 @@ public class CallableLociWalker extends LocusWalker<CallableLociWalker.CallableB
|
|||
public enum OutputFormat {
|
||||
/**
|
||||
* The output will be written as a BED file. There's a BED element for each
|
||||
* continuous run of callable states (i.e., CALLABLE, REF_N, etc). This is the recommended
|
||||
* continuous run of callable states (i.e., PASS, REF_N, etc). This is the recommended
|
||||
* format
|
||||
*/
|
||||
BED,
|
||||
|
|
@ -204,17 +206,29 @@ public class CallableLociWalker extends LocusWalker<CallableLociWalker.CallableB
|
|||
}
|
||||
|
||||
public enum CalledState {
|
||||
/** the reference base was an N, which is not considered callable the GATK */
|
||||
/**
|
||||
* the reference base was an N, which is not considered callable the GATK
|
||||
*/
|
||||
REF_N,
|
||||
/** the base satisfied the min. depth for calling but had less than maxDepth to avoid having EXCESSIVE_COVERAGE */
|
||||
/**
|
||||
* the base satisfied the min. depth for calling but had less than maxDepth to avoid having EXCESSIVE_COVERAGE
|
||||
*/
|
||||
CALLABLE,
|
||||
/** absolutely no reads were seen at this locus, regardless of the filtering parameters */
|
||||
/**
|
||||
* absolutely no reads were seen at this locus, regardless of the filtering parameters
|
||||
*/
|
||||
NO_COVERAGE,
|
||||
/** there were less than min. depth bases at the locus, after applying filters */
|
||||
/**
|
||||
* there were less than min. depth bases at the locus, after applying filters
|
||||
*/
|
||||
LOW_COVERAGE,
|
||||
/** more than -maxDepth read at the locus, indicating some sort of mapping problem */
|
||||
/**
|
||||
* more than -maxDepth read at the locus, indicating some sort of mapping problem
|
||||
*/
|
||||
EXCESSIVE_COVERAGE,
|
||||
/** more than --maxFractionOfReadsWithLowMAPQ at the locus, indicating a poor mapping quality of the reads */
|
||||
/**
|
||||
* more than --maxFractionOfReadsWithLowMAPQ at the locus, indicating a poor mapping quality of the reads
|
||||
*/
|
||||
POOR_MAPPING_QUALITY
|
||||
}
|
||||
|
||||
|
|
@ -223,11 +237,13 @@ public class CallableLociWalker extends LocusWalker<CallableLociWalker.CallableB
|
|||
////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
@Override
|
||||
public boolean includeReadsWithDeletionAtLoci() { return true; }
|
||||
public boolean includeReadsWithDeletionAtLoci() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void initialize() {
|
||||
if ( getSampleDB().getSamples().size() != 1 ) {
|
||||
if (getSampleDB().getSamples().size() != 1) {
|
||||
throw new UserException.BadArgumentValue("-I", "CallableLoci only works for a single sample, but multiple samples were found in the provided BAM files: " + getSampleDB().getSamples());
|
||||
}
|
||||
|
||||
|
|
@ -249,7 +265,7 @@ public class CallableLociWalker extends LocusWalker<CallableLociWalker.CallableB
|
|||
public GenomeLoc loc;
|
||||
final public CalledState state;
|
||||
|
||||
public CallableBaseState(GenomeLocParser genomeLocParser,GenomeLoc loc, CalledState state) {
|
||||
public CallableBaseState(GenomeLocParser genomeLocParser, GenomeLoc loc, CalledState state) {
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
this.loc = loc;
|
||||
this.state = state;
|
||||
|
|
@ -264,12 +280,13 @@ public class CallableLociWalker extends LocusWalker<CallableLociWalker.CallableB
|
|||
}
|
||||
|
||||
// update routines
|
||||
public boolean changingState( CalledState newState ) {
|
||||
public boolean changingState(CalledState newState) {
|
||||
return state != newState;
|
||||
}
|
||||
|
||||
/**
|
||||
* Updating the location of this CalledBaseState by the new stop location
|
||||
*
|
||||
* @param newStop
|
||||
*/
|
||||
public void update(GenomeLoc newStop) {
|
||||
|
|
@ -285,7 +302,7 @@ public class CallableLociWalker extends LocusWalker<CallableLociWalker.CallableB
|
|||
public CallableBaseState map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
CalledState state;
|
||||
|
||||
if ( BaseUtils.isNBase(ref.getBase()) ) {
|
||||
if (BaseUtils.isNBase(ref.getBase())) {
|
||||
state = CalledState.REF_N;
|
||||
} else {
|
||||
// count up the depths of all and QC+ bases
|
||||
|
|
@ -293,29 +310,29 @@ public class CallableLociWalker extends LocusWalker<CallableLociWalker.CallableB
|
|||
for (PileupElement e : context.getBasePileup()) {
|
||||
rawDepth++;
|
||||
|
||||
if ( e.getMappingQual() <= maxLowMAPQ )
|
||||
if (e.getMappingQual() <= maxLowMAPQ)
|
||||
lowMAPQDepth++;
|
||||
|
||||
if ( e.getMappingQual() >= minMappingQuality && ( e.getQual() >= minBaseQuality || e.isDeletion() ) ) {
|
||||
if (e.getMappingQual() >= minMappingQuality && (e.getQual() >= minBaseQuality || e.isDeletion())) {
|
||||
QCDepth++;
|
||||
}
|
||||
}
|
||||
|
||||
//System.out.printf("%s rawdepth = %d QCDepth = %d lowMAPQ = %d%n", context.getLocation(), rawDepth, QCDepth, lowMAPQDepth);
|
||||
if ( rawDepth == 0 ) {
|
||||
if (rawDepth == 0) {
|
||||
state = CalledState.NO_COVERAGE;
|
||||
} else if ( rawDepth >= minDepthLowMAPQ && MathUtils.ratio( lowMAPQDepth, rawDepth ) >= maxLowMAPQFraction ) {
|
||||
} else if (rawDepth >= minDepthLowMAPQ && MathUtils.ratio(lowMAPQDepth, rawDepth) >= maxLowMAPQFraction) {
|
||||
state = CalledState.POOR_MAPPING_QUALITY;
|
||||
} else if ( QCDepth < minDepth ) {
|
||||
} else if (QCDepth < minDepth) {
|
||||
state = CalledState.LOW_COVERAGE;
|
||||
} else if ( rawDepth >= maxDepth && maxDepth != -1 ) {
|
||||
} else if (rawDepth >= maxDepth && maxDepth != -1) {
|
||||
state = CalledState.EXCESSIVE_COVERAGE;
|
||||
} else {
|
||||
state = CalledState.CALLABLE;
|
||||
}
|
||||
}
|
||||
|
||||
return new CallableBaseState(getToolkit().getGenomeLocParser(),context.getLocation(), state);
|
||||
return new CallableBaseState(getToolkit().getGenomeLocParser(), context.getLocation(), state);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
@ -328,15 +345,15 @@ public class CallableLociWalker extends LocusWalker<CallableLociWalker.CallableB
|
|||
// update counts
|
||||
integrator.counts[state.getState().ordinal()]++;
|
||||
|
||||
if ( outputFormat == OutputFormat.STATE_PER_BASE ) {
|
||||
if (outputFormat == OutputFormat.STATE_PER_BASE) {
|
||||
out.println(state.toString());
|
||||
}
|
||||
|
||||
// format is integrating
|
||||
if ( integrator.state == null )
|
||||
if (integrator.state == null)
|
||||
integrator.state = state;
|
||||
else if ( state.getLocation().getStart() != integrator.state.getLocation().getStop() + 1 ||
|
||||
integrator.state.changingState(state.getState()) ) {
|
||||
else if (state.getLocation().getStart() != integrator.state.getLocation().getStop() + 1 ||
|
||||
integrator.state.changingState(state.getState())) {
|
||||
out.println(integrator.state.toString());
|
||||
integrator.state = state;
|
||||
} else {
|
||||
|
|
@ -354,14 +371,14 @@ public class CallableLociWalker extends LocusWalker<CallableLociWalker.CallableB
|
|||
@Override
|
||||
public void onTraversalDone(Integrator result) {
|
||||
// print out the last state
|
||||
if ( result != null ) {
|
||||
if ( outputFormat == OutputFormat.BED ) // get the last interval
|
||||
if (result != null) {
|
||||
if (outputFormat == OutputFormat.BED) // get the last interval
|
||||
out.println(result.state.toString());
|
||||
|
||||
try {
|
||||
PrintStream summaryOut = new PrintStream(summaryFile);
|
||||
summaryOut.printf("%30s %s%n", "state", "nBases");
|
||||
for ( CalledState state : CalledState.values() ) {
|
||||
for (CalledState state : CalledState.values()) {
|
||||
summaryOut.printf("%30s %d%n", state, result.counts[state.ordinal()]);
|
||||
}
|
||||
summaryOut.close();
|
||||
|
|
|
|||
|
|
@ -1,3 +1,27 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
|
||||
|
||||
/**
|
||||
|
|
@ -7,16 +31,40 @@ package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
|
|||
* @since 2/1/12
|
||||
*/
|
||||
public enum CallableStatus {
|
||||
/** the reference base was an N, which is not considered callable the GATK */
|
||||
REF_N,
|
||||
/** the base satisfied the min. depth for calling but had less than maxDepth to avoid having EXCESSIVE_COVERAGE */
|
||||
CALLABLE,
|
||||
/** absolutely no reads were seen at this locus, regardless of the filtering parameters */
|
||||
NO_COVERAGE,
|
||||
/** there were less than min. depth bases at the locus, after applying filters */
|
||||
LOW_COVERAGE,
|
||||
/** more than -maxDepth read at the locus, indicating some sort of mapping problem */
|
||||
EXCESSIVE_COVERAGE,
|
||||
/** more than --maxFractionOfReadsWithLowMAPQ at the locus, indicating a poor mapping quality of the reads */
|
||||
POOR_QUALITY
|
||||
/**
|
||||
* the reference base was an N, which is not considered callable the GATK
|
||||
*/
|
||||
// todo -- implement this status
|
||||
REF_N("the reference base was an N, which is not considered callable the GATK"),
|
||||
/**
|
||||
* the base satisfied the min. depth for calling but had less than maxDepth to avoid having EXCESSIVE_COVERAGE
|
||||
*/
|
||||
PASS("the base satisfied the min. depth for calling but had less than maxDepth to avoid having EXCESSIVE_COVERAGE"),
|
||||
/**
|
||||
* absolutely no reads were seen at this locus, regardless of the filtering parameters
|
||||
*/
|
||||
NO_COVERAGE("absolutely no reads were seen at this locus, regardless of the filtering parameters"),
|
||||
/**
|
||||
* there were less than min. depth bases at the locus, after applying filters
|
||||
*/
|
||||
LOW_COVERAGE("there were less than min. depth bases at the locus, after applying filters"),
|
||||
/**
|
||||
* more than -maxDepth read at the locus, indicating some sort of mapping problem
|
||||
*/
|
||||
EXCESSIVE_COVERAGE("more than -maxDepth read at the locus, indicating some sort of mapping problem"),
|
||||
/**
|
||||
* more than --maxFractionOfReadsWithLowMAPQ at the locus, indicating a poor mapping quality of the reads
|
||||
*/
|
||||
POOR_QUALITY("more than --maxFractionOfReadsWithLowMAPQ at the locus, indicating a poor mapping quality of the reads"),
|
||||
|
||||
BAD_MATE(""),
|
||||
|
||||
INCONSISTENT_COVERAGE("");
|
||||
|
||||
|
||||
public String description;
|
||||
|
||||
private CallableStatus(String description) {
|
||||
this.description = description;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,45 +1,67 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
|
||||
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Input;
|
||||
import org.broadinstitute.sting.commandline.IntervalBinding;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.By;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocComparator;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.TreeSet;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Short one line description of the walker.
|
||||
*
|
||||
* <p/>
|
||||
* <p>
|
||||
* [Long description of the walker]
|
||||
* </p>
|
||||
*
|
||||
*
|
||||
* <p/>
|
||||
* <p/>
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* [Description of the Input]
|
||||
* </p>
|
||||
*
|
||||
* <p/>
|
||||
* <h2>Output</h2>
|
||||
* <p>
|
||||
* [Description of the Output]
|
||||
* </p>
|
||||
*
|
||||
* <p/>
|
||||
* <h2>Examples</h2>
|
||||
* <pre>
|
||||
* java
|
||||
|
|
@ -51,12 +73,13 @@ import java.util.TreeSet;
|
|||
* @since 2/1/12
|
||||
*/
|
||||
@By(value = DataSource.READS)
|
||||
public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
||||
@PartitionBy(PartitionType.INTERVAL)
|
||||
public class DiagnoseTargets extends LocusWalker<Long, Long> implements AnnotatorCompatibleWalker {
|
||||
@Input(fullName = "interval_track", shortName = "int", doc = "", required = true)
|
||||
private IntervalBinding<Feature> intervalTrack = null;
|
||||
|
||||
@Output
|
||||
private PrintStream out = System.out;
|
||||
@Output(doc = "File to which variants should be written", required = true)
|
||||
protected VCFWriter vcfWriter = null;
|
||||
|
||||
@Argument(fullName = "expand_interval", shortName = "exp", doc = "", required = false)
|
||||
private int expandInterval = 50;
|
||||
|
|
@ -73,13 +96,13 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
|||
@Argument(fullName = "maximum_coverage", shortName = "maxcov", doc = "", required = false)
|
||||
private int maximumCoverage = 700;
|
||||
|
||||
private TreeSet<GenomeLoc> intervalList = null; // The list of intervals of interest (plus expanded intervals if user wants them)
|
||||
private HashMap<GenomeLoc, IntervalStatistics> intervalMap = null; // interval => statistics
|
||||
private Iterator<GenomeLoc> intervalListIterator; // An iterator to go over all the intervals provided as we traverse the genome
|
||||
private GenomeLoc currentInterval = null; // The "current" interval loaded and being filled with statistics
|
||||
private IntervalStatistics currentIntervalStatistics = null; // The "current" interval loaded and being filled with statistics
|
||||
|
||||
private GenomeLocParser parser; // just an object to allow us to create genome locs (for the expanded intervals)
|
||||
private TreeSet<GenomeLoc> intervalList = null; // The list of intervals of interest (plus expanded intervals if user wants them)
|
||||
private HashMap<GenomeLoc, IntervalStatistics> intervalMap = null; // interval => statistics
|
||||
private Iterator<GenomeLoc> intervalListIterator; // An iterator to go over all the intervals provided as we traverse the genome
|
||||
private GenomeLoc currentInterval = null; // The "current" interval loaded
|
||||
private IntervalStatistics currentIntervalStatistics = null; // The "current" interval being filled with statistics
|
||||
private Set<String> samples = null; // All the samples being processed
|
||||
private GenomeLocParser parser; // just an object to allow us to create genome locs (for the expanded intervals)
|
||||
|
||||
@Override
|
||||
public void initialize() {
|
||||
|
|
@ -88,38 +111,72 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
|||
if (intervalTrack == null)
|
||||
throw new UserException("This tool currently only works if you provide an interval track");
|
||||
|
||||
parser = new GenomeLocParser(getToolkit().getMasterSequenceDictionary()); // Important to initialize the parser before creating the intervals below
|
||||
parser = new GenomeLocParser(getToolkit().getMasterSequenceDictionary()); // Important to initialize the parser before creating the intervals below
|
||||
|
||||
List<GenomeLoc> originalList = intervalTrack.getIntervals(getToolkit()); // The original list of targets provided by the user that will be expanded or not depending on the options provided
|
||||
List<GenomeLoc> originalList = intervalTrack.getIntervals(getToolkit()); // The original list of targets provided by the user that will be expanded or not depending on the options provided
|
||||
intervalList = new TreeSet<GenomeLoc>(new GenomeLocComparator());
|
||||
intervalMap = new HashMap<GenomeLoc, IntervalStatistics>(originalList.size() * 2);
|
||||
intervalMap = new HashMap<GenomeLoc, IntervalStatistics>();
|
||||
for (GenomeLoc interval : originalList)
|
||||
addAndExpandIntervalToLists(interval);
|
||||
intervalList.add(interval);
|
||||
//addAndExpandIntervalToMap(interval);
|
||||
|
||||
intervalListIterator = intervalList.iterator();
|
||||
|
||||
// get all of the unique sample names
|
||||
samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
|
||||
|
||||
// initialize the header
|
||||
Set<VCFHeaderLine> headerInfo = getHeaderInfo();
|
||||
|
||||
vcfWriter.writeHeader(new VCFHeader(headerInfo, samples));
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the header lines for the VCF writer
|
||||
*
|
||||
* @return A set of VCF header lines
|
||||
*/
|
||||
private Set<VCFHeaderLine> getHeaderInfo() {
|
||||
Set<VCFHeaderLine> headerLines = new HashSet<VCFHeaderLine>();
|
||||
|
||||
// INFO fields for overall data
|
||||
headerLines.add(new VCFInfoHeaderLine("END", 1, VCFHeaderLineType.Integer, "Stop position of the interval"));
|
||||
headerLines.add(new VCFInfoHeaderLine("DP", 1, VCFHeaderLineType.Integer, "Total depth in the site. Sum of the depth of all pools"));
|
||||
headerLines.add(new VCFInfoHeaderLine("AD", 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a lci divided by interval size."));
|
||||
headerLines.add(new VCFInfoHeaderLine("Diagnose Targets", 0, VCFHeaderLineType.Flag, "DiagnoseTargets mode"));
|
||||
|
||||
// FORMAT fields for each genotype
|
||||
headerLines.add(new VCFFormatHeaderLine("DP", 1, VCFHeaderLineType.Integer, "Total depth in the site. Sum of the depth of all pools"));
|
||||
headerLines.add(new VCFFormatHeaderLine("AD", 1, VCFHeaderLineType.Float, "Average depth across the interval. Sum of the depth in a lci divided by interval size."));
|
||||
|
||||
// FILTER fields
|
||||
|
||||
for (CallableStatus stat : CallableStatus.values()) {
|
||||
headerLines.add(new VCFHeaderLine(stat.name(), stat.description));
|
||||
}
|
||||
|
||||
return headerLines;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Long map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
GenomeLoc refLocus = ref.getLocus();
|
||||
while (currentInterval == null || currentInterval.isBefore(refLocus)) {
|
||||
while (currentInterval == null || currentInterval.isBefore(refLocus)) { // do this for first time and while currentInterval is behind current locus
|
||||
if (!intervalListIterator.hasNext())
|
||||
return 0L;
|
||||
|
||||
if (currentInterval != null)
|
||||
processIntervalStats(currentInterval, Allele.create(ref.getBase(), true));
|
||||
|
||||
currentInterval = intervalListIterator.next();
|
||||
addAndExpandIntervalToMap(currentInterval);
|
||||
currentIntervalStatistics = intervalMap.get(currentInterval);
|
||||
}
|
||||
|
||||
if (currentInterval.isPast(refLocus))
|
||||
if (currentInterval.isPast(refLocus)) // skip if we are behind the current interval
|
||||
return 0L;
|
||||
|
||||
byte[] mappingQualities = context.getBasePileup().getMappingQuals();
|
||||
byte[] baseQualities = context.getBasePileup().getQuals();
|
||||
int coverage = context.getBasePileup().getBaseAndMappingFilteredPileup(minimumBaseQuality, minimumMappingQuality).depthOfCoverage();
|
||||
int rawCoverage = context.size();
|
||||
|
||||
IntervalStatisticLocus locusData = new IntervalStatisticLocus(mappingQualities, baseQualities, coverage, rawCoverage);
|
||||
currentIntervalStatistics.addLocus(refLocus, locusData);
|
||||
currentIntervalStatistics.addLocus(context); // Add current locus to stats
|
||||
|
||||
return 1L;
|
||||
}
|
||||
|
|
@ -129,6 +186,13 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
|||
return 0L;
|
||||
}
|
||||
|
||||
/**
|
||||
* Not sure what we are going to do here
|
||||
*
|
||||
* @param value result of the map.
|
||||
* @param sum accumulator for the reduce.
|
||||
* @return a long
|
||||
*/
|
||||
@Override
|
||||
public Long reduce(Long value, Long sum) {
|
||||
return sum + value;
|
||||
|
|
@ -136,14 +200,25 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
|||
|
||||
@Override
|
||||
public void onTraversalDone(Long result) {
|
||||
super.onTraversalDone(result);
|
||||
out.println("Interval\tCallStatus\tCOV\tAVG");
|
||||
for (GenomeLoc interval : intervalList) {
|
||||
IntervalStatistics stats = intervalMap.get(interval);
|
||||
out.println(String.format("%s\t%s\t%d\t%f", interval, stats.callableStatus(), stats.totalCoverage(), stats.averageCoverage()));
|
||||
}
|
||||
for (GenomeLoc interval : intervalMap.keySet())
|
||||
processIntervalStats(interval, Allele.create("<DT>", true));
|
||||
}
|
||||
|
||||
@Override
|
||||
public RodBinding<VariantContext> getSnpEffRodBinding() {return null;}
|
||||
|
||||
@Override
|
||||
public RodBinding<VariantContext> getDbsnpRodBinding() {return null;}
|
||||
|
||||
@Override
|
||||
public List<RodBinding<VariantContext>> getCompRodBindings() {return null;}
|
||||
|
||||
@Override
|
||||
public List<RodBinding<VariantContext>> getResourceRodBindings() {return null;}
|
||||
|
||||
@Override
|
||||
public boolean alwaysAppendDbsnpId() {return false;}
|
||||
|
||||
private GenomeLoc createIntervalBefore(GenomeLoc interval) {
|
||||
int start = Math.max(interval.getStart() - expandInterval, 0);
|
||||
int stop = Math.max(interval.getStart() - 1, 0);
|
||||
|
|
@ -157,16 +232,75 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
|||
return parser.createGenomeLoc(interval.getContig(), interval.getContigIndex(), start, stop);
|
||||
}
|
||||
|
||||
private void addAndExpandIntervalToLists(GenomeLoc interval) {
|
||||
/**
|
||||
* Takes an interval and commits it to memory.
|
||||
* It will expand it if so told by the -exp command line argument
|
||||
*
|
||||
* @param interval The new interval to process
|
||||
*/
|
||||
private void addAndExpandIntervalToMap(GenomeLoc interval) {
|
||||
if (expandInterval > 0) {
|
||||
GenomeLoc before = createIntervalBefore(interval);
|
||||
GenomeLoc after = createIntervalAfter(interval);
|
||||
intervalList.add(before);
|
||||
intervalList.add(after);
|
||||
intervalMap.put(before, new IntervalStatistics(before, minimumCoverage, maximumCoverage, minimumMappingQuality, minimumBaseQuality));
|
||||
intervalMap.put(after, new IntervalStatistics(after, minimumCoverage, maximumCoverage, minimumMappingQuality, minimumBaseQuality));
|
||||
intervalMap.put(before, new IntervalStatistics(samples, before, minimumCoverage, maximumCoverage, minimumMappingQuality, minimumBaseQuality));
|
||||
intervalMap.put(after, new IntervalStatistics(samples, after, minimumCoverage, maximumCoverage, minimumMappingQuality, minimumBaseQuality));
|
||||
}
|
||||
intervalList.add(interval);
|
||||
intervalMap.put(interval, new IntervalStatistics(interval, minimumCoverage, maximumCoverage, minimumMappingQuality, minimumBaseQuality));
|
||||
if (!intervalList.contains(interval))
|
||||
intervalList.add(interval);
|
||||
intervalMap.put(interval, new IntervalStatistics(samples, interval, minimumCoverage, maximumCoverage, minimumMappingQuality, minimumBaseQuality));
|
||||
}
|
||||
|
||||
/**
|
||||
* Takes the interval, finds it in the stash, prints it to the VCF, and removes it
|
||||
*
|
||||
* @param interval The interval in memory that you want to write out and clear
|
||||
* @param allele the allele
|
||||
*/
|
||||
private void processIntervalStats(GenomeLoc interval, Allele allele) {
|
||||
IntervalStatistics stats = intervalMap.get(interval);
|
||||
|
||||
List<Allele> alleles = new ArrayList<Allele>();
|
||||
Map<String, Object> attributes = new HashMap<String, Object>();
|
||||
ArrayList<Genotype> genotypes = new ArrayList<Genotype>();
|
||||
|
||||
alleles.add(allele);
|
||||
VariantContextBuilder vcb = new VariantContextBuilder("DiagnoseTargets", interval.getContig(), interval.getStart(), interval.getStop(), alleles);
|
||||
|
||||
vcb = vcb.log10PError(VariantContext.NO_LOG10_PERROR); // QUAL field makes no sense in our VCF
|
||||
vcb.filters(statusesToStrings(stats.callableStatuses()));
|
||||
|
||||
attributes.put(VCFConstants.END_KEY, interval.getStop());
|
||||
attributes.put(VCFConstants.DEPTH_KEY, stats.totalCoverage());
|
||||
attributes.put("AV", stats.averageCoverage());
|
||||
|
||||
vcb = vcb.attributes(attributes);
|
||||
|
||||
for (String sample : samples) {
|
||||
Map<String, Object> infos = new HashMap<String, Object>();
|
||||
infos.put("DP", stats.getSample(sample).totalCoverage());
|
||||
infos.put("AV", stats.getSample(sample).averageCoverage());
|
||||
|
||||
Set<String> filters = new HashSet<String>();
|
||||
filters.addAll(statusesToStrings(stats.getSample(sample).getCallableStatuses()));
|
||||
|
||||
|
||||
genotypes.add(new Genotype(sample, alleles, VariantContext.NO_LOG10_PERROR, filters, infos, false));
|
||||
}
|
||||
vcb = vcb.genotypes(genotypes);
|
||||
|
||||
vcfWriter.add(vcb.make());
|
||||
|
||||
intervalMap.remove(interval);
|
||||
}
|
||||
|
||||
private static Set<String> statusesToStrings(Set<CallableStatus> statuses) {
|
||||
Set<String> output = new HashSet<String>(statuses.size());
|
||||
|
||||
for (CallableStatus status : statuses)
|
||||
output.add(status.name());
|
||||
|
||||
return output;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,34 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
|
||||
|
||||
/**
|
||||
* The definition of a locus for the DiagnoseTargets walker statistics calculation
|
||||
*
|
||||
* @author Mauricio Carneiro
|
||||
* @since 2/3/12
|
||||
*/
|
||||
class IntervalStatisticLocus {
|
||||
private final byte[] mappingQuality;
|
||||
private final byte[] baseQuality;
|
||||
private final int coverage;
|
||||
private final int rawCoverage;
|
||||
|
||||
public IntervalStatisticLocus(byte[] mappingQuality, byte[] baseQuality, int coverage, int rawCoverage) {
|
||||
this.mappingQuality = mappingQuality;
|
||||
this.baseQuality = baseQuality;
|
||||
this.coverage = coverage;
|
||||
this.rawCoverage = rawCoverage;
|
||||
}
|
||||
|
||||
public IntervalStatisticLocus() {
|
||||
this(new byte[1], new byte[1], 0, 0);
|
||||
}
|
||||
|
||||
public int getCoverage() {
|
||||
return coverage;
|
||||
}
|
||||
|
||||
public int getRawCoverage() {
|
||||
return rawCoverage;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,44 +1,62 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Short one line description of the walker.
|
||||
*
|
||||
* @author Mauricio Carneiro
|
||||
* @since 2/1/12
|
||||
*/
|
||||
class IntervalStatistics {
|
||||
public class IntervalStatistics {
|
||||
|
||||
private final Map<String, SampleStatistics> samples;
|
||||
private final GenomeLoc interval;
|
||||
private final ArrayList<IntervalStatisticLocus> loci;
|
||||
|
||||
private final int minimumCoverageThreshold;
|
||||
private final int maximumCoverageThreshold;
|
||||
private final int minimumMappingQuality;
|
||||
private final int minimumBaseQuality;
|
||||
private int preComputedTotalCoverage = -1; // avoids re-calculating the total sum (-1 means we haven't pre-computed it yet)
|
||||
|
||||
private int preComputedTotalCoverage = -1; // avoids re-calculating the total sum (-1 means we haven't pre-computed it yet)
|
||||
|
||||
private IntervalStatistics(GenomeLoc interval, ArrayList<IntervalStatisticLocus> loci, int minimumCoverageThreshold, int maximumCoverageThreshold, int minimumMappingQuality, int minimumBaseQuality) {
|
||||
public IntervalStatistics(Set<String> samples, GenomeLoc interval, int minimumCoverageThreshold, int maximumCoverageThreshold, int minimumMappingQuality, int minimumBaseQuality) {
|
||||
this.interval = interval;
|
||||
this.loci = loci;
|
||||
this.minimumCoverageThreshold = minimumCoverageThreshold;
|
||||
this.maximumCoverageThreshold = maximumCoverageThreshold;
|
||||
this.minimumMappingQuality = minimumMappingQuality;
|
||||
this.minimumBaseQuality = minimumBaseQuality;
|
||||
this.samples = new HashMap<String, SampleStatistics>(samples.size());
|
||||
for (String sample : samples)
|
||||
this.samples.put(sample, new SampleStatistics(interval, minimumCoverageThreshold, maximumCoverageThreshold, minimumMappingQuality, minimumBaseQuality));
|
||||
}
|
||||
|
||||
public IntervalStatistics(GenomeLoc interval, int minimumCoverageThreshold, int maximumCoverageThreshold, int minimumMappingQuality, int minimumBaseQuality) {
|
||||
this(interval, new ArrayList<IntervalStatisticLocus>(interval.size()), minimumCoverageThreshold, maximumCoverageThreshold, minimumMappingQuality, minimumBaseQuality);
|
||||
public SampleStatistics getSample(String sample) {
|
||||
return samples.get(sample);
|
||||
}
|
||||
|
||||
// Initialize every loci (this way we don't have to worry about non-existent loci in the object
|
||||
for (int i = 0; i < interval.size(); i++)
|
||||
this.loci.add(i, new IntervalStatisticLocus());
|
||||
public void addLocus(AlignmentContext context) {
|
||||
ReadBackedPileup pileup = context.getBasePileup();
|
||||
|
||||
for (String sample : samples.keySet())
|
||||
getSample(sample).addLocus(context.getLocation(), pileup.getPileupForSample(sample));
|
||||
}
|
||||
|
||||
public long totalCoverage() {
|
||||
|
|
@ -50,73 +68,27 @@ class IntervalStatistics {
|
|||
public double averageCoverage() {
|
||||
if (preComputedTotalCoverage < 0)
|
||||
calculateTotalCoverage();
|
||||
return (double) preComputedTotalCoverage / loci.size();
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the callable status of the entire interval
|
||||
*
|
||||
* @return the callable status of the entire interval
|
||||
*/
|
||||
public CallableStatus callableStatus() {
|
||||
long max = -1;
|
||||
CallableStatus maxCallableStatus = null;
|
||||
HashMap<CallableStatus, Integer> statusCounts = new HashMap<CallableStatus, Integer>(CallableStatus.values().length);
|
||||
|
||||
// initialize the statusCounts with all callable states
|
||||
for (CallableStatus key : CallableStatus.values())
|
||||
statusCounts.put(key, 0);
|
||||
|
||||
// calculate the callable status for each locus
|
||||
for (int i = 0; i < loci.size(); i++) {
|
||||
CallableStatus status = callableStatus(i);
|
||||
int count = statusCounts.get(status) + 1;
|
||||
statusCounts.put(status, count);
|
||||
|
||||
if (count > max) {
|
||||
max = count;
|
||||
maxCallableStatus = status;
|
||||
}
|
||||
}
|
||||
|
||||
return maxCallableStatus;
|
||||
}
|
||||
|
||||
public void addLocus(GenomeLoc locus, IntervalStatisticLocus locusData) {
|
||||
if (!interval.containsP(locus))
|
||||
throw new ReviewedStingException(String.format("Locus %s is not part of the Interval", locus));
|
||||
|
||||
int locusIndex = locus.getStart() - interval.getStart();
|
||||
|
||||
loci.add(locusIndex, locusData);
|
||||
}
|
||||
|
||||
/**
|
||||
* returns the callable status of this locus without taking the reference base into account.
|
||||
*
|
||||
* @param locusIndex location in the genome to inquire (only one locus)
|
||||
* @return the callable status of a locus
|
||||
*/
|
||||
private CallableStatus callableStatus(int locusIndex) {
|
||||
if (loci.get(locusIndex).getCoverage() > maximumCoverageThreshold)
|
||||
return CallableStatus.EXCESSIVE_COVERAGE;
|
||||
|
||||
if (loci.get(locusIndex).getCoverage() >= minimumCoverageThreshold)
|
||||
return CallableStatus.CALLABLE;
|
||||
|
||||
if (loci.get(locusIndex).getRawCoverage() >= minimumCoverageThreshold)
|
||||
return CallableStatus.POOR_QUALITY;
|
||||
|
||||
if (loci.get(locusIndex).getRawCoverage() > 0)
|
||||
return CallableStatus.LOW_COVERAGE;
|
||||
|
||||
return CallableStatus.NO_COVERAGE;
|
||||
return (double) preComputedTotalCoverage / interval.size();
|
||||
}
|
||||
|
||||
private void calculateTotalCoverage() {
|
||||
preComputedTotalCoverage = 0;
|
||||
for (IntervalStatisticLocus locus : loci)
|
||||
preComputedTotalCoverage += locus.getCoverage();
|
||||
for (SampleStatistics sample : samples.values())
|
||||
preComputedTotalCoverage += sample.totalCoverage();
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the Callable statuses for the interval as a whole
|
||||
* todo -- add a voting system for sample flags and add interval specific statuses
|
||||
*
|
||||
* @return the callable status(es) for the whole interval
|
||||
*/
|
||||
public Set<CallableStatus> callableStatuses() {
|
||||
Set<CallableStatus> output = new HashSet<CallableStatus>();
|
||||
|
||||
for (SampleStatistics sample : samples.values())
|
||||
output.addAll(sample.getCallableStatuses());
|
||||
|
||||
return output;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,83 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
public class LocusStatistics {
|
||||
final int coverage;
|
||||
final int rawCoverage;
|
||||
|
||||
public LocusStatistics() {
|
||||
this.coverage = 0;
|
||||
this.rawCoverage = 0;
|
||||
}
|
||||
|
||||
public LocusStatistics(int coverage, int rawCoverage) {
|
||||
this.coverage = coverage;
|
||||
this.rawCoverage = rawCoverage;
|
||||
}
|
||||
|
||||
public int getCoverage() {
|
||||
return coverage;
|
||||
}
|
||||
|
||||
public int getRawCoverage() {
|
||||
return rawCoverage;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates all applicable statuses from the coverages in this locus
|
||||
*
|
||||
* @param minimumCoverageThreshold the minimum threshold for determining low coverage/poor quality
|
||||
* @param maximumCoverageThreshold the maximum threshold for determining excessive coverage
|
||||
* @return a set of all statuses that apply
|
||||
*/
|
||||
public Set<CallableStatus> callableStatuses(int minimumCoverageThreshold, int maximumCoverageThreshold) {
|
||||
Set<CallableStatus> output = new HashSet<CallableStatus>();
|
||||
|
||||
// if too much coverage
|
||||
if (getCoverage() > maximumCoverageThreshold)
|
||||
output.add(CallableStatus.EXCESSIVE_COVERAGE);
|
||||
|
||||
// if not enough coverage
|
||||
if (getCoverage() < minimumCoverageThreshold) {
|
||||
// was there a lot of low Qual coverage?
|
||||
if (getRawCoverage() >= minimumCoverageThreshold)
|
||||
output.add(CallableStatus.POOR_QUALITY);
|
||||
// no?
|
||||
else {
|
||||
// is there any coverage?
|
||||
if (getRawCoverage() > 0)
|
||||
output.add(CallableStatus.LOW_COVERAGE);
|
||||
else
|
||||
output.add(CallableStatus.NO_COVERAGE);
|
||||
}
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,175 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Short one line description of the walker.
|
||||
*
|
||||
* @author Mauricio Carneiro
|
||||
* @since 2/1/12
|
||||
*/
|
||||
class SampleStatistics {
|
||||
private final GenomeLoc interval;
|
||||
private final ArrayList<LocusStatistics> loci;
|
||||
|
||||
private final int minimumCoverageThreshold;
|
||||
private final int maximumCoverageThreshold;
|
||||
private final int minimumMappingQuality;
|
||||
private final int minimumBaseQuality;
|
||||
|
||||
private int preComputedTotalCoverage = -1; // avoids re-calculating the total sum (-1 means we haven't pre-computed it yet)
|
||||
|
||||
private SampleStatistics(GenomeLoc interval, ArrayList<LocusStatistics> loci, int minimumCoverageThreshold, int maximumCoverageThreshold, int minimumMappingQuality, int minimumBaseQuality) {
|
||||
this.interval = interval;
|
||||
this.loci = loci;
|
||||
this.minimumCoverageThreshold = minimumCoverageThreshold;
|
||||
this.maximumCoverageThreshold = maximumCoverageThreshold;
|
||||
this.minimumMappingQuality = minimumMappingQuality;
|
||||
this.minimumBaseQuality = minimumBaseQuality;
|
||||
}
|
||||
|
||||
public SampleStatistics(GenomeLoc interval, int minimumCoverageThreshold, int maximumCoverageThreshold, int minimumMappingQuality, int minimumBaseQuality) {
|
||||
this(interval, new ArrayList<LocusStatistics>(interval.size()), minimumCoverageThreshold, maximumCoverageThreshold, minimumMappingQuality, minimumBaseQuality);
|
||||
|
||||
// Initialize every loci (this way we don't have to worry about non-existent loci in the object
|
||||
for (int i = 0; i < interval.size(); i++)
|
||||
this.loci.add(i, new LocusStatistics());
|
||||
|
||||
}
|
||||
|
||||
public long totalCoverage() {
|
||||
if (preComputedTotalCoverage < 0)
|
||||
calculateTotalCoverage();
|
||||
return preComputedTotalCoverage;
|
||||
}
|
||||
|
||||
public double averageCoverage() {
|
||||
if (preComputedTotalCoverage < 0)
|
||||
calculateTotalCoverage();
|
||||
return (double) preComputedTotalCoverage / loci.size();
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the callable statuses of the entire interval
|
||||
*
|
||||
* @return the callable statuses of the entire interval
|
||||
*/
|
||||
public Set<CallableStatus> getCallableStatuses() {
|
||||
|
||||
Map<CallableStatus, Integer> totals = new HashMap<CallableStatus, Integer>(CallableStatus.values().length);
|
||||
|
||||
// initialize map
|
||||
for (CallableStatus status : CallableStatus.values())
|
||||
totals.put(status, 0);
|
||||
|
||||
// sum up all the callable statuses for each locus
|
||||
for (int i = 0; i < interval.size(); i++) {
|
||||
for (CallableStatus status : callableStatus(i)) {
|
||||
int count = totals.get(status);
|
||||
|
||||
totals.put(status, count + 1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Set<CallableStatus> output = new HashSet<CallableStatus>();
|
||||
|
||||
// double to avoid type casting
|
||||
double intervalSize = interval.size();
|
||||
|
||||
double coverageStatusThreshold = 0.20;
|
||||
if ((totals.get(CallableStatus.NO_COVERAGE) / intervalSize) > coverageStatusThreshold)
|
||||
output.add(CallableStatus.NO_COVERAGE);
|
||||
|
||||
if ((totals.get(CallableStatus.LOW_COVERAGE) / intervalSize) > coverageStatusThreshold)
|
||||
output.add(CallableStatus.LOW_COVERAGE);
|
||||
|
||||
double excessiveCoverageThreshold = 0.20;
|
||||
if ((totals.get(CallableStatus.EXCESSIVE_COVERAGE) / intervalSize) > excessiveCoverageThreshold)
|
||||
output.add(CallableStatus.EXCESSIVE_COVERAGE);
|
||||
|
||||
double qualityStatusThreshold = 0.50;
|
||||
if ((totals.get(CallableStatus.POOR_QUALITY) / intervalSize) > qualityStatusThreshold)
|
||||
output.add(CallableStatus.POOR_QUALITY);
|
||||
|
||||
if (totals.get(CallableStatus.REF_N) > 0)
|
||||
output.add(CallableStatus.REF_N);
|
||||
|
||||
if (output.isEmpty()) {
|
||||
output.add(CallableStatus.PASS);
|
||||
}
|
||||
return output;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a locus to the interval wide stats
|
||||
*
|
||||
* @param locus The locus given as a GenomeLoc
|
||||
* @param pileup The pileup of that locus
|
||||
*/
|
||||
public void addLocus(GenomeLoc locus, ReadBackedPileup pileup) {
|
||||
if (!interval.containsP(locus))
|
||||
throw new ReviewedStingException(String.format("Locus %s is not part of the Interval", locus));
|
||||
|
||||
// a null pileup means there nothing ot add
|
||||
if (pileup != null) {
|
||||
|
||||
int locusIndex = locus.getStart() - interval.getStart();
|
||||
|
||||
int rawCoverage = pileup.depthOfCoverage();
|
||||
int coverage = pileup.getBaseAndMappingFilteredPileup(minimumBaseQuality, minimumMappingQuality).depthOfCoverage();
|
||||
|
||||
LocusStatistics locusData = new LocusStatistics(coverage, rawCoverage);
|
||||
|
||||
loci.add(locusIndex, locusData);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* returns the callable status of this locus without taking the reference base into account.
|
||||
*
|
||||
* @param locusIndex location in the genome to inquire (only one locus)
|
||||
* @return the callable status of a locus
|
||||
*/
|
||||
private Set<CallableStatus> callableStatus(int locusIndex) {
|
||||
LocusStatistics locus = loci.get(locusIndex);
|
||||
|
||||
return locus.callableStatuses(minimumCoverageThreshold, maximumCoverageThreshold);
|
||||
}
|
||||
|
||||
|
||||
private void calculateTotalCoverage() {
|
||||
preComputedTotalCoverage = 0;
|
||||
for (LocusStatistics locus : loci)
|
||||
preComputedTotalCoverage += locus.getCoverage();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -27,7 +27,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
|||
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
|
|
@ -42,12 +41,13 @@ public class AlleleFrequencyCalculationResult {
|
|||
// These variables are intended to contain the MLE and MAP (and their corresponding allele counts) of the site over all alternate alleles
|
||||
private double log10MLE;
|
||||
private double log10MAP;
|
||||
final private int[] alleleCountsOfMLE;
|
||||
final private int[] alleleCountsOfMAP;
|
||||
private final int[] alleleCountsOfMLE;
|
||||
private final int[] alleleCountsOfMAP;
|
||||
|
||||
// The posteriors seen, not including that of AF=0
|
||||
// TODO -- better implementation needed here (see below)
|
||||
private ArrayList<Double> log10PosteriorMatrixValues = new ArrayList<Double>(100000);
|
||||
private static final int POSTERIORS_CACHE_SIZE = 5000;
|
||||
private final double[] log10PosteriorMatrixValues = new double[POSTERIORS_CACHE_SIZE];
|
||||
private int currentPosteriorsCacheIndex = 0;
|
||||
private Double log10PosteriorMatrixSum = null;
|
||||
|
||||
// These variables are intended to contain the likelihood/posterior probability for the site's being monomorphic (i.e. AF=0 for all alternate alleles)
|
||||
|
|
@ -69,14 +69,9 @@ public class AlleleFrequencyCalculationResult {
|
|||
return log10MAP;
|
||||
}
|
||||
|
||||
public double getLog10PosteriorMatrixSum() {
|
||||
public double getLog10PosteriorsMatrixSumWithoutAFzero() {
|
||||
if ( log10PosteriorMatrixSum == null ) {
|
||||
// TODO -- we absolutely need a better implementation here as we don't want to store all values from the matrix in memory;
|
||||
// TODO -- will discuss with the team what the best option is
|
||||
final double[] tmp = new double[log10PosteriorMatrixValues.size()];
|
||||
for ( int i = 0; i < tmp.length; i++ )
|
||||
tmp[i] = log10PosteriorMatrixValues.get(i);
|
||||
log10PosteriorMatrixSum = MathUtils.log10sumLog10(tmp);
|
||||
log10PosteriorMatrixSum = MathUtils.log10sumLog10(log10PosteriorMatrixValues, 0, currentPosteriorsCacheIndex);
|
||||
}
|
||||
return log10PosteriorMatrixSum;
|
||||
}
|
||||
|
|
@ -103,7 +98,7 @@ public class AlleleFrequencyCalculationResult {
|
|||
alleleCountsOfMLE[i] = 0;
|
||||
alleleCountsOfMAP[i] = 0;
|
||||
}
|
||||
log10PosteriorMatrixValues.clear();
|
||||
currentPosteriorsCacheIndex = 0;
|
||||
log10PosteriorMatrixSum = null;
|
||||
}
|
||||
|
||||
|
|
@ -116,7 +111,8 @@ public class AlleleFrequencyCalculationResult {
|
|||
}
|
||||
|
||||
public void updateMAPifNeeded(final double log10LofK, final int[] alleleCountsForK) {
|
||||
log10PosteriorMatrixValues.add(log10LofK);
|
||||
addToPosteriorsCache(log10LofK);
|
||||
|
||||
if ( log10LofK > log10MAP ) {
|
||||
log10MAP = log10LofK;
|
||||
for ( int i = 0; i < alleleCountsForK.length; i++ )
|
||||
|
|
@ -124,6 +120,18 @@ public class AlleleFrequencyCalculationResult {
|
|||
}
|
||||
}
|
||||
|
||||
private void addToPosteriorsCache(final double log10LofK) {
|
||||
// add to the cache
|
||||
log10PosteriorMatrixValues[currentPosteriorsCacheIndex++] = log10LofK;
|
||||
|
||||
// if we've filled up the cache, then condense by summing up all of the values and placing the sum back into the first cell
|
||||
if ( currentPosteriorsCacheIndex == POSTERIORS_CACHE_SIZE ) {
|
||||
final double temporarySum = MathUtils.log10sumLog10(log10PosteriorMatrixValues, 0, currentPosteriorsCacheIndex);
|
||||
log10PosteriorMatrixValues[0] = temporarySum;
|
||||
currentPosteriorsCacheIndex = 1;
|
||||
}
|
||||
}
|
||||
|
||||
public void setLog10LikelihoodOfAFzero(final double log10LikelihoodOfAFzero) {
|
||||
this.log10LikelihoodOfAFzero = log10LikelihoodOfAFzero;
|
||||
if ( log10LikelihoodOfAFzero > log10MLE ) {
|
||||
|
|
|
|||
|
|
@ -72,6 +72,8 @@ import static java.lang.Math.pow;
|
|||
*/
|
||||
public class DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering implements Cloneable {
|
||||
|
||||
public final static double DEFAULT_PCR_ERROR_RATE = 1e-4;
|
||||
|
||||
protected final static int FIXED_PLOIDY = 2;
|
||||
protected final static int MAX_PLOIDY = FIXED_PLOIDY + 1;
|
||||
protected final static double ploidyAdjustment = log10(FIXED_PLOIDY);
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ public class UnifiedArgumentCollection {
|
|||
* het = 1e-3, P(hom-ref genotype) = 1 - 3 * het / 2, P(het genotype) = het, P(hom-var genotype) = het / 2
|
||||
*/
|
||||
@Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false)
|
||||
public Double heterozygosity = DiploidSNPGenotypePriors.HUMAN_HETEROZYGOSITY;
|
||||
public Double heterozygosity = UnifiedGenotyperEngine.HUMAN_SNP_HETEROZYGOSITY;
|
||||
|
||||
/**
|
||||
* The PCR error rate is independent of the sequencing error rate, which is necessary because we cannot necessarily
|
||||
|
|
@ -53,7 +53,7 @@ public class UnifiedArgumentCollection {
|
|||
* effectively acts as a cap on the base qualities.
|
||||
*/
|
||||
@Argument(fullName = "pcr_error_rate", shortName = "pcr_error", doc = "The PCR error rate to be used for computing fragment-based likelihoods", required = false)
|
||||
public Double PCR_error = DiploidSNPGenotypeLikelihoods.DEFAULT_PCR_ERROR_RATE;
|
||||
public Double PCR_error = DiploidSNPGenotypeLikelihoodsWithCorrectAlleleOrdering.DEFAULT_PCR_ERROR_RATE;
|
||||
|
||||
/**
|
||||
* Specifies how to determine the alternate allele to use for genotyping
|
||||
|
|
|
|||
|
|
@ -40,6 +40,7 @@ import org.broadinstitute.sting.utils.SampleUtils;
|
|||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
|
@ -126,8 +127,19 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
|
|||
@ArgumentCollection
|
||||
protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();
|
||||
public RodBinding<VariantContext> getDbsnpRodBinding() { return dbsnp.dbsnp; }
|
||||
|
||||
/**
|
||||
* If a call overlaps with a record from the provided comp track, the INFO field will be annotated
|
||||
* as such in the output with the track name (e.g. -comp:FOO will have 'FOO' in the INFO field).
|
||||
* Records that are filtered in the comp track will be ignored.
|
||||
* Note that 'dbSNP' has been special-cased (see the --dbsnp argument).
|
||||
*/
|
||||
@Input(fullName="comp", shortName = "comp", doc="comparison VCF file", required=false)
|
||||
public List<RodBinding<VariantContext>> comps = Collections.emptyList();
|
||||
public List<RodBinding<VariantContext>> getCompRodBindings() { return comps; }
|
||||
|
||||
// The following are not used by the Unified Genotyper
|
||||
public RodBinding<VariantContext> getSnpEffRodBinding() { return null; }
|
||||
public List<RodBinding<VariantContext>> getCompRodBindings() { return Collections.emptyList(); }
|
||||
public List<RodBinding<VariantContext>> getResourceRodBindings() { return Collections.emptyList(); }
|
||||
public boolean alwaysAppendDbsnpId() { return false; }
|
||||
|
||||
|
|
@ -216,7 +228,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
|
|||
verboseWriter.println("AFINFO\tLOC\tREF\tALT\tMAF\tF\tAFprior\tMLE\tMAP");
|
||||
|
||||
annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
|
||||
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, verboseWriter, annotationEngine, samples, UnifiedGenotyperEngine.DEFAULT_PLOIDY);
|
||||
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, verboseWriter, annotationEngine, samples, VariantContextUtils.DEFAULT_PLOIDY);
|
||||
|
||||
// initialize the header
|
||||
Set<VCFHeaderLine> headerInfo = getHeaderInfo();
|
||||
|
|
|
|||
|
|
@ -50,8 +50,9 @@ import java.util.*;
|
|||
|
||||
public class UnifiedGenotyperEngine {
|
||||
public static final String LOW_QUAL_FILTER_NAME = "LowQual";
|
||||
|
||||
public static final int DEFAULT_PLOIDY = 2;
|
||||
|
||||
public static final double HUMAN_SNP_HETEROZYGOSITY = 1e-3;
|
||||
public static final double HUMAN_INDEL_HETEROZYGOSITY = 1e-4;
|
||||
|
||||
public enum OUTPUT_MODE {
|
||||
/** produces calls only at variant sites */
|
||||
|
|
@ -111,7 +112,7 @@ public class UnifiedGenotyperEngine {
|
|||
// ---------------------------------------------------------------------------------------------------------
|
||||
@Requires({"toolkit != null", "UAC != null"})
|
||||
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) {
|
||||
this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()), DEFAULT_PLOIDY*(SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()).size()));
|
||||
this(toolkit, UAC, Logger.getLogger(UnifiedGenotyperEngine.class), null, null, SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()), VariantContextUtils.DEFAULT_PLOIDY*(SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()).size()));
|
||||
}
|
||||
|
||||
@Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0","ploidy>0"})
|
||||
|
|
@ -326,7 +327,7 @@ public class UnifiedGenotyperEngine {
|
|||
} else {
|
||||
phredScaledConfidence = QualityUtils.phredScaleErrorRate(PofF);
|
||||
if ( Double.isInfinite(phredScaledConfidence) ) {
|
||||
final double sum = AFresult.getLog10PosteriorMatrixSum();
|
||||
final double sum = AFresult.getLog10PosteriorsMatrixSumWithoutAFzero();
|
||||
phredScaledConfidence = (MathUtils.compareDoubles(sum, 0.0) == 0 ? 0 : -10.0 * sum);
|
||||
}
|
||||
}
|
||||
|
|
@ -369,7 +370,7 @@ public class UnifiedGenotyperEngine {
|
|||
|
||||
// the overall lod
|
||||
//double overallLog10PofNull = AFresult.log10AlleleFrequencyPosteriors[0];
|
||||
double overallLog10PofF = AFresult.getLog10PosteriorMatrixSum();
|
||||
double overallLog10PofF = AFresult.getLog10PosteriorsMatrixSumWithoutAFzero();
|
||||
//if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);
|
||||
|
||||
List<Allele> alternateAllelesToUse = builder.make().getAlternateAlleles();
|
||||
|
|
@ -380,7 +381,7 @@ public class UnifiedGenotyperEngine {
|
|||
afcm.get().getLog10PNonRef(vcForward, getAlleleFrequencyPriors(model), AFresult);
|
||||
//double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
|
||||
double forwardLog10PofNull = AFresult.getLog10PosteriorOfAFzero();
|
||||
double forwardLog10PofF = AFresult.getLog10PosteriorMatrixSum();
|
||||
double forwardLog10PofF = AFresult.getLog10PosteriorsMatrixSumWithoutAFzero();
|
||||
//if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF);
|
||||
|
||||
// the reverse lod
|
||||
|
|
@ -389,7 +390,7 @@ public class UnifiedGenotyperEngine {
|
|||
afcm.get().getLog10PNonRef(vcReverse, getAlleleFrequencyPriors(model), AFresult);
|
||||
//normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
|
||||
double reverseLog10PofNull = AFresult.getLog10PosteriorOfAFzero();
|
||||
double reverseLog10PofF = AFresult.getLog10PosteriorMatrixSum();
|
||||
double reverseLog10PofF = AFresult.getLog10PosteriorsMatrixSumWithoutAFzero();
|
||||
//if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF);
|
||||
|
||||
double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF;
|
||||
|
|
@ -424,7 +425,7 @@ public class UnifiedGenotyperEngine {
|
|||
|
||||
public static double[] generateNormalizedPosteriors(final AlleleFrequencyCalculationResult AFresult, final double[] normalizedPosteriors) {
|
||||
normalizedPosteriors[0] = AFresult.getLog10PosteriorOfAFzero();
|
||||
normalizedPosteriors[1] = AFresult.getLog10PosteriorMatrixSum();
|
||||
normalizedPosteriors[1] = AFresult.getLog10PosteriorsMatrixSumWithoutAFzero();
|
||||
return MathUtils.normalizeFromLog10(normalizedPosteriors);
|
||||
}
|
||||
|
||||
|
|
@ -622,8 +623,6 @@ public class UnifiedGenotyperEngine {
|
|||
|
||||
}
|
||||
|
||||
public static final double HUMAN_SNP_HETEROZYGOSITY = 1e-3;
|
||||
public static final double HUMAN_INDEL_HETEROZYGOSITY = 1e-4;
|
||||
protected double getTheta( final GenotypeLikelihoodsCalculationModel.Model model ) {
|
||||
if( model.name().contains("SNP") )
|
||||
return HUMAN_SNP_HETEROZYGOSITY;
|
||||
|
|
|
|||
|
|
@ -207,7 +207,9 @@ public class IndelSummary extends VariantEvaluator implements StandardEval {
|
|||
|
||||
break;
|
||||
default:
|
||||
throw new UserException.BadInput("Unexpected variant context type: " + eval);
|
||||
// TODO - MIXED, SYMBOLIC, and MNP records are skipped over
|
||||
//throw new UserException.BadInput("Unexpected variant context type: " + eval);
|
||||
break;
|
||||
}
|
||||
|
||||
return;
|
||||
|
|
|
|||
|
|
@ -24,7 +24,6 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
||||
|
||||
import net.sf.picard.PicardException;
|
||||
import net.sf.picard.liftover.LiftOver;
|
||||
import net.sf.picard.util.Interval;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
|
|
@ -73,7 +72,7 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
|
|||
public void initialize() {
|
||||
try {
|
||||
liftOver = new LiftOver(CHAIN);
|
||||
} catch (PicardException e) {
|
||||
} catch (RuntimeException e) {
|
||||
throw new UserException.BadInput("there is a problem with the chain file you are using: " + e.getMessage());
|
||||
}
|
||||
|
||||
|
|
@ -82,7 +81,7 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
|
|||
try {
|
||||
final SAMFileHeader toHeader = new SAMFileReader(NEW_SEQ_DICT).getFileHeader();
|
||||
liftOver.validateToSequences(toHeader.getSequenceDictionary());
|
||||
} catch (PicardException e) {
|
||||
} catch (RuntimeException e) {
|
||||
throw new UserException.BadInput("the chain file you are using is not compatible with the reference you are trying to lift over to; please use the appropriate chain file for the given reference");
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -237,7 +237,7 @@ public class MathUtils {
|
|||
public static double log10sumLog10(double[] log10p, int start, int finish) {
|
||||
double sum = 0.0;
|
||||
|
||||
double maxValue = Utils.findMaxEntry(log10p);
|
||||
double maxValue = arrayMax(log10p, finish);
|
||||
if(maxValue == Double.NEGATIVE_INFINITY)
|
||||
return sum;
|
||||
|
||||
|
|
@ -554,7 +554,7 @@ public class MathUtils {
|
|||
|
||||
// for precision purposes, we need to add (or really subtract, since they're
|
||||
// all negative) the largest value; also, we need to convert to normal-space.
|
||||
double maxValue = Utils.findMaxEntry(array);
|
||||
double maxValue = arrayMax(array);
|
||||
|
||||
// we may decide to just normalize in log space without converting to linear space
|
||||
if (keepInLogSpace) {
|
||||
|
|
@ -627,10 +627,14 @@ public class MathUtils {
|
|||
return maxI;
|
||||
}
|
||||
|
||||
public static double arrayMax(double[] array) {
|
||||
public static double arrayMax(final double[] array) {
|
||||
return array[maxElementIndex(array)];
|
||||
}
|
||||
|
||||
public static double arrayMax(final double[] array, final int endIndex) {
|
||||
return array[maxElementIndex(array, endIndex)];
|
||||
}
|
||||
|
||||
public static double arrayMin(double[] array) {
|
||||
return array[minElementIndex(array)];
|
||||
}
|
||||
|
|
|
|||
|
|
@ -290,32 +290,6 @@ public class Utils {
|
|||
return m;
|
||||
}
|
||||
|
||||
|
||||
// returns the maximum value in the array
|
||||
public static double findMaxEntry(double[] array) {
|
||||
return findIndexAndMaxEntry(array).first;
|
||||
}
|
||||
|
||||
// returns the index of the maximum value in the array
|
||||
public static int findIndexOfMaxEntry(double[] array) {
|
||||
return findIndexAndMaxEntry(array).second;
|
||||
}
|
||||
|
||||
// returns the the maximum value and its index in the array
|
||||
private static Pair<Double, Integer> findIndexAndMaxEntry(double[] array) {
|
||||
if ( array.length == 0 )
|
||||
return new Pair<Double, Integer>(0.0, -1);
|
||||
int index = 0;
|
||||
double max = array[0];
|
||||
for (int i = 1; i < array.length; i++) {
|
||||
if ( array[i] > max ) {
|
||||
max = array[i];
|
||||
index = i;
|
||||
}
|
||||
}
|
||||
return new Pair<Double, Integer>(max, index);
|
||||
}
|
||||
|
||||
/**
|
||||
* Splits expressions in command args by spaces and returns the array of expressions.
|
||||
* Expressions may use single or double quotes to group any individual expression, but not both.
|
||||
|
|
|
|||
|
|
@ -278,6 +278,10 @@ public class GenotypeLikelihoods {
|
|||
|
||||
public static int calculateNumLikelihoods(final int numAlleles, final int ploidy) {
|
||||
|
||||
// fast, closed form solution for diploid samples (most common use case)
|
||||
if (ploidy==2)
|
||||
return numAlleles*(numAlleles+1)/2;
|
||||
|
||||
if (numAlleles == 1)
|
||||
return 1;
|
||||
else if (ploidy == 1)
|
||||
|
|
|
|||
|
|
@ -30,7 +30,6 @@ import org.apache.commons.jexl2.JexlEngine;
|
|||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.util.popgen.HardyWeinbergCalculation;
|
||||
import org.broadinstitute.sting.commandline.Hidden;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.AbstractVCFCodec;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
|
|
@ -48,6 +47,8 @@ public class VariantContextUtils {
|
|||
public final static String MERGE_FILTER_PREFIX = "filterIn";
|
||||
|
||||
final public static JexlEngine engine = new JexlEngine();
|
||||
public static final int DEFAULT_PLOIDY = 2;
|
||||
|
||||
static {
|
||||
engine.setSilent(false); // will throw errors now for selects that don't evaluate properly
|
||||
engine.setLenient(false);
|
||||
|
|
@ -1123,7 +1124,7 @@ public class VariantContextUtils {
|
|||
}
|
||||
|
||||
// calculateNumLikelihoods takes total # of alleles. Use default # of chromosomes (ploidy) = 2
|
||||
final int numLikelihoods = GenotypeLikelihoods.calculateNumLikelihoods(1+numOriginalAltAlleles, UnifiedGenotyperEngine.DEFAULT_PLOIDY);
|
||||
final int numLikelihoods = GenotypeLikelihoods.calculateNumLikelihoods(1+numOriginalAltAlleles, DEFAULT_PLOIDY);
|
||||
for ( int PLindex = 0; PLindex < numLikelihoods; PLindex++ ) {
|
||||
final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex);
|
||||
// consider this entry only if both of the alleles are good
|
||||
|
|
|
|||
|
|
@ -142,6 +142,14 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
executeTest("test SLOD", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCompTrack() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b36KGReference + " -NO_HEADER -glm BOTH -comp:FOO " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
|
||||
Arrays.asList("71251d8893649ea9abd5d9aa65739ba1"));
|
||||
executeTest("test using comp track", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOutputParameter() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
|
|
|
|||
|
|
@ -284,6 +284,18 @@ public class MathUtilsUnitTest extends BaseTest {
|
|||
Assert.assertTrue(compareDoubleArrays(MathUtils.normalizeFromLog10(new double[] {-1.0, -3.0, -1.0, -2.0}), new double[] {0.1 * 1.0 / 0.211, 0.001 * 1.0 / 0.211, 0.1 * 1.0 / 0.211, 0.01 * 1.0 / 0.211}));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLog10sumLog10() {
|
||||
final double log3 = 0.477121254719662;
|
||||
Assert.assertEquals(MathUtils.compareDoubles(MathUtils.log10sumLog10(new double[]{0.0, 0.0, 0.0}), log3), 0);
|
||||
Assert.assertEquals(MathUtils.compareDoubles(MathUtils.log10sumLog10(new double[] {0.0, 0.0, 0.0}, 0), log3), 0);
|
||||
Assert.assertEquals(MathUtils.compareDoubles(MathUtils.log10sumLog10(new double[]{0.0, 0.0, 0.0}, 0, 3), log3), 0);
|
||||
|
||||
final double log2 = 0.301029995663981;
|
||||
Assert.assertEquals(MathUtils.compareDoubles(MathUtils.log10sumLog10(new double[] {0.0, 0.0, 0.0}, 0, 2), log2), 0);
|
||||
Assert.assertEquals(MathUtils.compareDoubles(MathUtils.log10sumLog10(new double[] {0.0, 0.0, 0.0}, 0, 1), 0.0), 0);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDotProduct() {
|
||||
Assert.assertEquals(MathUtils.dotProduct(new Double[]{-5.0,-3.0,2.0}, new Double[]{6.0,7.0,8.0}),-35.0,1e-3);
|
||||
|
|
|
|||
Loading…
Reference in New Issue