ReduceReads is now scattered by contig
It's no longer safe to scatter/gather by interval because now we don't hard-clip to the intervals anymore.
This commit is contained in:
parent
bdda63d973
commit
8a115edbaf
|
|
@ -85,7 +85,7 @@ import java.util.*;
|
|||
*/
|
||||
|
||||
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@PartitionBy(PartitionType.INTERVAL)
|
||||
@PartitionBy(PartitionType.CONTIG)
|
||||
@ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, BadCigarFilter.class})
|
||||
public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceReadsStash> {
|
||||
|
||||
|
|
|
|||
|
|
@ -1,149 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.qc;
|
||||
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.filters.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Emits intervals present in either the original or reduced bam but not the other.
|
||||
*
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* The original and reduced BAM files.
|
||||
* </p>
|
||||
*
|
||||
* <h2>Output</h2>
|
||||
* <p>
|
||||
* A list of intervals present in one bam but not the other.
|
||||
* </p>
|
||||
*
|
||||
* <h2>Examples</h2>
|
||||
* <pre>
|
||||
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
||||
* -I:original original.bam \
|
||||
* -I:reduced reduced.bam \
|
||||
* -R ref.fasta \
|
||||
* -T AssessReducedCoverage \
|
||||
* -o output.intervals
|
||||
* </pre>
|
||||
*
|
||||
* @author ebanks
|
||||
*/
|
||||
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, BadCigarFilter.class})
|
||||
@Hidden
|
||||
public class AssessReducedCoverage extends LocusWalker<GenomeLoc, GenomeLoc> implements TreeReducible<GenomeLoc> {
|
||||
|
||||
private static final String original = "original";
|
||||
private static final String reduced = "reduced";
|
||||
|
||||
@Output
|
||||
protected PrintStream out;
|
||||
|
||||
@Override
|
||||
public boolean includeReadsWithDeletionAtLoci() { return true; }
|
||||
|
||||
@Argument(fullName = "output_reduced_only_coverage", shortName = "output_reduced_only_coverage", doc = "Output an interval if the reduced bam has coverage where the original does not", required = false)
|
||||
public boolean OUTPUT_REDUCED_ONLY_INTERVALS = false;
|
||||
|
||||
public void initialize() {}
|
||||
|
||||
public GenomeLoc map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
|
||||
if ( tracker == null )
|
||||
return null;
|
||||
|
||||
Set<String> tags = getAllTags(context.getBasePileup());
|
||||
return (tags.contains(original) && !tags.contains(reduced)) ||
|
||||
(OUTPUT_REDUCED_ONLY_INTERVALS && tags.contains(reduced) && !tags.contains(original)) ? ref.getLocus() : null;
|
||||
}
|
||||
|
||||
private Set<String> getAllTags(final ReadBackedPileup pileup) {
|
||||
|
||||
final Set<String> tags = new HashSet<String>(10);
|
||||
|
||||
for ( final PileupElement p : pileup ) {
|
||||
if ( (int)p.getQual() > 2 && p.getMappingQual() > 0 && !p.isDeletion() )
|
||||
tags.addAll(getToolkit().getReaderIDForRead(p.getRead()).getTags().getPositionalTags());
|
||||
}
|
||||
|
||||
return tags;
|
||||
}
|
||||
|
||||
public void onTraversalDone(GenomeLoc sum) {
|
||||
if ( sum != null )
|
||||
out.println(sum);
|
||||
}
|
||||
|
||||
public GenomeLoc reduceInit() {
|
||||
return null;
|
||||
}
|
||||
|
||||
public GenomeLoc treeReduce(GenomeLoc lhs, GenomeLoc rhs) {
|
||||
if ( lhs == null )
|
||||
return rhs;
|
||||
|
||||
if ( rhs == null )
|
||||
return lhs;
|
||||
|
||||
// if contiguous, just merge them
|
||||
if ( lhs.contiguousP(rhs) )
|
||||
return getToolkit().getGenomeLocParser().createGenomeLoc(lhs.getContig(), lhs.getStart(), rhs.getStop());
|
||||
|
||||
// otherwise, print the lhs and start over with the rhs
|
||||
out.println(lhs);
|
||||
return rhs;
|
||||
}
|
||||
|
||||
public GenomeLoc reduce(GenomeLoc value, GenomeLoc sum) {
|
||||
if ( value == null )
|
||||
return sum;
|
||||
|
||||
if ( sum == null )
|
||||
return value;
|
||||
|
||||
// if contiguous, just merge them
|
||||
if ( sum.contiguousP(value) )
|
||||
return getToolkit().getGenomeLocParser().createGenomeLoc(sum.getContig(), sum.getStart(), value.getStop());
|
||||
|
||||
// otherwise, print the sum and start over with the value
|
||||
out.println(sum);
|
||||
return value;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,183 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.qc;
|
||||
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Emits intervals in which the differences between the original and reduced bam quals are bigger epsilon (unless the quals of
|
||||
* the reduced bam are above sufficient threshold)
|
||||
*
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* The original and reduced BAM files.
|
||||
* </p>
|
||||
*
|
||||
* <h2>Output</h2>
|
||||
* <p>
|
||||
* A list of intervals in which the differences between the original and reduced bam quals are bigger epsilon.
|
||||
* </p>
|
||||
*
|
||||
* <h2>Examples</h2>
|
||||
* <pre>
|
||||
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
||||
* -I:original original.bam \
|
||||
* -I:reduced reduced.bam \
|
||||
* -R ref.fasta \
|
||||
* -T AssessReducedQuals \
|
||||
* -o output.intervals
|
||||
* </pre>
|
||||
*
|
||||
* @author ami
|
||||
*/
|
||||
|
||||
public class AssessReducedQuals extends LocusWalker<GenomeLoc, GenomeLoc> implements TreeReducible<GenomeLoc> {
|
||||
|
||||
private static final String reduced = "reduced";
|
||||
private static final int originalQualsIndex = 0;
|
||||
private static final int reducedQualsIndex = 1;
|
||||
|
||||
@Argument(fullName = "sufficientQualSum", shortName = "sufficientQualSum", doc = "When a reduced bam qual sum is above this threshold, it passes even without comparing to the non-reduced bam ", required = false)
|
||||
public int sufficientQualSum = 600;
|
||||
|
||||
@Argument(fullName = "qual_epsilon", shortName = "epsilon", doc = "when |Quals_reduced_bam - Quals_original_bam| > epsilon*Quals_original_bam we output this interval", required = false)
|
||||
public int qual_epsilon = 0;
|
||||
|
||||
@Argument(fullName = "debugLevel", shortName = "debug", doc = "debug level: NO_DEBUG, PRINT_LOCI,PRINT_PILEUPS", required = false)
|
||||
public DebugLevel debugLevel = DebugLevel.NO_DEBUG;
|
||||
|
||||
@Output
|
||||
protected PrintStream out;
|
||||
|
||||
public void initialize() {
|
||||
if (debugLevel != DebugLevel.NO_DEBUG)
|
||||
out.println(" Debug mode" +
|
||||
"Debug:\tsufficientQualSum: "+sufficientQualSum+ "\n " +
|
||||
"Debug:\tqual_epsilon: "+qual_epsilon);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean includeReadsWithDeletionAtLoci() { return true; }
|
||||
|
||||
@Override
|
||||
public GenomeLoc map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( tracker == null )
|
||||
return null;
|
||||
|
||||
boolean reportLocus;
|
||||
final int[] quals = getPileupQuals(context.getBasePileup());
|
||||
if (debugLevel != DebugLevel.NO_DEBUG)
|
||||
out.println("Debug:\tLocus Quals\t"+ref.getLocus()+"\toriginal\t"+quals[originalQualsIndex]+"\treduced\t"+quals[reducedQualsIndex]);
|
||||
final int epsilon = MathUtils.fastRound(quals[originalQualsIndex]*qual_epsilon);
|
||||
final int calcOriginalQuals = Math.min(quals[originalQualsIndex],sufficientQualSum);
|
||||
final int calcReducedQuals = Math.min(quals[reducedQualsIndex],sufficientQualSum);
|
||||
final int OriginalReducedQualDiff = calcOriginalQuals - calcReducedQuals;
|
||||
reportLocus = OriginalReducedQualDiff > epsilon || OriginalReducedQualDiff < -1*epsilon;
|
||||
if(debugLevel != DebugLevel.NO_DEBUG && reportLocus)
|
||||
out.println("Debug:\tEmited Locus\t"+ref.getLocus()+"\toriginal\t"+quals[originalQualsIndex]+"\treduced\t"+quals[reducedQualsIndex]+"\tepsilon\t"+epsilon+"\tdiff\t"+OriginalReducedQualDiff);
|
||||
|
||||
return reportLocus ? ref.getLocus() : null;
|
||||
}
|
||||
|
||||
private int[] getPileupQuals(final ReadBackedPileup readPileup) {
|
||||
|
||||
final int[] quals = new int[2];
|
||||
String[] printPileup = {"Debug 2:\toriginal pileup:\t"+readPileup.getLocation()+"\nDebug 2:----------------------------------\n",
|
||||
"Debug 2:\t reduced pileup:\t"+readPileup.getLocation()+"\nDebug 2:----------------------------------\n"};
|
||||
|
||||
for( PileupElement p : readPileup ){
|
||||
final List<String> tags = getToolkit().getReaderIDForRead(p.getRead()).getTags().getPositionalTags();
|
||||
if ( isGoodRead(p) ){
|
||||
final int tempQual = (int)(p.getQual()) * p.getRepresentativeCount();
|
||||
final int tagIndex = getTagIndex(tags);
|
||||
quals[tagIndex] += tempQual;
|
||||
if(debugLevel == DebugLevel.PRINT_PILEUPS)
|
||||
printPileup[tagIndex] += "\tDebug 2: ("+p+")\tMQ="+p.getMappingQual()+":QU="+p.getQual()+":RC="+p.getRepresentativeCount()+":OS="+p.getOffset()+"\n";
|
||||
}
|
||||
}
|
||||
if(debugLevel == DebugLevel.PRINT_PILEUPS){
|
||||
out.println(printPileup[originalQualsIndex]);
|
||||
out.println(printPileup[reducedQualsIndex]);
|
||||
}
|
||||
return quals;
|
||||
}
|
||||
|
||||
|
||||
private boolean isGoodRead(PileupElement p){
|
||||
// TODO -- You want to check whether the read itself is a reduced read and only
|
||||
// TODO -- for them you want to ignore that min mapping quality cutoff (but you *do* still want the min base cutoff).
|
||||
return !p.isDeletion() && ((p.getRead().isReducedRead()) || (!p.getRead().isReducedRead() && (int)p.getQual() >= 20 && p.getMappingQual() >= 20));
|
||||
}
|
||||
|
||||
private int getTagIndex(List<String> tags){
|
||||
return tags.contains(reduced) ? 1 : 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onTraversalDone(GenomeLoc sum) {
|
||||
if ( sum != null )
|
||||
out.println(sum);
|
||||
}
|
||||
|
||||
@Override
|
||||
public GenomeLoc treeReduce(GenomeLoc lhs, GenomeLoc rhs) {
|
||||
if ( lhs == null )
|
||||
return rhs;
|
||||
|
||||
if ( rhs == null )
|
||||
return lhs;
|
||||
|
||||
// if contiguous, just merge them
|
||||
if ( lhs.contiguousP(rhs) )
|
||||
return getToolkit().getGenomeLocParser().createGenomeLoc(lhs.getContig(), lhs.getStart(), rhs.getStop());
|
||||
|
||||
// otherwise, print the lhs and start over with the rhs
|
||||
out.println(lhs);
|
||||
return rhs;
|
||||
}
|
||||
|
||||
@Override
|
||||
public GenomeLoc reduceInit() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public GenomeLoc reduce(GenomeLoc value, GenomeLoc sum) {
|
||||
if ( value == null )
|
||||
return sum;
|
||||
|
||||
if ( sum == null )
|
||||
return value;
|
||||
|
||||
// if contiguous, just merge them
|
||||
if ( sum.contiguousP(value) )
|
||||
return getToolkit().getGenomeLocParser().createGenomeLoc(sum.getContig(), sum.getStart(), value.getStop());
|
||||
|
||||
// otherwise, print the sum and start over with the value
|
||||
out.println(sum);
|
||||
return value;
|
||||
}
|
||||
|
||||
public enum DebugLevel {
|
||||
NO_DEBUG,
|
||||
/**
|
||||
* Print locus level information (such as locus quals) and loci with unmatch quals
|
||||
*/
|
||||
PRINT_LOCI,
|
||||
/**
|
||||
* Print the pileup infomarion of the reduced bam files and the original bam files
|
||||
*/
|
||||
PRINT_PILEUPS
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue