diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java index 629a27c48..2061c5364 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java @@ -85,7 +85,7 @@ import java.util.*; */ @DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} ) -@PartitionBy(PartitionType.INTERVAL) +@PartitionBy(PartitionType.CONTIG) @ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, BadCigarFilter.class}) public class ReduceReads extends ReadWalker, ReduceReadsStash> { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedCoverage.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedCoverage.java deleted file mode 100755 index d38c11594..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedCoverage.java +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.qc; - -import org.broadinstitute.sting.commandline.*; -import org.broadinstitute.sting.gatk.CommandLineGATK; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.filters.*; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; -import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; - -import java.io.PrintStream; -import java.util.*; - -/** - * Emits intervals present in either the original or reduced bam but not the other. - * - *

Input

- *

- * The original and reduced BAM files. - *

- * - *

Output

- *

- * A list of intervals present in one bam but not the other. - *

- * - *

Examples

- *
- * java -Xmx2g -jar GenomeAnalysisTK.jar \
- *   -I:original original.bam \
- *   -I:reduced reduced.bam \
- *   -R ref.fasta \
- *   -T AssessReducedCoverage \
- *   -o output.intervals
- * 
- * - * @author ebanks - */ -@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} ) -@ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, BadCigarFilter.class}) -@Hidden -public class AssessReducedCoverage extends LocusWalker implements TreeReducible { - - private static final String original = "original"; - private static final String reduced = "reduced"; - - @Output - protected PrintStream out; - - @Override - public boolean includeReadsWithDeletionAtLoci() { return true; } - - @Argument(fullName = "output_reduced_only_coverage", shortName = "output_reduced_only_coverage", doc = "Output an interval if the reduced bam has coverage where the original does not", required = false) - public boolean OUTPUT_REDUCED_ONLY_INTERVALS = false; - - public void initialize() {} - - public GenomeLoc map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - - if ( tracker == null ) - return null; - - Set tags = getAllTags(context.getBasePileup()); - return (tags.contains(original) && !tags.contains(reduced)) || - (OUTPUT_REDUCED_ONLY_INTERVALS && tags.contains(reduced) && !tags.contains(original)) ? ref.getLocus() : null; - } - - private Set getAllTags(final ReadBackedPileup pileup) { - - final Set tags = new HashSet(10); - - for ( final PileupElement p : pileup ) { - if ( (int)p.getQual() > 2 && p.getMappingQual() > 0 && !p.isDeletion() ) - tags.addAll(getToolkit().getReaderIDForRead(p.getRead()).getTags().getPositionalTags()); - } - - return tags; - } - - public void onTraversalDone(GenomeLoc sum) { - if ( sum != null ) - out.println(sum); - } - - public GenomeLoc reduceInit() { - return null; - } - - public GenomeLoc treeReduce(GenomeLoc lhs, GenomeLoc rhs) { - if ( lhs == null ) - return rhs; - - if ( rhs == null ) - return lhs; - - // if contiguous, just merge them - if ( lhs.contiguousP(rhs) ) - return getToolkit().getGenomeLocParser().createGenomeLoc(lhs.getContig(), lhs.getStart(), rhs.getStop()); - - // otherwise, print the lhs and start over with the rhs - out.println(lhs); - return rhs; - } - - public GenomeLoc reduce(GenomeLoc value, GenomeLoc sum) { - if ( value == null ) - return sum; - - if ( sum == null ) - return value; - - // if contiguous, just merge them - if ( sum.contiguousP(value) ) - return getToolkit().getGenomeLocParser().createGenomeLoc(sum.getContig(), sum.getStart(), value.getStop()); - - // otherwise, print the sum and start over with the value - out.println(sum); - return value; - } -} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java deleted file mode 100644 index 2c70d44e2..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java +++ /dev/null @@ -1,183 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.qc; - -import org.broadinstitute.sting.commandline.Argument; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.LocusWalker; -import org.broadinstitute.sting.gatk.walkers.TreeReducible; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; - -import java.io.PrintStream; -import java.util.List; - -/** - * Emits intervals in which the differences between the original and reduced bam quals are bigger epsilon (unless the quals of - * the reduced bam are above sufficient threshold) - * - *

Input

- *

- * The original and reduced BAM files. - *

- * - *

Output

- *

- * A list of intervals in which the differences between the original and reduced bam quals are bigger epsilon. - *

- * - *

Examples

- *
- * java -Xmx2g -jar GenomeAnalysisTK.jar \
- *   -I:original original.bam \
- *   -I:reduced reduced.bam \
- *   -R ref.fasta \
- *   -T AssessReducedQuals \
- *   -o output.intervals
- * 
- * - * @author ami - */ - -public class AssessReducedQuals extends LocusWalker implements TreeReducible { - - private static final String reduced = "reduced"; - private static final int originalQualsIndex = 0; - private static final int reducedQualsIndex = 1; - - @Argument(fullName = "sufficientQualSum", shortName = "sufficientQualSum", doc = "When a reduced bam qual sum is above this threshold, it passes even without comparing to the non-reduced bam ", required = false) - public int sufficientQualSum = 600; - - @Argument(fullName = "qual_epsilon", shortName = "epsilon", doc = "when |Quals_reduced_bam - Quals_original_bam| > epsilon*Quals_original_bam we output this interval", required = false) - public int qual_epsilon = 0; - - @Argument(fullName = "debugLevel", shortName = "debug", doc = "debug level: NO_DEBUG, PRINT_LOCI,PRINT_PILEUPS", required = false) - public DebugLevel debugLevel = DebugLevel.NO_DEBUG; - - @Output - protected PrintStream out; - - public void initialize() { - if (debugLevel != DebugLevel.NO_DEBUG) - out.println(" Debug mode" + - "Debug:\tsufficientQualSum: "+sufficientQualSum+ "\n " + - "Debug:\tqual_epsilon: "+qual_epsilon); - } - - @Override - public boolean includeReadsWithDeletionAtLoci() { return true; } - - @Override - public GenomeLoc map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if ( tracker == null ) - return null; - - boolean reportLocus; - final int[] quals = getPileupQuals(context.getBasePileup()); - if (debugLevel != DebugLevel.NO_DEBUG) - out.println("Debug:\tLocus Quals\t"+ref.getLocus()+"\toriginal\t"+quals[originalQualsIndex]+"\treduced\t"+quals[reducedQualsIndex]); - final int epsilon = MathUtils.fastRound(quals[originalQualsIndex]*qual_epsilon); - final int calcOriginalQuals = Math.min(quals[originalQualsIndex],sufficientQualSum); - final int calcReducedQuals = Math.min(quals[reducedQualsIndex],sufficientQualSum); - final int OriginalReducedQualDiff = calcOriginalQuals - calcReducedQuals; - reportLocus = OriginalReducedQualDiff > epsilon || OriginalReducedQualDiff < -1*epsilon; - if(debugLevel != DebugLevel.NO_DEBUG && reportLocus) - out.println("Debug:\tEmited Locus\t"+ref.getLocus()+"\toriginal\t"+quals[originalQualsIndex]+"\treduced\t"+quals[reducedQualsIndex]+"\tepsilon\t"+epsilon+"\tdiff\t"+OriginalReducedQualDiff); - - return reportLocus ? ref.getLocus() : null; - } - - private int[] getPileupQuals(final ReadBackedPileup readPileup) { - - final int[] quals = new int[2]; - String[] printPileup = {"Debug 2:\toriginal pileup:\t"+readPileup.getLocation()+"\nDebug 2:----------------------------------\n", - "Debug 2:\t reduced pileup:\t"+readPileup.getLocation()+"\nDebug 2:----------------------------------\n"}; - - for( PileupElement p : readPileup ){ - final List tags = getToolkit().getReaderIDForRead(p.getRead()).getTags().getPositionalTags(); - if ( isGoodRead(p) ){ - final int tempQual = (int)(p.getQual()) * p.getRepresentativeCount(); - final int tagIndex = getTagIndex(tags); - quals[tagIndex] += tempQual; - if(debugLevel == DebugLevel.PRINT_PILEUPS) - printPileup[tagIndex] += "\tDebug 2: ("+p+")\tMQ="+p.getMappingQual()+":QU="+p.getQual()+":RC="+p.getRepresentativeCount()+":OS="+p.getOffset()+"\n"; - } - } - if(debugLevel == DebugLevel.PRINT_PILEUPS){ - out.println(printPileup[originalQualsIndex]); - out.println(printPileup[reducedQualsIndex]); - } - return quals; - } - - - private boolean isGoodRead(PileupElement p){ - // TODO -- You want to check whether the read itself is a reduced read and only - // TODO -- for them you want to ignore that min mapping quality cutoff (but you *do* still want the min base cutoff). - return !p.isDeletion() && ((p.getRead().isReducedRead()) || (!p.getRead().isReducedRead() && (int)p.getQual() >= 20 && p.getMappingQual() >= 20)); - } - - private int getTagIndex(List tags){ - return tags.contains(reduced) ? 1 : 0; - } - - @Override - public void onTraversalDone(GenomeLoc sum) { - if ( sum != null ) - out.println(sum); - } - - @Override - public GenomeLoc treeReduce(GenomeLoc lhs, GenomeLoc rhs) { - if ( lhs == null ) - return rhs; - - if ( rhs == null ) - return lhs; - - // if contiguous, just merge them - if ( lhs.contiguousP(rhs) ) - return getToolkit().getGenomeLocParser().createGenomeLoc(lhs.getContig(), lhs.getStart(), rhs.getStop()); - - // otherwise, print the lhs and start over with the rhs - out.println(lhs); - return rhs; - } - - @Override - public GenomeLoc reduceInit() { - return null; - } - - @Override - public GenomeLoc reduce(GenomeLoc value, GenomeLoc sum) { - if ( value == null ) - return sum; - - if ( sum == null ) - return value; - - // if contiguous, just merge them - if ( sum.contiguousP(value) ) - return getToolkit().getGenomeLocParser().createGenomeLoc(sum.getContig(), sum.getStart(), value.getStop()); - - // otherwise, print the sum and start over with the value - out.println(sum); - return value; - } - - public enum DebugLevel { - NO_DEBUG, - /** - * Print locus level information (such as locus quals) and loci with unmatch quals - */ - PRINT_LOCI, - /** - * Print the pileup infomarion of the reduced bam files and the original bam files - */ - PRINT_PILEUPS - } -}