From bb92e31118cd0342077ce1ef29d20fcca11cca58 Mon Sep 17 00:00:00 2001 From: ebanks Date: Wed, 16 Dec 2009 21:39:58 +0000 Subject: [PATCH] Optimizations: 1. push the ReadBackedPileup filtering up into the ReadFilters for read-based filters 2. stop querying the cigar for its length (just do it once) git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2381 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/filters/BadMateReadFilter.java | 46 +++++++++++++++++++ .../filters/MappingQualityReadFilter.java | 46 +++++++++++++++++++ .../coverage/DepthOfCoverageWalker.java | 2 +- .../genotyper/UnifiedArgumentCollection.java | 6 --- .../walkers/genotyper/UnifiedGenotyper.java | 21 +++++---- .../sting/utils/AlignmentUtils.java | 22 +++++---- .../sting/utils/pileup/ReadBackedPileup.java | 5 ++ 7 files changed, 122 insertions(+), 26 deletions(-) create mode 100755 java/src/org/broadinstitute/sting/gatk/filters/BadMateReadFilter.java create mode 100755 java/src/org/broadinstitute/sting/gatk/filters/MappingQualityReadFilter.java diff --git a/java/src/org/broadinstitute/sting/gatk/filters/BadMateReadFilter.java b/java/src/org/broadinstitute/sting/gatk/filters/BadMateReadFilter.java new file mode 100755 index 000000000..2abe7a7a7 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/filters/BadMateReadFilter.java @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2009 The Broad Institute + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.filters; + +import net.sf.picard.filter.SamRecordFilter; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.cmdLine.Argument; + +/** + * Filter out reads with low mapping qualities. + * + * @author ebanks + * @version 0.1 + */ + +public class BadMateReadFilter implements SamRecordFilter { + + @Argument(fullName = "use_reads_with_bad_mates", shortName = "bad_mates", doc = "Use reads whose mates are mapped excessively far away for calling", required = false) + public boolean USE_BADLY_MATED_READS = false; + + public boolean filterOut(SAMRecord rec) { + return (!USE_BADLY_MATED_READS && rec.getReadPairedFlag() && !rec.getMateUnmappedFlag() && rec.getMateReferenceIndex() != rec.getReferenceIndex()); + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityReadFilter.java b/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityReadFilter.java new file mode 100755 index 000000000..b9ad8a60b --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/filters/MappingQualityReadFilter.java @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2009 The Broad Institute + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.filters; + +import net.sf.picard.filter.SamRecordFilter; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.cmdLine.Argument; + +/** + * Filter out reads with low mapping qualities. + * + * @author ebanks + * @version 0.1 + */ + +public class MappingQualityReadFilter implements SamRecordFilter { + + @Argument(fullName = "min_mapping_quality_score", shortName = "mmq", doc = "Minimum read mapping quality required to consider a read for calling", required = false) + public int MIN_MAPPING_QUALTY_SCORE = 30; + + public boolean filterOut(SAMRecord rec) { + return (rec.getMappingQuality() < MIN_MAPPING_QUALTY_SCORE); + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java index fa7195e26..e181e1cb3 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java @@ -115,7 +115,7 @@ public class DepthOfCoverageWalker extends LocusWalker>, Integer> { @ArgumentCollection private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection(); @@ -183,7 +184,10 @@ public class UnifiedGenotyper extends LocusWalker commandLineArgs = CommandLineUtils.getApproximateCommandLineArguments(Collections.singleton(UAC)); + Set x = new HashSet(); + x.add(UAC); + x.addAll(getToolkit().getFilters()); + Map commandLineArgs = CommandLineUtils.getApproximateCommandLineArguments(x); for ( Map.Entry commandLineArg : commandLineArgs.entrySet() ) headerInfo.add(new VCFHeaderLine(String.format("UG_%s", commandLineArg.getKey()), commandLineArg.getValue())); @@ -203,10 +207,10 @@ public class UnifiedGenotyper extends LocusWalker filteredPileup = new ArrayList(); for ( PileupElement p : pileup ) { - if ( (useBadMates || !p.getRead().getReadPairedFlag() || p.getRead().getMateUnmappedFlag() || p.getRead().getMateReferenceIndex() == p.getRead().getReferenceIndex()) && - AlignmentUtils.mismatchesInRefWindow(p, refContext, true) <= maxMismatches ) + if ( AlignmentUtils.mismatchesInRefWindow(p, refContext, true) <= maxMismatches ) filteredPileup.add(p); } return new ReadBackedPileup(pileup.getLocation(), filteredPileup); diff --git a/java/src/org/broadinstitute/sting/utils/AlignmentUtils.java b/java/src/org/broadinstitute/sting/utils/AlignmentUtils.java index c9c4e9ffd..f927547e8 100644 --- a/java/src/org/broadinstitute/sting/utils/AlignmentUtils.java +++ b/java/src/org/broadinstitute/sting/utils/AlignmentUtils.java @@ -180,26 +180,28 @@ public class AlignmentUtils { int mismatches = 0; - GenomeLoc window = ref.getWindow(); + int windowStart = (int)ref.getWindow().getStart(); + int windowStop = (int)ref.getWindow().getStop(); char[] refBases = ref.getBases(); byte[] readBases = p.getRead().getReadBases(); Cigar c = p.getRead().getCigar(); int readIndex = 0; int currentPos = p.getRead().getAlignmentStart(); - int refIndex = Math.max(0, currentPos - (int)window.getStart()); + int refIndex = Math.max(0, currentPos - windowStart); for (int i = 0 ; i < c.numCigarElements() ; i++) { CigarElement ce = c.getCigarElement(i); + int cigarElementLength = ce.getLength(); switch ( ce.getOperator() ) { case M: - for (int j = 0; j < ce.getLength(); j++, readIndex++, currentPos++) { + for (int j = 0; j < cigarElementLength; j++, readIndex++, currentPos++) { // are we past the ref window? - if ( currentPos > window.getStop() ) + if ( currentPos > windowStop ) break; // are we before the ref window? - if ( currentPos < window.getStart() ) + if ( currentPos < windowStart ) continue; char refChr = refBases[refIndex++]; @@ -214,15 +216,15 @@ public class AlignmentUtils { } break; case I: - readIndex += ce.getLength(); + readIndex += cigarElementLength; break; case D: - currentPos += ce.getLength(); - if ( currentPos > window.getStart() ) - refIndex += Math.min(ce.getLength(), currentPos - window.getStart()); + currentPos += cigarElementLength; + if ( currentPos > windowStart ) + refIndex += Math.min(cigarElementLength, currentPos - windowStart); break; case S: - readIndex += ce.getLength(); + readIndex += cigarElementLength; break; default: throw new StingException("Only M,I,D,S cigar elements are currently supported; there was " + ce.getOperator()); } diff --git a/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java b/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java index 2494bae10..6c86cd690 100755 --- a/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java +++ b/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java @@ -176,6 +176,11 @@ public class ReadBackedPileup implements Iterable { return new ReadBackedPileup(loc, filteredPileup); } + + public ReadBackedPileup getBaseFilteredPileup( int minBaseQ ) { + return getBaseAndMappingFilteredPileup(minBaseQ, -1); + } + /** * Returns a pileup randomly downsampled to the desiredCoverage. *