An initial fix for performance issues when filtering UG with new StratifiedAlignmentContext.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3724 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2010-07-07 01:07:46 +00:00
parent be75b087ec
commit 773a72e6ea
6 changed files with 104 additions and 27 deletions

View File

@ -106,10 +106,14 @@ public class StratifiedAlignmentContext<RBP extends ReadBackedPileup> {
for(String sampleName: pileup.getSamples()) {
RBP pileupBySample = (RBP)pileup.getPileupForSample(sampleName);
// Don't add empty pileups to the split context.
if(pileupBySample.size() == 0)
continue;
if(sampleName != null)
contexts.put(sampleName,new StratifiedAlignmentContext<RBP>(loc,pileupBySample));
else {
if(assumedSingleSample == null && pileupBySample.size() > 0)
if(assumedSingleSample == null)
throw new StingException("Missing read group for read " + pileupBySample.iterator().next().getRead());
contexts.put(assumedSingleSample,new StratifiedAlignmentContext<RBP>(loc,pileupBySample));
}

View File

@ -155,6 +155,7 @@ public class UnifiedGenotyperEngine {
return null;
VariantCallContext call;
BadlyMatedReadPileupFilter badlyMatedReadPileupFilter = new BadlyMatedReadPileupFilter(refContext);
if ( rawContext.hasExtendedEventPileup() ) {
@ -164,7 +165,7 @@ public class UnifiedGenotyperEngine {
ReadBackedExtendedEventPileup pileup = rawPileup.getMappingFilteredPileup(UAC.MIN_MAPPING_QUALTY_SCORE);
// filter the context based on bad mates and mismatch rate
pileup = filterPileup(pileup, refContext);
pileup = pileup.getFilteredPileup(badlyMatedReadPileupFilter);
// don't call when there is no coverage
if ( pileup.size() == 0 && !UAC.ALL_BASES_MODE )
@ -185,7 +186,7 @@ public class UnifiedGenotyperEngine {
ReadBackedPileup pileup = rawPileup.getBaseAndMappingFilteredPileup(UAC.MIN_BASE_QUALTY_SCORE, UAC.MIN_MAPPING_QUALTY_SCORE);
// filter the context based on bad mates and mismatch rate
pileup = filterPileup(pileup, refContext);
pileup = pileup.getFilteredPileup(badlyMatedReadPileupFilter);
// don't call when there is no coverage
if ( pileup.size() == 0 && !UAC.ALL_BASES_MODE )
@ -219,31 +220,24 @@ public class UnifiedGenotyperEngine {
return call;
}
// filter based on maximum mismatches and bad mates
private ReadBackedPileup filterPileup(ReadBackedPileup pileup, ReferenceContext refContext) {
ArrayList<PileupElement> filteredPileup = new ArrayList<PileupElement>();
for ( PileupElement p : pileup ) {
if ( (UAC.USE_BADLY_MATED_READS || !p.getRead().getReadPairedFlag() || p.getRead().getMateUnmappedFlag() || p.getRead().getMateReferenceIndex() == p.getRead().getReferenceIndex()) &&
AlignmentUtils.mismatchesInRefWindow(p, refContext, true) <= UAC.MAX_MISMATCHES )
filteredPileup.add(p);
}
return new ReadBackedPileupImpl(pileup.getLocation(), filteredPileup);
}
// filter based on maximum mismatches and bad mates
private ReadBackedExtendedEventPileup filterPileup(ReadBackedExtendedEventPileup pileup, ReferenceContext refContext) {
ArrayList<ExtendedEventPileupElement> filteredPileup = new ArrayList<ExtendedEventPileupElement>();
for ( ExtendedEventPileupElement p : pileup.toExtendedIterable() ) {
if ( (UAC.USE_BADLY_MATED_READS || !p.getRead().getReadPairedFlag() || p.getRead().getMateUnmappedFlag() || p.getRead().getMateReferenceIndex() == p.getRead().getReferenceIndex()) &&
AlignmentUtils.mismatchesInRefWindow(p, refContext, true) <= UAC.MAX_MISMATCHES )
filteredPileup.add(p);
}
return new ReadBackedExtendedEventPileupImpl(pileup.getLocation(), filteredPileup);
}
private static boolean isValidDeletionFraction(double d) {
return ( d >= 0.0 && d <= 1.0 );
}
/**
* Filters low quality reads out of the pileup.
*/
private class BadlyMatedReadPileupFilter implements PileupElementFilter {
private ReferenceContext refContext;
public BadlyMatedReadPileupFilter(ReferenceContext refContext) { this.refContext = refContext; }
public boolean allow(PileupElement pileupElement) {
return ((UAC.USE_BADLY_MATED_READS ||
!pileupElement.getRead().getReadPairedFlag() ||
pileupElement.getRead().getMateUnmappedFlag() ||
pileupElement.getRead().getMateReferenceIndex() == pileupElement.getRead().getReferenceIndex()) &&
AlignmentUtils.mismatchesInRefWindow(pileupElement, refContext, true) <= UAC.MAX_MISMATCHES );
}
}
}

View File

@ -385,6 +385,36 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
}
}
/**
* Gets a pileup consisting of all those elements passed by a given filter.
* @param filter Filter to use when testing for elements.
* @return a pileup without the given filtered elements.
*/
public RBP getFilteredPileup(PileupElementFilter filter) {
if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
for(String sampleName: tracker.getSamples()) {
PileupElementTracker<PE> perSampleElements = tracker.getElements(sampleName);
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getFilteredPileup(filter);
filteredTracker.addElements(sampleName,pileup.pileupElementTracker);
}
return (RBP)createNewPileup(loc,filteredTracker);
}
else {
UnifiedPileupElementTracker<PE> filteredTracker = new UnifiedPileupElementTracker<PE>();
for ( PE p : pileupElementTracker ) {
if( filter.allow(p) )
filteredTracker.add(p);
}
return (RBP)createNewPileup(loc, filteredTracker);
}
}
/** Returns subset of this pileup that contains only bases with quality >= minBaseQ, coming from
* reads with mapping qualities >= minMapQ. This method allocates and returns a new instance of ReadBackedPileup.
* @param minBaseQ

View File

@ -0,0 +1,35 @@
/*
* Copyright (c) 2010, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.pileup;
/**
* A filtering interface for pileup elements.
*
* @author mhanna
* @version 0.1
*/
public interface PileupElementFilter {
public boolean allow(final PileupElement pileupElement);
}

View File

@ -80,6 +80,13 @@ public interface ReadBackedExtendedEventPileup extends ReadBackedPileup {
* @return A read-backed pileup consisting only of reads on the negative strand.
*/
public ReadBackedExtendedEventPileup getNegativeStrandPileup();
/**
* Gets a pileup consisting of all those elements passed by a given filter.
* @param filter Filter to use when testing for elements.
* @return a pileup without the given filtered elements.
*/
public ReadBackedExtendedEventPileup getFilteredPileup(PileupElementFilter filter);
/** Returns subset of this pileup that contains only bases with quality >= minBaseQ, coming from
* reads with mapping qualities >= minMapQ. This method allocates and returns a new instance of ReadBackedPileup.

View File

@ -78,6 +78,13 @@ public interface ReadBackedPileup extends Iterable<PileupElement> {
*/
public ReadBackedPileup getNegativeStrandPileup();
/**
* Gets a pileup consisting of all those elements passed by a given filter.
* @param filter Filter to use when testing for elements.
* @return a pileup without the given filtered elements.
*/
public ReadBackedPileup getFilteredPileup(PileupElementFilter filter);
/** Returns subset of this pileup that contains only bases with quality >= minBaseQ, coming from
* reads with mapping qualities >= minMapQ. This method allocates and returns a new instance of ReadBackedPileup.
* @param minBaseQ