An initial fix for performance issues when filtering UG with new StratifiedAlignmentContext.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3724 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
be75b087ec
commit
773a72e6ea
|
|
@ -106,10 +106,14 @@ public class StratifiedAlignmentContext<RBP extends ReadBackedPileup> {
|
||||||
for(String sampleName: pileup.getSamples()) {
|
for(String sampleName: pileup.getSamples()) {
|
||||||
RBP pileupBySample = (RBP)pileup.getPileupForSample(sampleName);
|
RBP pileupBySample = (RBP)pileup.getPileupForSample(sampleName);
|
||||||
|
|
||||||
|
// Don't add empty pileups to the split context.
|
||||||
|
if(pileupBySample.size() == 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
if(sampleName != null)
|
if(sampleName != null)
|
||||||
contexts.put(sampleName,new StratifiedAlignmentContext<RBP>(loc,pileupBySample));
|
contexts.put(sampleName,new StratifiedAlignmentContext<RBP>(loc,pileupBySample));
|
||||||
else {
|
else {
|
||||||
if(assumedSingleSample == null && pileupBySample.size() > 0)
|
if(assumedSingleSample == null)
|
||||||
throw new StingException("Missing read group for read " + pileupBySample.iterator().next().getRead());
|
throw new StingException("Missing read group for read " + pileupBySample.iterator().next().getRead());
|
||||||
contexts.put(assumedSingleSample,new StratifiedAlignmentContext<RBP>(loc,pileupBySample));
|
contexts.put(assumedSingleSample,new StratifiedAlignmentContext<RBP>(loc,pileupBySample));
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -155,6 +155,7 @@ public class UnifiedGenotyperEngine {
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
VariantCallContext call;
|
VariantCallContext call;
|
||||||
|
BadlyMatedReadPileupFilter badlyMatedReadPileupFilter = new BadlyMatedReadPileupFilter(refContext);
|
||||||
|
|
||||||
if ( rawContext.hasExtendedEventPileup() ) {
|
if ( rawContext.hasExtendedEventPileup() ) {
|
||||||
|
|
||||||
|
|
@ -164,7 +165,7 @@ public class UnifiedGenotyperEngine {
|
||||||
ReadBackedExtendedEventPileup pileup = rawPileup.getMappingFilteredPileup(UAC.MIN_MAPPING_QUALTY_SCORE);
|
ReadBackedExtendedEventPileup pileup = rawPileup.getMappingFilteredPileup(UAC.MIN_MAPPING_QUALTY_SCORE);
|
||||||
|
|
||||||
// filter the context based on bad mates and mismatch rate
|
// filter the context based on bad mates and mismatch rate
|
||||||
pileup = filterPileup(pileup, refContext);
|
pileup = pileup.getFilteredPileup(badlyMatedReadPileupFilter);
|
||||||
|
|
||||||
// don't call when there is no coverage
|
// don't call when there is no coverage
|
||||||
if ( pileup.size() == 0 && !UAC.ALL_BASES_MODE )
|
if ( pileup.size() == 0 && !UAC.ALL_BASES_MODE )
|
||||||
|
|
@ -185,7 +186,7 @@ public class UnifiedGenotyperEngine {
|
||||||
ReadBackedPileup pileup = rawPileup.getBaseAndMappingFilteredPileup(UAC.MIN_BASE_QUALTY_SCORE, UAC.MIN_MAPPING_QUALTY_SCORE);
|
ReadBackedPileup pileup = rawPileup.getBaseAndMappingFilteredPileup(UAC.MIN_BASE_QUALTY_SCORE, UAC.MIN_MAPPING_QUALTY_SCORE);
|
||||||
|
|
||||||
// filter the context based on bad mates and mismatch rate
|
// filter the context based on bad mates and mismatch rate
|
||||||
pileup = filterPileup(pileup, refContext);
|
pileup = pileup.getFilteredPileup(badlyMatedReadPileupFilter);
|
||||||
|
|
||||||
// don't call when there is no coverage
|
// don't call when there is no coverage
|
||||||
if ( pileup.size() == 0 && !UAC.ALL_BASES_MODE )
|
if ( pileup.size() == 0 && !UAC.ALL_BASES_MODE )
|
||||||
|
|
@ -219,31 +220,24 @@ public class UnifiedGenotyperEngine {
|
||||||
return call;
|
return call;
|
||||||
}
|
}
|
||||||
|
|
||||||
// filter based on maximum mismatches and bad mates
|
|
||||||
private ReadBackedPileup filterPileup(ReadBackedPileup pileup, ReferenceContext refContext) {
|
|
||||||
|
|
||||||
ArrayList<PileupElement> filteredPileup = new ArrayList<PileupElement>();
|
|
||||||
for ( PileupElement p : pileup ) {
|
|
||||||
if ( (UAC.USE_BADLY_MATED_READS || !p.getRead().getReadPairedFlag() || p.getRead().getMateUnmappedFlag() || p.getRead().getMateReferenceIndex() == p.getRead().getReferenceIndex()) &&
|
|
||||||
AlignmentUtils.mismatchesInRefWindow(p, refContext, true) <= UAC.MAX_MISMATCHES )
|
|
||||||
filteredPileup.add(p);
|
|
||||||
}
|
|
||||||
return new ReadBackedPileupImpl(pileup.getLocation(), filteredPileup);
|
|
||||||
}
|
|
||||||
|
|
||||||
// filter based on maximum mismatches and bad mates
|
|
||||||
private ReadBackedExtendedEventPileup filterPileup(ReadBackedExtendedEventPileup pileup, ReferenceContext refContext) {
|
|
||||||
|
|
||||||
ArrayList<ExtendedEventPileupElement> filteredPileup = new ArrayList<ExtendedEventPileupElement>();
|
|
||||||
for ( ExtendedEventPileupElement p : pileup.toExtendedIterable() ) {
|
|
||||||
if ( (UAC.USE_BADLY_MATED_READS || !p.getRead().getReadPairedFlag() || p.getRead().getMateUnmappedFlag() || p.getRead().getMateReferenceIndex() == p.getRead().getReferenceIndex()) &&
|
|
||||||
AlignmentUtils.mismatchesInRefWindow(p, refContext, true) <= UAC.MAX_MISMATCHES )
|
|
||||||
filteredPileup.add(p);
|
|
||||||
}
|
|
||||||
return new ReadBackedExtendedEventPileupImpl(pileup.getLocation(), filteredPileup);
|
|
||||||
}
|
|
||||||
|
|
||||||
private static boolean isValidDeletionFraction(double d) {
|
private static boolean isValidDeletionFraction(double d) {
|
||||||
return ( d >= 0.0 && d <= 1.0 );
|
return ( d >= 0.0 && d <= 1.0 );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Filters low quality reads out of the pileup.
|
||||||
|
*/
|
||||||
|
private class BadlyMatedReadPileupFilter implements PileupElementFilter {
|
||||||
|
private ReferenceContext refContext;
|
||||||
|
|
||||||
|
public BadlyMatedReadPileupFilter(ReferenceContext refContext) { this.refContext = refContext; }
|
||||||
|
|
||||||
|
public boolean allow(PileupElement pileupElement) {
|
||||||
|
return ((UAC.USE_BADLY_MATED_READS ||
|
||||||
|
!pileupElement.getRead().getReadPairedFlag() ||
|
||||||
|
pileupElement.getRead().getMateUnmappedFlag() ||
|
||||||
|
pileupElement.getRead().getMateReferenceIndex() == pileupElement.getRead().getReferenceIndex()) &&
|
||||||
|
AlignmentUtils.mismatchesInRefWindow(pileupElement, refContext, true) <= UAC.MAX_MISMATCHES );
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -385,6 +385,36 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets a pileup consisting of all those elements passed by a given filter.
|
||||||
|
* @param filter Filter to use when testing for elements.
|
||||||
|
* @return a pileup without the given filtered elements.
|
||||||
|
*/
|
||||||
|
public RBP getFilteredPileup(PileupElementFilter filter) {
|
||||||
|
if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
|
||||||
|
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
|
||||||
|
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
|
||||||
|
|
||||||
|
for(String sampleName: tracker.getSamples()) {
|
||||||
|
PileupElementTracker<PE> perSampleElements = tracker.getElements(sampleName);
|
||||||
|
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getFilteredPileup(filter);
|
||||||
|
filteredTracker.addElements(sampleName,pileup.pileupElementTracker);
|
||||||
|
}
|
||||||
|
|
||||||
|
return (RBP)createNewPileup(loc,filteredTracker);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
UnifiedPileupElementTracker<PE> filteredTracker = new UnifiedPileupElementTracker<PE>();
|
||||||
|
|
||||||
|
for ( PE p : pileupElementTracker ) {
|
||||||
|
if( filter.allow(p) )
|
||||||
|
filteredTracker.add(p);
|
||||||
|
}
|
||||||
|
|
||||||
|
return (RBP)createNewPileup(loc, filteredTracker);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/** Returns subset of this pileup that contains only bases with quality >= minBaseQ, coming from
|
/** Returns subset of this pileup that contains only bases with quality >= minBaseQ, coming from
|
||||||
* reads with mapping qualities >= minMapQ. This method allocates and returns a new instance of ReadBackedPileup.
|
* reads with mapping qualities >= minMapQ. This method allocates and returns a new instance of ReadBackedPileup.
|
||||||
* @param minBaseQ
|
* @param minBaseQ
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,35 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2010, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.utils.pileup;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A filtering interface for pileup elements.
|
||||||
|
*
|
||||||
|
* @author mhanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public interface PileupElementFilter {
|
||||||
|
public boolean allow(final PileupElement pileupElement);
|
||||||
|
}
|
||||||
|
|
@ -81,6 +81,13 @@ public interface ReadBackedExtendedEventPileup extends ReadBackedPileup {
|
||||||
*/
|
*/
|
||||||
public ReadBackedExtendedEventPileup getNegativeStrandPileup();
|
public ReadBackedExtendedEventPileup getNegativeStrandPileup();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets a pileup consisting of all those elements passed by a given filter.
|
||||||
|
* @param filter Filter to use when testing for elements.
|
||||||
|
* @return a pileup without the given filtered elements.
|
||||||
|
*/
|
||||||
|
public ReadBackedExtendedEventPileup getFilteredPileup(PileupElementFilter filter);
|
||||||
|
|
||||||
/** Returns subset of this pileup that contains only bases with quality >= minBaseQ, coming from
|
/** Returns subset of this pileup that contains only bases with quality >= minBaseQ, coming from
|
||||||
* reads with mapping qualities >= minMapQ. This method allocates and returns a new instance of ReadBackedPileup.
|
* reads with mapping qualities >= minMapQ. This method allocates and returns a new instance of ReadBackedPileup.
|
||||||
* @param minBaseQ
|
* @param minBaseQ
|
||||||
|
|
|
||||||
|
|
@ -78,6 +78,13 @@ public interface ReadBackedPileup extends Iterable<PileupElement> {
|
||||||
*/
|
*/
|
||||||
public ReadBackedPileup getNegativeStrandPileup();
|
public ReadBackedPileup getNegativeStrandPileup();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets a pileup consisting of all those elements passed by a given filter.
|
||||||
|
* @param filter Filter to use when testing for elements.
|
||||||
|
* @return a pileup without the given filtered elements.
|
||||||
|
*/
|
||||||
|
public ReadBackedPileup getFilteredPileup(PileupElementFilter filter);
|
||||||
|
|
||||||
/** Returns subset of this pileup that contains only bases with quality >= minBaseQ, coming from
|
/** Returns subset of this pileup that contains only bases with quality >= minBaseQ, coming from
|
||||||
* reads with mapping qualities >= minMapQ. This method allocates and returns a new instance of ReadBackedPileup.
|
* reads with mapping qualities >= minMapQ. This method allocates and returns a new instance of ReadBackedPileup.
|
||||||
* @param minBaseQ
|
* @param minBaseQ
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue