Improved performance of deletion-free pileup and added mapping-quality-zero-free pileup convenience method.
Finished converting genotyper and annotator code to new ReadBackedPileup system. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2192 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
6bb864da2a
commit
00f15ea909
|
|
@ -209,6 +209,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
|
||||
int size = 0;
|
||||
int nDeletions = 0;
|
||||
int nMQ0Reads = 0;
|
||||
|
||||
// todo -- performance problem -- should be lazy, really
|
||||
for ( SAMRecordState state : readStates ) {
|
||||
|
|
@ -220,13 +221,17 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
pile.add(new PileupElement(state.getRead(), -1));
|
||||
nDeletions++;
|
||||
}
|
||||
|
||||
if ( state.getRead().getMappingQuality() == 0 ) {
|
||||
nMQ0Reads++;
|
||||
}
|
||||
}
|
||||
GenomeLoc loc = getLocation();
|
||||
|
||||
updateReadStates(); // critical - must be called after we get the current state offsets and location
|
||||
|
||||
// if we got reads with non-D/N over the current position, we are done
|
||||
if ( pile.size() != 0 ) return new AlignmentContext(loc, new ReadBackedPileup(loc, pile, size, nDeletions));
|
||||
if ( pile.size() != 0 ) return new AlignmentContext(loc, new ReadBackedPileup(loc, pile, size, nDeletions, nMQ0Reads));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -14,8 +14,6 @@ import org.broadinstitute.sting.playground.gatk.walkers.variantstovcf.VariantsTo
|
|||
import java.util.*;
|
||||
import java.io.*;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
|
||||
/**
|
||||
* VariantAnnotator annotates variants.
|
||||
|
|
@ -188,24 +186,7 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
|
|||
|
||||
// set up the pileup for the full collection of reads at this position
|
||||
ReadBackedPileup fullPileup = context.getPileup();
|
||||
|
||||
// todo -- reimplement directly using ReadBackedPileups, which is vastly more efficient
|
||||
// also, set up the pileup for the mapping-quality-zero-free context
|
||||
List<SAMRecord> reads = context.getReads();
|
||||
List<Integer> offsets = context.getOffsets();
|
||||
Iterator<SAMRecord> readIter = reads.iterator();
|
||||
Iterator<Integer> offsetIter = offsets.iterator();
|
||||
ArrayList<SAMRecord> MQ0freeReads = new ArrayList<SAMRecord>();
|
||||
ArrayList<Integer> MQ0freeOffsets = new ArrayList<Integer>();
|
||||
while ( readIter.hasNext() ) {
|
||||
SAMRecord read = readIter.next();
|
||||
Integer offset = offsetIter.next();
|
||||
if ( read.getMappingQuality() > 0 ) {
|
||||
MQ0freeReads.add(read);
|
||||
MQ0freeOffsets.add(offset);
|
||||
}
|
||||
}
|
||||
ReadBackedPileup MQ0freePileup = new ReadBackedPileup(context.getLocation(), MQ0freeReads, MQ0freeOffsets);
|
||||
ReadBackedPileup MQ0freePileup = fullPileup.getPileupWithoutMappingQualityZeroReads();
|
||||
|
||||
HashMap<String, String> results = new HashMap<String, String>();
|
||||
|
||||
|
|
|
|||
|
|
@ -279,14 +279,13 @@ public class GenotypeLikelihoods implements Cloneable {
|
|||
int n = 0;
|
||||
|
||||
for ( PileupElement p : pileup ) {
|
||||
byte base = p.getBase();
|
||||
// ignore deletions
|
||||
if ( base == PileupElement.DELETION_BASE )
|
||||
if ( p.isDeletion() )
|
||||
continue;
|
||||
|
||||
byte qual = p.getQual();
|
||||
if ( ! ignoreBadBases || ! badBase((char)base) ) {
|
||||
n += add((char)base, qual, p.getRead(), p.getOffset());
|
||||
char base = (char)p.getBase();
|
||||
if ( ! ignoreBadBases || ! badBase(base) ) {
|
||||
n += add(base, p.getQual(), p.getRead(), p.getOffset());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -78,12 +78,11 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
|
|||
// calculate the sum of quality scores for each base
|
||||
ReadBackedPileup pileup = context.getPileup();
|
||||
for ( PileupElement p : pileup ) {
|
||||
byte base = p.getBase();
|
||||
// ignore deletions
|
||||
if ( base == PileupElement.DELETION_BASE )
|
||||
if ( p.isDeletion() )
|
||||
continue;
|
||||
|
||||
int index = BaseUtils.simpleBaseToBaseIndex((char)base);
|
||||
int index = BaseUtils.simpleBaseToBaseIndex((char)p.getBase());
|
||||
if ( index >= 0 )
|
||||
qualCounts[index] += p.getQual();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,9 +25,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import net.sf.samtools.SAMReadGroupRecord;import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -166,8 +164,8 @@ public class UnifiedGenotyper extends LocusWalker<Pair<List<Genotype>, GenotypeL
|
|||
AlignmentContext MQ0freeContext = filterAlignmentContext(fullContext);
|
||||
|
||||
// an optimization to speed things up when there is no coverage or when overly covered
|
||||
if ( MQ0freeContext.getReads().size() == 0 ||
|
||||
(UAC.MAX_READS_IN_PILEUP > 0 && MQ0freeContext.getReads().size() > UAC.MAX_READS_IN_PILEUP) )
|
||||
if ( MQ0freeContext.getPileup().size() == 0 ||
|
||||
(UAC.MAX_READS_IN_PILEUP > 0 && MQ0freeContext.getPileup().size() > UAC.MAX_READS_IN_PILEUP) )
|
||||
return null;
|
||||
|
||||
DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, UAC.heterozygosity, DiploidGenotypePriors.PROB_OF_TRISTATE_GENOTYPE);
|
||||
|
|
@ -183,22 +181,9 @@ public class UnifiedGenotyper extends LocusWalker<Pair<List<Genotype>, GenotypeL
|
|||
}
|
||||
|
||||
private AlignmentContext filterAlignmentContext(AlignmentContext context) {
|
||||
List<SAMRecord> reads = context.getReads();
|
||||
List<Integer> offsets = context.getOffsets();
|
||||
int numReads = reads.size();
|
||||
|
||||
List<SAMRecord> newReads = new ArrayList<SAMRecord>(numReads);
|
||||
List<Integer> newOffsets = new ArrayList<Integer>(numReads);
|
||||
|
||||
for (int i = 0; i < numReads; i++) {
|
||||
SAMRecord read = reads.get(i);
|
||||
if ( read.getMappingQuality() > 0 ) {
|
||||
newReads.add(read);
|
||||
newOffsets.add(offsets.get(i));
|
||||
}
|
||||
}
|
||||
|
||||
return new AlignmentContext(context.getLocation(), newReads, newOffsets);
|
||||
return new AlignmentContext(context.getLocation(),
|
||||
context.getPileup().getPileupWithoutMappingQualityZeroReads(),
|
||||
0);
|
||||
}
|
||||
|
||||
public Integer reduceInit() { return 0; }
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
|
|||
|
||||
private int size = 0; // cached value of the size of the pileup
|
||||
private int nDeletions = 0; // cached value of the number of deletions
|
||||
private int nMQ0Reads = 0; // cached value of the number of MQ0 reads
|
||||
|
||||
/**
|
||||
* Create a new version of a read backed pileup at loc, using the reads and their corresponding
|
||||
|
|
@ -60,7 +61,7 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
|
|||
* @param loc
|
||||
* @param pileup
|
||||
*/
|
||||
public ReadBackedPileup(GenomeLoc loc, ArrayList<PileupElement> pileup, int size, int nDeletions ) {
|
||||
public ReadBackedPileup(GenomeLoc loc, ArrayList<PileupElement> pileup, int size, int nDeletions, int nMQ0Reads ) {
|
||||
if ( loc == null ) throw new StingException("Illegal null genomeloc in ReadBackedPileup2");
|
||||
if ( pileup == null ) throw new StingException("Illegal null pileup in ReadBackedPileup2");
|
||||
|
||||
|
|
@ -68,6 +69,7 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
|
|||
this.pileup = pileup;
|
||||
this.size = size;
|
||||
this.nDeletions = nDeletions;
|
||||
this.nMQ0Reads = nMQ0Reads;
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -79,12 +81,16 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
|
|||
private void calculatedCachedData() {
|
||||
size = 0;
|
||||
nDeletions = 0;
|
||||
nMQ0Reads = 0;
|
||||
|
||||
for ( PileupElement p : this ) {
|
||||
size++;
|
||||
if ( p.isDeletion() ) {
|
||||
nDeletions++;
|
||||
}
|
||||
if ( p.getRead().getMappingQuality() == 0 ) {
|
||||
nMQ0Reads++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -123,15 +129,38 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
|
|||
* @return
|
||||
*/
|
||||
public ReadBackedPileup getPileupWithoutDeletions() {
|
||||
// todo -- fixme
|
||||
if ( getNumberOfDeletions() > 0 ) { // todo -- remember number of deletions
|
||||
if ( getNumberOfDeletions() > 0 ) {
|
||||
List<SAMRecord> newReads = new ArrayList<SAMRecord>();
|
||||
List<Integer> newOffsets = new ArrayList<Integer>();
|
||||
|
||||
for ( int i = 0; i < size(); i++ ) {
|
||||
if ( getOffsets().get(i) != -1 ) {
|
||||
newReads.add(getReads().get(i));
|
||||
newOffsets.add(getOffsets().get(i));
|
||||
for ( PileupElement p : pileup ) {
|
||||
if ( !p.isDeletion() ) {
|
||||
newReads.add(p.getRead());
|
||||
newOffsets.add(p.getOffset());
|
||||
}
|
||||
}
|
||||
return new ReadBackedPileup(loc, newReads, newOffsets);
|
||||
} else {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a new ReadBackedPileup that is free of mapping quality zero reads in this pileup. Note that this
|
||||
* does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy
|
||||
* of the pileup (just returns this) if there are no MQ0 reads in the pileup.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public ReadBackedPileup getPileupWithoutMappingQualityZeroReads() {
|
||||
if ( getNumberOfMappingQualityZeroReads() > 0 ) {
|
||||
List<SAMRecord> newReads = new ArrayList<SAMRecord>();
|
||||
List<Integer> newOffsets = new ArrayList<Integer>();
|
||||
|
||||
for ( PileupElement p : pileup ) {
|
||||
if ( p.getRead().getMappingQuality() > 0 ) {
|
||||
newReads.add(p.getRead());
|
||||
newOffsets.add(p.getOffset());
|
||||
}
|
||||
}
|
||||
return new ReadBackedPileup(loc, newReads, newOffsets);
|
||||
|
|
@ -220,6 +249,10 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
|
|||
return nDeletions;
|
||||
}
|
||||
|
||||
public int getNumberOfMappingQualityZeroReads() {
|
||||
return nMQ0Reads;
|
||||
}
|
||||
|
||||
// public int getNumberOfDeletions() {
|
||||
// int n = 0;
|
||||
//
|
||||
|
|
@ -230,13 +263,11 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
|
|||
// return n;
|
||||
// }
|
||||
|
||||
// todo -- optimize me
|
||||
/**
|
||||
* @return the number of elements in this pileup
|
||||
*/
|
||||
public int size() {
|
||||
return size;
|
||||
//return pileup.size();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
Loading…
Reference in New Issue