Improved performance of deletion-free pileup and added mapping-quality-zero-free pileup convenience method.

Finished converting genotyper and annotator code to new ReadBackedPileup system.



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2192 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-12-01 04:50:47 +00:00
parent 6bb864da2a
commit 00f15ea909
6 changed files with 59 additions and 59 deletions

View File

@ -209,6 +209,7 @@ public class LocusIteratorByState extends LocusIterator {
int size = 0;
int nDeletions = 0;
int nMQ0Reads = 0;
// todo -- performance problem -- should be lazy, really
for ( SAMRecordState state : readStates ) {
@ -220,13 +221,17 @@ public class LocusIteratorByState extends LocusIterator {
pile.add(new PileupElement(state.getRead(), -1));
nDeletions++;
}
if ( state.getRead().getMappingQuality() == 0 ) {
nMQ0Reads++;
}
}
GenomeLoc loc = getLocation();
updateReadStates(); // critical - must be called after we get the current state offsets and location
// if we got reads with non-D/N over the current position, we are done
if ( pile.size() != 0 ) return new AlignmentContext(loc, new ReadBackedPileup(loc, pile, size, nDeletions));
if ( pile.size() != 0 ) return new AlignmentContext(loc, new ReadBackedPileup(loc, pile, size, nDeletions, nMQ0Reads));
}
}

View File

@ -14,8 +14,6 @@ import org.broadinstitute.sting.playground.gatk.walkers.variantstovcf.VariantsTo
import java.util.*;
import java.io.*;
import net.sf.samtools.SAMRecord;
/**
* VariantAnnotator annotates variants.
@ -188,24 +186,7 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
// set up the pileup for the full collection of reads at this position
ReadBackedPileup fullPileup = context.getPileup();
// todo -- reimplement directly using ReadBackedPileups, which is vastly more efficient
// also, set up the pileup for the mapping-quality-zero-free context
List<SAMRecord> reads = context.getReads();
List<Integer> offsets = context.getOffsets();
Iterator<SAMRecord> readIter = reads.iterator();
Iterator<Integer> offsetIter = offsets.iterator();
ArrayList<SAMRecord> MQ0freeReads = new ArrayList<SAMRecord>();
ArrayList<Integer> MQ0freeOffsets = new ArrayList<Integer>();
while ( readIter.hasNext() ) {
SAMRecord read = readIter.next();
Integer offset = offsetIter.next();
if ( read.getMappingQuality() > 0 ) {
MQ0freeReads.add(read);
MQ0freeOffsets.add(offset);
}
}
ReadBackedPileup MQ0freePileup = new ReadBackedPileup(context.getLocation(), MQ0freeReads, MQ0freeOffsets);
ReadBackedPileup MQ0freePileup = fullPileup.getPileupWithoutMappingQualityZeroReads();
HashMap<String, String> results = new HashMap<String, String>();

View File

@ -279,14 +279,13 @@ public class GenotypeLikelihoods implements Cloneable {
int n = 0;
for ( PileupElement p : pileup ) {
byte base = p.getBase();
// ignore deletions
if ( base == PileupElement.DELETION_BASE )
if ( p.isDeletion() )
continue;
byte qual = p.getQual();
if ( ! ignoreBadBases || ! badBase((char)base) ) {
n += add((char)base, qual, p.getRead(), p.getOffset());
char base = (char)p.getBase();
if ( ! ignoreBadBases || ! badBase(base) ) {
n += add(base, p.getQual(), p.getRead(), p.getOffset());
}
}

View File

@ -78,12 +78,11 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
// calculate the sum of quality scores for each base
ReadBackedPileup pileup = context.getPileup();
for ( PileupElement p : pileup ) {
byte base = p.getBase();
// ignore deletions
if ( base == PileupElement.DELETION_BASE )
if ( p.isDeletion() )
continue;
int index = BaseUtils.simpleBaseToBaseIndex((char)base);
int index = BaseUtils.simpleBaseToBaseIndex((char)p.getBase());
if ( index >= 0 )
qualCounts[index] += p.getQual();
}

View File

@ -25,9 +25,7 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import net.sf.samtools.SAMReadGroupRecord;import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -166,8 +164,8 @@ public class UnifiedGenotyper extends LocusWalker<Pair<List<Genotype>, GenotypeL
AlignmentContext MQ0freeContext = filterAlignmentContext(fullContext);
// an optimization to speed things up when there is no coverage or when overly covered
if ( MQ0freeContext.getReads().size() == 0 ||
(UAC.MAX_READS_IN_PILEUP > 0 && MQ0freeContext.getReads().size() > UAC.MAX_READS_IN_PILEUP) )
if ( MQ0freeContext.getPileup().size() == 0 ||
(UAC.MAX_READS_IN_PILEUP > 0 && MQ0freeContext.getPileup().size() > UAC.MAX_READS_IN_PILEUP) )
return null;
DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, UAC.heterozygosity, DiploidGenotypePriors.PROB_OF_TRISTATE_GENOTYPE);
@ -183,22 +181,9 @@ public class UnifiedGenotyper extends LocusWalker<Pair<List<Genotype>, GenotypeL
}
private AlignmentContext filterAlignmentContext(AlignmentContext context) {
List<SAMRecord> reads = context.getReads();
List<Integer> offsets = context.getOffsets();
int numReads = reads.size();
List<SAMRecord> newReads = new ArrayList<SAMRecord>(numReads);
List<Integer> newOffsets = new ArrayList<Integer>(numReads);
for (int i = 0; i < numReads; i++) {
SAMRecord read = reads.get(i);
if ( read.getMappingQuality() > 0 ) {
newReads.add(read);
newOffsets.add(offsets.get(i));
}
}
return new AlignmentContext(context.getLocation(), newReads, newOffsets);
return new AlignmentContext(context.getLocation(),
context.getPileup().getPileupWithoutMappingQualityZeroReads(),
0);
}
public Integer reduceInit() { return 0; }

View File

@ -23,6 +23,7 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
private int size = 0; // cached value of the size of the pileup
private int nDeletions = 0; // cached value of the number of deletions
private int nMQ0Reads = 0; // cached value of the number of MQ0 reads
/**
* Create a new version of a read backed pileup at loc, using the reads and their corresponding
@ -60,7 +61,7 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
* @param loc
* @param pileup
*/
public ReadBackedPileup(GenomeLoc loc, ArrayList<PileupElement> pileup, int size, int nDeletions ) {
public ReadBackedPileup(GenomeLoc loc, ArrayList<PileupElement> pileup, int size, int nDeletions, int nMQ0Reads ) {
if ( loc == null ) throw new StingException("Illegal null genomeloc in ReadBackedPileup2");
if ( pileup == null ) throw new StingException("Illegal null pileup in ReadBackedPileup2");
@ -68,6 +69,7 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
this.pileup = pileup;
this.size = size;
this.nDeletions = nDeletions;
this.nMQ0Reads = nMQ0Reads;
}
@ -79,12 +81,16 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
private void calculatedCachedData() {
size = 0;
nDeletions = 0;
nMQ0Reads = 0;
for ( PileupElement p : this ) {
size++;
if ( p.isDeletion() ) {
nDeletions++;
}
if ( p.getRead().getMappingQuality() == 0 ) {
nMQ0Reads++;
}
}
}
@ -123,15 +129,38 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
* @return
*/
public ReadBackedPileup getPileupWithoutDeletions() {
// todo -- fixme
if ( getNumberOfDeletions() > 0 ) { // todo -- remember number of deletions
if ( getNumberOfDeletions() > 0 ) {
List<SAMRecord> newReads = new ArrayList<SAMRecord>();
List<Integer> newOffsets = new ArrayList<Integer>();
for ( int i = 0; i < size(); i++ ) {
if ( getOffsets().get(i) != -1 ) {
newReads.add(getReads().get(i));
newOffsets.add(getOffsets().get(i));
for ( PileupElement p : pileup ) {
if ( !p.isDeletion() ) {
newReads.add(p.getRead());
newOffsets.add(p.getOffset());
}
}
return new ReadBackedPileup(loc, newReads, newOffsets);
} else {
return this;
}
}
/**
* Returns a new ReadBackedPileup that is free of mapping quality zero reads in this pileup. Note that this
* does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy
* of the pileup (just returns this) if there are no MQ0 reads in the pileup.
*
* @return
*/
public ReadBackedPileup getPileupWithoutMappingQualityZeroReads() {
if ( getNumberOfMappingQualityZeroReads() > 0 ) {
List<SAMRecord> newReads = new ArrayList<SAMRecord>();
List<Integer> newOffsets = new ArrayList<Integer>();
for ( PileupElement p : pileup ) {
if ( p.getRead().getMappingQuality() > 0 ) {
newReads.add(p.getRead());
newOffsets.add(p.getOffset());
}
}
return new ReadBackedPileup(loc, newReads, newOffsets);
@ -220,6 +249,10 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
return nDeletions;
}
public int getNumberOfMappingQualityZeroReads() {
return nMQ0Reads;
}
// public int getNumberOfDeletions() {
// int n = 0;
//
@ -230,13 +263,11 @@ public class ReadBackedPileup implements Iterable<PileupElement> {
// return n;
// }
// todo -- optimize me
/**
* @return the number of elements in this pileup
*/
public int size() {
return size;
//return pileup.size();
}
/**