diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java b/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java index fdecd3d5e..bb0d9b8dd 100755 --- a/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java @@ -209,6 +209,7 @@ public class LocusIteratorByState extends LocusIterator { int size = 0; int nDeletions = 0; + int nMQ0Reads = 0; // todo -- performance problem -- should be lazy, really for ( SAMRecordState state : readStates ) { @@ -220,13 +221,17 @@ public class LocusIteratorByState extends LocusIterator { pile.add(new PileupElement(state.getRead(), -1)); nDeletions++; } + + if ( state.getRead().getMappingQuality() == 0 ) { + nMQ0Reads++; + } } GenomeLoc loc = getLocation(); updateReadStates(); // critical - must be called after we get the current state offsets and location // if we got reads with non-D/N over the current position, we are done - if ( pile.size() != 0 ) return new AlignmentContext(loc, new ReadBackedPileup(loc, pile, size, nDeletions)); + if ( pile.size() != 0 ) return new AlignmentContext(loc, new ReadBackedPileup(loc, pile, size, nDeletions, nMQ0Reads)); } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index 89905a9fc..2925365d0 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -14,8 +14,6 @@ import org.broadinstitute.sting.playground.gatk.walkers.variantstovcf.VariantsTo import java.util.*; import java.io.*; -import net.sf.samtools.SAMRecord; - /** * VariantAnnotator annotates variants. @@ -188,24 +186,7 @@ public class VariantAnnotator extends RodWalker { // set up the pileup for the full collection of reads at this position ReadBackedPileup fullPileup = context.getPileup(); - - // todo -- reimplement directly using ReadBackedPileups, which is vastly more efficient - // also, set up the pileup for the mapping-quality-zero-free context - List reads = context.getReads(); - List offsets = context.getOffsets(); - Iterator readIter = reads.iterator(); - Iterator offsetIter = offsets.iterator(); - ArrayList MQ0freeReads = new ArrayList(); - ArrayList MQ0freeOffsets = new ArrayList(); - while ( readIter.hasNext() ) { - SAMRecord read = readIter.next(); - Integer offset = offsetIter.next(); - if ( read.getMappingQuality() > 0 ) { - MQ0freeReads.add(read); - MQ0freeOffsets.add(offset); - } - } - ReadBackedPileup MQ0freePileup = new ReadBackedPileup(context.getLocation(), MQ0freeReads, MQ0freeOffsets); + ReadBackedPileup MQ0freePileup = fullPileup.getPileupWithoutMappingQualityZeroReads(); HashMap results = new HashMap(); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoods.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoods.java index 8322e18b1..e1203898f 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoods.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoods.java @@ -279,14 +279,13 @@ public class GenotypeLikelihoods implements Cloneable { int n = 0; for ( PileupElement p : pileup ) { - byte base = p.getBase(); // ignore deletions - if ( base == PileupElement.DELETION_BASE ) + if ( p.isDeletion() ) continue; - byte qual = p.getQual(); - if ( ! ignoreBadBases || ! badBase((char)base) ) { - n += add((char)base, qual, p.getRead(), p.getOffset()); + char base = (char)p.getBase(); + if ( ! ignoreBadBases || ! badBase(base) ) { + n += add(base, p.getQual(), p.getRead(), p.getOffset()); } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java index eda16749b..9a7aec7ee 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java @@ -78,12 +78,11 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc // calculate the sum of quality scores for each base ReadBackedPileup pileup = context.getPileup(); for ( PileupElement p : pileup ) { - byte base = p.getBase(); // ignore deletions - if ( base == PileupElement.DELETION_BASE ) + if ( p.isDeletion() ) continue; - int index = BaseUtils.simpleBaseToBaseIndex((char)base); + int index = BaseUtils.simpleBaseToBaseIndex((char)p.getBase()); if ( index >= 0 ) qualCounts[index] += p.getQual(); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 1eab5838b..9cb77dc9a 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -25,9 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; -import net.sf.samtools.SAMReadGroupRecord; -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import net.sf.samtools.SAMReadGroupRecord;import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -166,8 +164,8 @@ public class UnifiedGenotyper extends LocusWalker, GenotypeL AlignmentContext MQ0freeContext = filterAlignmentContext(fullContext); // an optimization to speed things up when there is no coverage or when overly covered - if ( MQ0freeContext.getReads().size() == 0 || - (UAC.MAX_READS_IN_PILEUP > 0 && MQ0freeContext.getReads().size() > UAC.MAX_READS_IN_PILEUP) ) + if ( MQ0freeContext.getPileup().size() == 0 || + (UAC.MAX_READS_IN_PILEUP > 0 && MQ0freeContext.getPileup().size() > UAC.MAX_READS_IN_PILEUP) ) return null; DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, UAC.heterozygosity, DiploidGenotypePriors.PROB_OF_TRISTATE_GENOTYPE); @@ -183,22 +181,9 @@ public class UnifiedGenotyper extends LocusWalker, GenotypeL } private AlignmentContext filterAlignmentContext(AlignmentContext context) { - List reads = context.getReads(); - List offsets = context.getOffsets(); - int numReads = reads.size(); - - List newReads = new ArrayList(numReads); - List newOffsets = new ArrayList(numReads); - - for (int i = 0; i < numReads; i++) { - SAMRecord read = reads.get(i); - if ( read.getMappingQuality() > 0 ) { - newReads.add(read); - newOffsets.add(offsets.get(i)); - } - } - - return new AlignmentContext(context.getLocation(), newReads, newOffsets); + return new AlignmentContext(context.getLocation(), + context.getPileup().getPileupWithoutMappingQualityZeroReads(), + 0); } public Integer reduceInit() { return 0; } diff --git a/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java b/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java index 7fe8d0abc..06babb787 100755 --- a/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java +++ b/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java @@ -23,6 +23,7 @@ public class ReadBackedPileup implements Iterable { private int size = 0; // cached value of the size of the pileup private int nDeletions = 0; // cached value of the number of deletions + private int nMQ0Reads = 0; // cached value of the number of MQ0 reads /** * Create a new version of a read backed pileup at loc, using the reads and their corresponding @@ -60,7 +61,7 @@ public class ReadBackedPileup implements Iterable { * @param loc * @param pileup */ - public ReadBackedPileup(GenomeLoc loc, ArrayList pileup, int size, int nDeletions ) { + public ReadBackedPileup(GenomeLoc loc, ArrayList pileup, int size, int nDeletions, int nMQ0Reads ) { if ( loc == null ) throw new StingException("Illegal null genomeloc in ReadBackedPileup2"); if ( pileup == null ) throw new StingException("Illegal null pileup in ReadBackedPileup2"); @@ -68,6 +69,7 @@ public class ReadBackedPileup implements Iterable { this.pileup = pileup; this.size = size; this.nDeletions = nDeletions; + this.nMQ0Reads = nMQ0Reads; } @@ -79,12 +81,16 @@ public class ReadBackedPileup implements Iterable { private void calculatedCachedData() { size = 0; nDeletions = 0; + nMQ0Reads = 0; for ( PileupElement p : this ) { size++; if ( p.isDeletion() ) { nDeletions++; } + if ( p.getRead().getMappingQuality() == 0 ) { + nMQ0Reads++; + } } } @@ -123,15 +129,38 @@ public class ReadBackedPileup implements Iterable { * @return */ public ReadBackedPileup getPileupWithoutDeletions() { - // todo -- fixme - if ( getNumberOfDeletions() > 0 ) { // todo -- remember number of deletions + if ( getNumberOfDeletions() > 0 ) { List newReads = new ArrayList(); List newOffsets = new ArrayList(); - for ( int i = 0; i < size(); i++ ) { - if ( getOffsets().get(i) != -1 ) { - newReads.add(getReads().get(i)); - newOffsets.add(getOffsets().get(i)); + for ( PileupElement p : pileup ) { + if ( !p.isDeletion() ) { + newReads.add(p.getRead()); + newOffsets.add(p.getOffset()); + } + } + return new ReadBackedPileup(loc, newReads, newOffsets); + } else { + return this; + } + } + + /** + * Returns a new ReadBackedPileup that is free of mapping quality zero reads in this pileup. Note that this + * does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy + * of the pileup (just returns this) if there are no MQ0 reads in the pileup. + * + * @return + */ + public ReadBackedPileup getPileupWithoutMappingQualityZeroReads() { + if ( getNumberOfMappingQualityZeroReads() > 0 ) { + List newReads = new ArrayList(); + List newOffsets = new ArrayList(); + + for ( PileupElement p : pileup ) { + if ( p.getRead().getMappingQuality() > 0 ) { + newReads.add(p.getRead()); + newOffsets.add(p.getOffset()); } } return new ReadBackedPileup(loc, newReads, newOffsets); @@ -220,6 +249,10 @@ public class ReadBackedPileup implements Iterable { return nDeletions; } + public int getNumberOfMappingQualityZeroReads() { + return nMQ0Reads; + } + // public int getNumberOfDeletions() { // int n = 0; // @@ -230,13 +263,11 @@ public class ReadBackedPileup implements Iterable { // return n; // } - // todo -- optimize me /** * @return the number of elements in this pileup */ public int size() { return size; - //return pileup.size(); } /**