From 9e23c592e6010475e63a64060a55628d5390c985 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 10 Jan 2013 16:26:51 -0500 Subject: [PATCH] ReadBackedPileup cleanup -- Only ReadBackedPileupImpl (concrete class) and ReadBackedPileup (interface) live, moved all functionality of AbstractReadBackedPileup into the impl -- ReadBackedPileupImpl was literally a shell class after we removed extended events. A few bits of code cleanup and we reduced a bunch of class complexity in the gatk -- ReadBackedPileups no longer accept pre-cached values (size, nMapQ reads, etc) but now lazy load these values as needed -- Created optimized calculation routines to iterator over all of the reads in the pileup in whatever order is most efficient as well. -- New LIBS no longer calculates size, n mapq, and n deletion reads while making pileups. -- Added commons-collections for IteratorChain --- ivy.xml | 1 + .../locusiterator/LocusIteratorByState.java | 31 +- .../pileup/AbstractReadBackedPileup.java | 1064 ----------------- .../utils/pileup/PileupElementTracker.java | 38 + .../utils/pileup/ReadBackedPileupImpl.java | 1002 +++++++++++++++- .../pileup/ReadBackedPileupUnitTest.java | 113 +- 6 files changed, 1143 insertions(+), 1106 deletions(-) delete mode 100644 public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java diff --git a/ivy.xml b/ivy.xml index 6b60acfa3..1802c1627 100644 --- a/ivy.xml +++ b/ivy.xml @@ -61,6 +61,7 @@ + diff --git a/public/java/src/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByState.java b/public/java/src/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByState.java index 22de68a5d..fe769bead 100644 --- a/public/java/src/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByState.java +++ b/public/java/src/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByState.java @@ -242,39 +242,30 @@ public class LocusIteratorByState extends LocusIterator { final Iterator iterator = readStates.iterator(sample); final List pile = new ArrayList(readStates.size(sample)); - int size = 0; // number of elements in this sample's pileup - int nDeletions = 0; // number of deletions in this sample's pileup - int nMQ0Reads = 0; // number of MQ0 reads in this sample's pileup (warning: current implementation includes N bases that are MQ0) - while (iterator.hasNext()) { - final AlignmentStateMachine state = iterator.next(); // state object with the read/offset information - final GATKSAMRecord read = (GATKSAMRecord) state.getRead(); // the actual read - final CigarOperator op = state.getCigarOperator(); // current cigar operator + // state object with the read/offset information + final AlignmentStateMachine state = iterator.next(); + final GATKSAMRecord read = (GATKSAMRecord) state.getRead(); + final CigarOperator op = state.getCigarOperator(); - if (op == CigarOperator.N) // N's are never added to any pileup + if (op == CigarOperator.N) // N's are never added to any pileup continue; if (!dontIncludeReadInPileup(read, location.getStart())) { - if ( op == CigarOperator.D ) { - if ( ! includeReadsWithDeletionAtLoci ) - continue; - nDeletions++; + if ( ! includeReadsWithDeletionAtLoci && op == CigarOperator.D ) { + continue; } pile.add(state.makePileupElement()); - size++; - - if ( read.getMappingQuality() == 0 ) - nMQ0Reads++; } } - if (pile.size() != 0) // if this pileup added at least one base, add it to the full pileup - fullPileup.put(sample, new ReadBackedPileupImpl(location, pile, size, nDeletions, nMQ0Reads)); + if (! pile.isEmpty() ) // if this pileup added at least one base, add it to the full pileup + fullPileup.put(sample, new ReadBackedPileupImpl(location, pile)); } - updateReadStates(); // critical - must be called after we get the current state offsets and location - if (!fullPileup.isEmpty()) // if we got reads with non-D/N over the current position, we are done + updateReadStates(); // critical - must be called after we get the current state offsets and location + if (!fullPileup.isEmpty()) // if we got reads with non-D/N over the current position, we are done nextAlignmentContext = new AlignmentContext(location, new ReadBackedPileupImpl(location, fullPileup), hasBeenSampled); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java b/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java deleted file mode 100644 index 73a11de2c..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java +++ /dev/null @@ -1,1064 +0,0 @@ -/* -* Copyright (c) 2012 The Broad Institute -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, -* copy, modify, merge, publish, distribute, sublicense, and/or sell -* copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following -* conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -* THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -package org.broadinstitute.sting.utils.pileup; - -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.variant.utils.BaseUtils; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.fragments.FragmentCollection; -import org.broadinstitute.sting.utils.fragments.FragmentUtils; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; - -import java.util.*; - -/** - * A generic implementation of read-backed pileups. - * - * @author mhanna - * @version 0.1 - */ -public abstract class AbstractReadBackedPileup, PE extends PileupElement> implements ReadBackedPileup { - protected final GenomeLoc loc; - protected final PileupElementTracker pileupElementTracker; - - protected int size = 0; // cached value of the size of the pileup - protected int abstractSize = -1; // cached value of the abstract size of the pileup - protected int nDeletions = 0; // cached value of the number of deletions - protected int nMQ0Reads = 0; // cached value of the number of MQ0 reads - - /** - * Create a new version of a read backed pileup at loc, using the reads and their corresponding - * offsets. This pileup will contain a list, in order of the reads, of the piled bases at - * reads[i] for all i in offsets. Does not make a copy of the data, so it's not safe to - * go changing the reads. - * - * @param loc The genome loc to associate reads wotj - * @param reads - * @param offsets - */ - public AbstractReadBackedPileup(GenomeLoc loc, List reads, List offsets) { - this.loc = loc; - this.pileupElementTracker = readsOffsets2Pileup(reads, offsets); - } - - - /** - * Create a new version of a read backed pileup at loc without any aligned reads - */ - public AbstractReadBackedPileup(GenomeLoc loc) { - this(loc, new UnifiedPileupElementTracker()); - } - - /** - * Create a new version of a read backed pileup at loc, using the reads and their corresponding - * offsets. This lower level constructure assumes pileup is well-formed and merely keeps a - * pointer to pileup. Don't go changing the data in pileup. - */ - public AbstractReadBackedPileup(GenomeLoc loc, List pileup) { - if (loc == null) throw new ReviewedStingException("Illegal null genomeloc in ReadBackedPileup"); - if (pileup == null) throw new ReviewedStingException("Illegal null pileup in ReadBackedPileup"); - - this.loc = loc; - this.pileupElementTracker = new UnifiedPileupElementTracker(pileup); - calculateCachedData(); - } - - /** - * Optimization of above constructor where all of the cached data is provided - * - * @param loc - * @param pileup - */ - public AbstractReadBackedPileup(GenomeLoc loc, List pileup, int size, int nDeletions, int nMQ0Reads) { - if (loc == null) throw new ReviewedStingException("Illegal null genomeloc in UnifiedReadBackedPileup"); - if (pileup == null) throw new ReviewedStingException("Illegal null pileup in UnifiedReadBackedPileup"); - - this.loc = loc; - this.pileupElementTracker = new UnifiedPileupElementTracker(pileup); - this.size = size; - this.nDeletions = nDeletions; - this.nMQ0Reads = nMQ0Reads; - } - - - protected AbstractReadBackedPileup(GenomeLoc loc, PileupElementTracker tracker) { - this.loc = loc; - this.pileupElementTracker = tracker; - calculateCachedData(); - } - - protected AbstractReadBackedPileup(GenomeLoc loc, Map> pileupsBySample) { - this.loc = loc; - PerSamplePileupElementTracker tracker = new PerSamplePileupElementTracker(); - for (Map.Entry> pileupEntry : pileupsBySample.entrySet()) { - tracker.addElements(pileupEntry.getKey(), pileupEntry.getValue().pileupElementTracker); - addPileupToCumulativeStats(pileupEntry.getValue()); - } - this.pileupElementTracker = tracker; - } - - public AbstractReadBackedPileup(GenomeLoc loc, List reads, int offset) { - this.loc = loc; - this.pileupElementTracker = readsOffsets2Pileup(reads, offset); - } - - /** - * Calculate cached sizes, nDeletion, and base counts for the pileup. This calculation is done upfront, - * so you pay the cost at the start, but it's more efficient to do this rather than pay the cost of calling - * sizes, nDeletion, etc. over and over potentially. - */ - protected void calculateCachedData() { - size = 0; - nDeletions = 0; - nMQ0Reads = 0; - - for (PileupElement p : pileupElementTracker) { - size++; - if (p.isDeletion()) { - nDeletions++; - } - if (p.getRead().getMappingQuality() == 0) { - nMQ0Reads++; - } - } - } - - protected void calculateAbstractSize() { - abstractSize = 0; - for (PileupElement p : pileupElementTracker) { - abstractSize += p.getRepresentativeCount(); - } - } - - protected void addPileupToCumulativeStats(AbstractReadBackedPileup pileup) { - size += pileup.getNumberOfElements(); - abstractSize = pileup.depthOfCoverage() + (abstractSize == -1 ? 0 : abstractSize); - nDeletions += pileup.getNumberOfDeletions(); - nMQ0Reads += pileup.getNumberOfMappingQualityZeroReads(); - } - - /** - * Helper routine for converting reads and offset lists to a PileupElement list. - * - * @param reads - * @param offsets - * @return - */ - private PileupElementTracker readsOffsets2Pileup(List reads, List offsets) { - if (reads == null) throw new ReviewedStingException("Illegal null read list in UnifiedReadBackedPileup"); - if (offsets == null) throw new ReviewedStingException("Illegal null offsets list in UnifiedReadBackedPileup"); - if (reads.size() != offsets.size()) - throw new ReviewedStingException("Reads and offset lists have different sizes!"); - - UnifiedPileupElementTracker pileup = new UnifiedPileupElementTracker(); - for (int i = 0; i < reads.size(); i++) { - GATKSAMRecord read = reads.get(i); - int offset = offsets.get(i); - pileup.add(createNewPileupElement(read, offset)); // only used to create fake pileups for testing so ancillary information is not important - } - - return pileup; - } - - /** - * Helper routine for converting reads and a single offset to a PileupElement list. - * - * @param reads - * @param offset - * @return - */ - private PileupElementTracker readsOffsets2Pileup(List reads, int offset) { - if (reads == null) throw new ReviewedStingException("Illegal null read list in UnifiedReadBackedPileup"); - if (offset < 0) throw new ReviewedStingException("Illegal offset < 0 UnifiedReadBackedPileup"); - - UnifiedPileupElementTracker pileup = new UnifiedPileupElementTracker(); - for (GATKSAMRecord read : reads) { - pileup.add(createNewPileupElement(read, offset)); // only used to create fake pileups for testing so ancillary information is not important - } - - return pileup; - } - - protected abstract AbstractReadBackedPileup createNewPileup(GenomeLoc loc, PileupElementTracker pileupElementTracker); - - protected abstract PE createNewPileupElement(final GATKSAMRecord read, final int offset); - - // -------------------------------------------------------- - // - // Special 'constructors' - // - // -------------------------------------------------------- - - /** - * Returns a new ReadBackedPileup that is free of deletion spanning reads in this pileup. Note that this - * does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy - * of the pileup (just returns this) if there are no deletions in the pileup. - * - * @return - */ - @Override - public RBP getPileupWithoutDeletions() { - if (getNumberOfDeletions() > 0) { - if (pileupElementTracker instanceof PerSamplePileupElementTracker) { - PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; - PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker(); - - for (final String sample : tracker.getSamples()) { - PileupElementTracker perSampleElements = tracker.getElements(sample); - AbstractReadBackedPileup pileup = createNewPileup(loc, perSampleElements).getPileupWithoutDeletions(); - filteredTracker.addElements(sample, pileup.pileupElementTracker); - } - return (RBP) createNewPileup(loc, filteredTracker); - - } else { - UnifiedPileupElementTracker tracker = (UnifiedPileupElementTracker) pileupElementTracker; - UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); - - for (PE p : tracker) { - if (!p.isDeletion()) { - filteredTracker.add(p); - } - } - return (RBP) createNewPileup(loc, filteredTracker); - } - } else { - return (RBP) this; - } - } - - /** - * Returns a new ReadBackedPileup where only one read from an overlapping read - * pair is retained. If the two reads in question disagree to their basecall, - * neither read is retained. If they agree on the base, the read with the higher - * base quality observation is retained - * - * @return the newly filtered pileup - */ - @Override - public ReadBackedPileup getOverlappingFragmentFilteredPileup() { - return getOverlappingFragmentFilteredPileup(true, true); - } - - /** - * Returns a new ReadBackedPileup where only one read from an overlapping read - * pair is retained. If discardDiscordant and the two reads in question disagree to their basecall, - * neither read is retained. Otherwise, the read with the higher - * quality (base or mapping, depending on baseQualNotMapQual) observation is retained - * - * @return the newly filtered pileup - */ - @Override - public RBP getOverlappingFragmentFilteredPileup(boolean discardDiscordant, boolean baseQualNotMapQual) { - if (pileupElementTracker instanceof PerSamplePileupElementTracker) { - PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; - PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker(); - - for (final String sample : tracker.getSamples()) { - PileupElementTracker perSampleElements = tracker.getElements(sample); - AbstractReadBackedPileup pileup = createNewPileup(loc, perSampleElements).getOverlappingFragmentFilteredPileup(discardDiscordant, baseQualNotMapQual); - filteredTracker.addElements(sample, pileup.pileupElementTracker); - } - return (RBP) createNewPileup(loc, filteredTracker); - } else { - Map filteredPileup = new HashMap(); - - for (PE p : pileupElementTracker) { - String readName = p.getRead().getReadName(); - - // if we've never seen this read before, life is good - if (!filteredPileup.containsKey(readName)) { - filteredPileup.put(readName, p); - } else { - PileupElement existing = filteredPileup.get(readName); - - // if the reads disagree at this position, throw them both out. Otherwise - // keep the element with the higher quality score - if (discardDiscordant && existing.getBase() != p.getBase()) { - filteredPileup.remove(readName); - } else { - if (baseQualNotMapQual) { - if (existing.getQual() < p.getQual()) - filteredPileup.put(readName, p); - } - else { - if (existing.getMappingQual() < p.getMappingQual()) - filteredPileup.put(readName, p); - } - } - } - } - - UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); - for (PE filteredElement : filteredPileup.values()) - filteredTracker.add(filteredElement); - - return (RBP) createNewPileup(loc, filteredTracker); - } - } - - - /** - * Returns a new ReadBackedPileup that is free of mapping quality zero reads in this pileup. Note that this - * does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy - * of the pileup (just returns this) if there are no MQ0 reads in the pileup. - * - * @return - */ - @Override - public RBP getPileupWithoutMappingQualityZeroReads() { - if (getNumberOfMappingQualityZeroReads() > 0) { - if (pileupElementTracker instanceof PerSamplePileupElementTracker) { - PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; - PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker(); - - for (final String sample : tracker.getSamples()) { - PileupElementTracker perSampleElements = tracker.getElements(sample); - AbstractReadBackedPileup pileup = createNewPileup(loc, perSampleElements).getPileupWithoutMappingQualityZeroReads(); - filteredTracker.addElements(sample, pileup.pileupElementTracker); - } - return (RBP) createNewPileup(loc, filteredTracker); - - } else { - UnifiedPileupElementTracker tracker = (UnifiedPileupElementTracker) pileupElementTracker; - UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); - - for (PE p : tracker) { - if (p.getRead().getMappingQuality() > 0) { - filteredTracker.add(p); - } - } - return (RBP) createNewPileup(loc, filteredTracker); - } - } else { - return (RBP) this; - } - } - - public RBP getPositiveStrandPileup() { - if (pileupElementTracker instanceof PerSamplePileupElementTracker) { - PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; - PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker(); - - for (final String sample : tracker.getSamples()) { - PileupElementTracker perSampleElements = tracker.getElements(sample); - AbstractReadBackedPileup pileup = createNewPileup(loc, perSampleElements).getPositiveStrandPileup(); - filteredTracker.addElements(sample, pileup.pileupElementTracker); - } - return (RBP) createNewPileup(loc, filteredTracker); - } else { - UnifiedPileupElementTracker tracker = (UnifiedPileupElementTracker) pileupElementTracker; - UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); - - for (PE p : tracker) { - if (!p.getRead().getReadNegativeStrandFlag()) { - filteredTracker.add(p); - } - } - return (RBP) createNewPileup(loc, filteredTracker); - } - } - - /** - * Gets the pileup consisting of only reads on the negative strand. - * - * @return A read-backed pileup consisting only of reads on the negative strand. - */ - public RBP getNegativeStrandPileup() { - if (pileupElementTracker instanceof PerSamplePileupElementTracker) { - PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; - PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker(); - - for (final String sample : tracker.getSamples()) { - PileupElementTracker perSampleElements = tracker.getElements(sample); - AbstractReadBackedPileup pileup = createNewPileup(loc, perSampleElements).getNegativeStrandPileup(); - filteredTracker.addElements(sample, pileup.pileupElementTracker); - } - return (RBP) createNewPileup(loc, filteredTracker); - } else { - UnifiedPileupElementTracker tracker = (UnifiedPileupElementTracker) pileupElementTracker; - UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); - - for (PE p : tracker) { - if (p.getRead().getReadNegativeStrandFlag()) { - filteredTracker.add(p); - } - } - return (RBP) createNewPileup(loc, filteredTracker); - } - } - - /** - * Gets a pileup consisting of all those elements passed by a given filter. - * - * @param filter Filter to use when testing for elements. - * @return a pileup without the given filtered elements. - */ - public RBP getFilteredPileup(PileupElementFilter filter) { - if (pileupElementTracker instanceof PerSamplePileupElementTracker) { - PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; - PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker(); - - for (final String sample : tracker.getSamples()) { - PileupElementTracker perSampleElements = tracker.getElements(sample); - AbstractReadBackedPileup pileup = createNewPileup(loc, perSampleElements).getFilteredPileup(filter); - filteredTracker.addElements(sample, pileup.pileupElementTracker); - } - - return (RBP) createNewPileup(loc, filteredTracker); - } else { - UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); - - for (PE p : pileupElementTracker) { - if (filter.allow(p)) - filteredTracker.add(p); - } - - return (RBP) createNewPileup(loc, filteredTracker); - } - } - - /** - * Returns subset of this pileup that contains only bases with quality >= minBaseQ, coming from - * reads with mapping qualities >= minMapQ. This method allocates and returns a new instance of ReadBackedPileup. - * - * @param minBaseQ - * @param minMapQ - * @return - */ - @Override - public RBP getBaseAndMappingFilteredPileup(int minBaseQ, int minMapQ) { - if (pileupElementTracker instanceof PerSamplePileupElementTracker) { - PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; - PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker(); - - for (final String sample : tracker.getSamples()) { - PileupElementTracker perSampleElements = tracker.getElements(sample); - AbstractReadBackedPileup pileup = createNewPileup(loc, perSampleElements).getBaseAndMappingFilteredPileup(minBaseQ, minMapQ); - filteredTracker.addElements(sample, pileup.pileupElementTracker); - } - - return (RBP) createNewPileup(loc, filteredTracker); - } else { - UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); - - for (PE p : pileupElementTracker) { - if (p.getRead().getMappingQuality() >= minMapQ && (p.isDeletion() || p.getQual() >= minBaseQ)) { - filteredTracker.add(p); - } - } - - return (RBP) createNewPileup(loc, filteredTracker); - } - } - - /** - * Returns subset of this pileup that contains only bases with quality >= minBaseQ. - * This method allocates and returns a new instance of ReadBackedPileup. - * - * @param minBaseQ - * @return - */ - @Override - public RBP getBaseFilteredPileup(int minBaseQ) { - return getBaseAndMappingFilteredPileup(minBaseQ, -1); - } - - /** - * Returns subset of this pileup that contains only bases coming from reads with mapping quality >= minMapQ. - * This method allocates and returns a new instance of ReadBackedPileup. - * - * @param minMapQ - * @return - */ - @Override - public RBP getMappingFilteredPileup(int minMapQ) { - return getBaseAndMappingFilteredPileup(-1, minMapQ); - } - - /** - * Gets a list of the read groups represented in this pileup. - * - * @return - */ - @Override - public Collection getReadGroups() { - Set readGroups = new HashSet(); - for (PileupElement pileupElement : this) - readGroups.add(pileupElement.getRead().getReadGroup().getReadGroupId()); - return readGroups; - } - - /** - * Gets the pileup for a given read group. Horrendously inefficient at this point. - * - * @param targetReadGroupId Identifier for the read group. - * @return A read-backed pileup containing only the reads in the given read group. - */ - @Override - public RBP getPileupForReadGroup(String targetReadGroupId) { - if (pileupElementTracker instanceof PerSamplePileupElementTracker) { - PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; - PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker(); - - for (final String sample : tracker.getSamples()) { - PileupElementTracker perSampleElements = tracker.getElements(sample); - AbstractReadBackedPileup pileup = createNewPileup(loc, perSampleElements).getPileupForReadGroup(targetReadGroupId); - if (pileup != null) - filteredTracker.addElements(sample, pileup.pileupElementTracker); - } - return filteredTracker.size() > 0 ? (RBP) createNewPileup(loc, filteredTracker) : null; - } else { - UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); - for (PE p : pileupElementTracker) { - GATKSAMRecord read = p.getRead(); - if (targetReadGroupId != null) { - if (read.getReadGroup() != null && targetReadGroupId.equals(read.getReadGroup().getReadGroupId())) - filteredTracker.add(p); - } else { - if (read.getReadGroup() == null || read.getReadGroup().getReadGroupId() == null) - filteredTracker.add(p); - } - } - return filteredTracker.size() > 0 ? (RBP) createNewPileup(loc, filteredTracker) : null; - } - } - - /** - * Gets the pileup for a set of read groups. Horrendously inefficient at this point. - * - * @param rgSet List of identifiers for the read groups. - * @return A read-backed pileup containing only the reads in the given read groups. - */ - @Override - public RBP getPileupForReadGroups(final HashSet rgSet) { - if (pileupElementTracker instanceof PerSamplePileupElementTracker) { - PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; - PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker(); - - for (final String sample : tracker.getSamples()) { - PileupElementTracker perSampleElements = tracker.getElements(sample); - AbstractReadBackedPileup pileup = createNewPileup(loc, perSampleElements).getPileupForReadGroups(rgSet); - if (pileup != null) - filteredTracker.addElements(sample, pileup.pileupElementTracker); - } - return filteredTracker.size() > 0 ? (RBP) createNewPileup(loc, filteredTracker) : null; - } else { - UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); - for (PE p : pileupElementTracker) { - GATKSAMRecord read = p.getRead(); - if (rgSet != null && !rgSet.isEmpty()) { - if (read.getReadGroup() != null && rgSet.contains(read.getReadGroup().getReadGroupId())) - filteredTracker.add(p); - } else { - if (read.getReadGroup() == null || read.getReadGroup().getReadGroupId() == null) - filteredTracker.add(p); - } - } - return filteredTracker.size() > 0 ? (RBP) createNewPileup(loc, filteredTracker) : null; - } - } - - @Override - public RBP getPileupForLane(String laneID) { - if (pileupElementTracker instanceof PerSamplePileupElementTracker) { - PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; - PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker(); - - for (final String sample : tracker.getSamples()) { - PileupElementTracker perSampleElements = tracker.getElements(sample); - AbstractReadBackedPileup pileup = createNewPileup(loc, perSampleElements).getPileupForLane(laneID); - if (pileup != null) - filteredTracker.addElements(sample, pileup.pileupElementTracker); - } - return filteredTracker.size() > 0 ? (RBP) createNewPileup(loc, filteredTracker) : null; - } else { - UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); - for (PE p : pileupElementTracker) { - GATKSAMRecord read = p.getRead(); - if (laneID != null) { - if (read.getReadGroup() != null && - (read.getReadGroup().getReadGroupId().startsWith(laneID + ".")) || // lane is the same, but sample identifier is different - (read.getReadGroup().getReadGroupId().equals(laneID))) // in case there is no sample identifier, they have to be exactly the same - filteredTracker.add(p); - } else { - if (read.getReadGroup() == null || read.getReadGroup().getReadGroupId() == null) - filteredTracker.add(p); - } - } - return filteredTracker.size() > 0 ? (RBP) createNewPileup(loc, filteredTracker) : null; - } - } - - public Collection getSamples() { - if (pileupElementTracker instanceof PerSamplePileupElementTracker) { - PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; - return new HashSet(tracker.getSamples()); - } else { - Collection sampleNames = new HashSet(); - for (PileupElement p : this) { - GATKSAMRecord read = p.getRead(); - String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null; - sampleNames.add(sampleName); - } - return sampleNames; - } - } - - /** - * Returns a pileup randomly downsampled to the desiredCoverage. - * - * TODO: delete this once the experimental downsampler stabilizes - * - * @param desiredCoverage - * @return - */ - @Override - public RBP getDownsampledPileup(int desiredCoverage) { - if (getNumberOfElements() <= desiredCoverage) - return (RBP) this; - - // randomly choose numbers corresponding to positions in the reads list - TreeSet positions = new TreeSet(); - for (int i = 0; i < desiredCoverage; /* no update */) { - if (positions.add(GenomeAnalysisEngine.getRandomGenerator().nextInt(size))) - i++; - } - - if (pileupElementTracker instanceof PerSamplePileupElementTracker) { - PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; - PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker(); - - - for (final String sample : tracker.getSamples()) { - PileupElementTracker perSampleElements = tracker.getElements(sample); - - int current = 0; - UnifiedPileupElementTracker filteredPileup = new UnifiedPileupElementTracker(); - for (PE p : perSampleElements) { - if (positions.contains(current)) - filteredPileup.add(p); - current++; - - } - filteredTracker.addElements(sample, filteredPileup); - } - - return (RBP) createNewPileup(loc, filteredTracker); - } else { - UnifiedPileupElementTracker tracker = (UnifiedPileupElementTracker) pileupElementTracker; - UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); - - Iterator positionIter = positions.iterator(); - - while (positionIter.hasNext()) { - int nextReadToKeep = (Integer) positionIter.next(); - filteredTracker.add(tracker.get(nextReadToKeep)); - } - - return (RBP) createNewPileup(getLocation(), filteredTracker); - } - } - - @Override - public RBP getPileupForSamples(Collection sampleNames) { - if (pileupElementTracker instanceof PerSamplePileupElementTracker) { - PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; - PileupElementTracker filteredElements = tracker.getElements(sampleNames); - return filteredElements != null ? (RBP) createNewPileup(loc, filteredElements) : null; - } else { - HashSet hashSampleNames = new HashSet(sampleNames); // to speed up the "contains" access in the for loop - UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); - for (PE p : pileupElementTracker) { - GATKSAMRecord read = p.getRead(); - if (sampleNames != null) { // still checking on sampleNames because hashSampleNames will never be null. And empty means something else. - if (read.getReadGroup() != null && hashSampleNames.contains(read.getReadGroup().getSample())) - filteredTracker.add(p); - } else { - if (read.getReadGroup() == null || read.getReadGroup().getSample() == null) - filteredTracker.add(p); - } - } - return filteredTracker.size() > 0 ? (RBP) createNewPileup(loc, filteredTracker) : null; - } - } - - @Override - public Map getPileupsForSamples(Collection sampleNames) { - Map result = new HashMap(); - if (pileupElementTracker instanceof PerSamplePileupElementTracker) { - PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; - for (String sample : sampleNames) { - PileupElementTracker filteredElements = tracker.getElements(sample); - if (filteredElements != null) - result.put(sample, createNewPileup(loc, filteredElements)); - } - } else { - Map> trackerMap = new HashMap>(); - - for (String sample : sampleNames) { // initialize pileups for each sample - UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); - trackerMap.put(sample, filteredTracker); - } - for (PE p : pileupElementTracker) { // go through all pileup elements only once and add them to the respective sample's pileup - GATKSAMRecord read = p.getRead(); - if (read.getReadGroup() != null) { - String sample = read.getReadGroup().getSample(); - UnifiedPileupElementTracker tracker = trackerMap.get(sample); - if (tracker != null) // we only add the pileup the requested samples. Completely ignore the rest - tracker.add(p); - } - } - for (Map.Entry> entry : trackerMap.entrySet()) // create the RBP for each sample - result.put(entry.getKey(), createNewPileup(loc, entry.getValue())); - } - return result; - } - - - @Override - public RBP getPileupForSample(String sampleName) { - if (pileupElementTracker instanceof PerSamplePileupElementTracker) { - PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; - PileupElementTracker filteredElements = tracker.getElements(sampleName); - return filteredElements != null ? (RBP) createNewPileup(loc, filteredElements) : null; - } else { - UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); - for (PE p : pileupElementTracker) { - GATKSAMRecord read = p.getRead(); - if (sampleName != null) { - if (read.getReadGroup() != null && sampleName.equals(read.getReadGroup().getSample())) - filteredTracker.add(p); - } else { - if (read.getReadGroup() == null || read.getReadGroup().getSample() == null) - filteredTracker.add(p); - } - } - return filteredTracker.size() > 0 ? (RBP) createNewPileup(loc, filteredTracker) : null; - } - } - - // -------------------------------------------------------- - // - // iterators - // - // -------------------------------------------------------- - - /** - * The best way to access PileupElements where you only care about the bases and quals in the pileup. - *

- * for (PileupElement p : this) { doSomething(p); } - *

- * Provides efficient iteration of the data. - * - * @return - */ - @Override - public Iterator iterator() { - return new Iterator() { - private final Iterator wrappedIterator = pileupElementTracker.iterator(); - - public boolean hasNext() { - return wrappedIterator.hasNext(); - } - - public PileupElement next() { - return wrappedIterator.next(); - } - - public void remove() { - throw new UnsupportedOperationException("Cannot remove from a pileup element iterator"); - } - }; - } - - /** - * The best way to access PileupElements where you only care not only about bases and quals in the pileup - * but also need access to the index of the pileup element in the pile. - * - * for (ExtendedPileupElement p : this) { doSomething(p); } - * - * Provides efficient iteration of the data. - * - * @return - */ - - /** - * Simple useful routine to count the number of deletion bases in this pileup - * - * @return - */ - @Override - public int getNumberOfDeletions() { - return nDeletions; - } - - @Override - public int getNumberOfMappingQualityZeroReads() { - return nMQ0Reads; - } - - /** - * @return the number of physical elements in this pileup - */ - @Override - public int getNumberOfElements() { - return size; - } - - /** - * @return the number of abstract elements in this pileup - */ - @Override - public int depthOfCoverage() { - if (abstractSize == -1) - calculateAbstractSize(); - return abstractSize; - } - - /** - * @return true if there are 0 elements in the pileup, false otherwise - */ - @Override - public boolean isEmpty() { - return size == 0; - } - - - /** - * @return the location of this pileup - */ - @Override - public GenomeLoc getLocation() { - return loc; - } - - /** - * Get counts of A, C, G, T in order, which returns a int[4] vector with counts according - * to BaseUtils.simpleBaseToBaseIndex for each base. - * - * @return - */ - @Override - public int[] getBaseCounts() { - int[] counts = new int[4]; - - if (pileupElementTracker instanceof PerSamplePileupElementTracker) { - PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; - for (final String sample : tracker.getSamples()) { - int[] countsBySample = createNewPileup(loc, tracker.getElements(sample)).getBaseCounts(); - for (int i = 0; i < counts.length; i++) - counts[i] += countsBySample[i]; - } - } else { - for (PileupElement pile : this) { - // skip deletion sites - if (!pile.isDeletion()) { - int index = BaseUtils.simpleBaseToBaseIndex((char) pile.getBase()); - if (index != -1) - counts[index]++; - } - } - } - - return counts; - } - - @Override - public String getPileupString(Character ref) { - // In the pileup format, each line represents a genomic position, consisting of chromosome name, - // coordinate, reference base, read bases, read qualities and alignment mapping qualities. - return String.format("%s %s %c %s %s", - getLocation().getContig(), getLocation().getStart(), // chromosome name and coordinate - ref, // reference base - new String(getBases()), - getQualsString()); - } - - // -------------------------------------------------------- - // - // Convenience functions that may be slow - // - // -------------------------------------------------------- - - /** - * Returns a list of the reads in this pileup. Note this call costs O(n) and allocates fresh lists each time - * - * @return - */ - @Override - public List getReads() { - List reads = new ArrayList(getNumberOfElements()); - for (PileupElement pile : this) { - reads.add(pile.getRead()); - } - return reads; - } - - @Override - public int getNumberOfDeletionsAfterThisElement() { - int count = 0; - for (PileupElement p : this) { - if (p.isBeforeDeletionStart()) - count++; - } - return count; - } - - @Override - public int getNumberOfInsertionsAfterThisElement() { - int count = 0; - for (PileupElement p : this) { - if (p.isBeforeInsertion()) - count++; - } - return count; - - } - /** - * Returns a list of the offsets in this pileup. Note this call costs O(n) and allocates fresh lists each time - * - * @return - */ - @Override - public List getOffsets() { - List offsets = new ArrayList(getNumberOfElements()); - for (PileupElement pile : this) { - offsets.add(pile.getOffset()); - } - return offsets; - } - - /** - * Returns an array of the bases in this pileup. Note this call costs O(n) and allocates fresh array each time - * - * @return - */ - @Override - public byte[] getBases() { - byte[] v = new byte[getNumberOfElements()]; - int pos = 0; - for (PileupElement pile : pileupElementTracker) { - v[pos++] = pile.getBase(); - } - return v; - } - - /** - * Returns an array of the quals in this pileup. Note this call costs O(n) and allocates fresh array each time - * - * @return - */ - @Override - public byte[] getQuals() { - byte[] v = new byte[getNumberOfElements()]; - int pos = 0; - for (PileupElement pile : pileupElementTracker) { - v[pos++] = pile.getQual(); - } - return v; - } - - /** - * Get an array of the mapping qualities - * - * @return - */ - @Override - public byte[] getMappingQuals() { - byte[] v = new byte[getNumberOfElements()]; - int pos = 0; - for (PileupElement pile : pileupElementTracker) { - v[pos++] = (byte) pile.getRead().getMappingQuality(); - } - return v; - } - - static String quals2String(byte[] quals) { - StringBuilder qualStr = new StringBuilder(); - for (int qual : quals) { - qual = Math.min(qual, 63); // todo: fixme, this isn't a good idea - char qualChar = (char) (33 + qual); // todo: warning, this is illegal for qual > 63 - qualStr.append(qualChar); - } - - return qualStr.toString(); - } - - private String getQualsString() { - return quals2String(getQuals()); - } - - /** - * Returns a new ReadBackedPileup that is sorted by start coordinate of the reads. - * - * @return - */ - @Override - public RBP getStartSortedPileup() { - - final TreeSet sortedElements = new TreeSet(new Comparator() { - @Override - public int compare(PE element1, PE element2) { - final int difference = element1.getRead().getAlignmentStart() - element2.getRead().getAlignmentStart(); - return difference != 0 ? difference : element1.getRead().getReadName().compareTo(element2.getRead().getReadName()); - } - }); - - if (pileupElementTracker instanceof PerSamplePileupElementTracker) { - PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; - - for (final String sample : tracker.getSamples()) { - PileupElementTracker perSampleElements = tracker.getElements(sample); - for (PE pile : perSampleElements) - sortedElements.add(pile); - } - } - else { - UnifiedPileupElementTracker tracker = (UnifiedPileupElementTracker) pileupElementTracker; - for (PE pile : tracker) - sortedElements.add(pile); - } - - UnifiedPileupElementTracker sortedTracker = new UnifiedPileupElementTracker(); - for (PE pile : sortedElements) - sortedTracker.add(pile); - - return (RBP) createNewPileup(loc, sortedTracker); - } - - @Override - public FragmentCollection toFragments() { - return FragmentUtils.create(this); - } - - @Override - public ReadBackedPileup copy() { - return new ReadBackedPileupImpl(loc, (PileupElementTracker) pileupElementTracker.copy()); - } -} - - diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElementTracker.java b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElementTracker.java index 0a0d4ab9c..288b033cb 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElementTracker.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElementTracker.java @@ -25,6 +25,8 @@ package org.broadinstitute.sting.utils.pileup; +import org.apache.commons.collections.iterators.IteratorChain; + import java.util.*; /** @@ -35,6 +37,20 @@ import java.util.*; */ abstract class PileupElementTracker implements Iterable { public abstract int size(); + + /** + * Iterate through the PEs here, but in any order, which may improve performance + * if you don't care about the underlying order the reads are coming to you in. + * @return an iteratable over all pileup elements in this tracker + */ + public abstract Iterable unorderedIterable(); + + /** + * Same as @see #unorderedIterable but the actual iterator itself + * @return + */ + public Iterator unorderedIterator() { return unorderedIterable().iterator(); } + public abstract PileupElementTracker copy(); } @@ -65,6 +81,7 @@ class UnifiedPileupElementTracker extends PileupElemen } public Iterator iterator() { return pileup.iterator(); } + public Iterable unorderedIterable() { return this; } } class PerSamplePileupElementTracker extends PileupElementTracker { @@ -113,4 +130,25 @@ class PerSamplePileupElementTracker extends PileupElem public int size() { return size; } + + + public Iterable unorderedIterable() { + return new Iterable() { + @Override + public Iterator iterator() { + return new Iterator() { + final private IteratorChain chain = new IteratorChain(); + + { // initialize the chain with the unordered iterators of the per sample pileups + for ( PileupElementTracker pet : pileup.values() ) { + chain.addIterator(pet.unorderedIterator()); + } + } + @Override public boolean hasNext() { return chain.hasNext(); } + @Override public PE next() { return (PE)chain.next(); } + @Override public void remove() { throw new UnsupportedOperationException("Cannot remove"); } + }; + } + }; + } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java index fa42964b9..fe43f85bd 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java @@ -25,33 +25,64 @@ package org.broadinstitute.sting.utils.pileup; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.fragments.FragmentCollection; +import org.broadinstitute.sting.utils.fragments.FragmentUtils; import org.broadinstitute.sting.utils.locusiterator.LocusIteratorByState; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.broadinstitute.variant.utils.BaseUtils; -import java.util.List; -import java.util.Map; +import java.util.*; -public class ReadBackedPileupImpl extends AbstractReadBackedPileup implements ReadBackedPileup { +public class ReadBackedPileupImpl implements ReadBackedPileup { + protected final GenomeLoc loc; + protected final PileupElementTracker pileupElementTracker; - public ReadBackedPileupImpl(GenomeLoc loc) { - super(loc); - } + private final static int UNINITIALIZED_CACHED_INT_VALUE = -1; + /** + * Different then number of elements due to reduced reads + */ + private int depthOfCoverage = UNINITIALIZED_CACHED_INT_VALUE; + private int nDeletions = UNINITIALIZED_CACHED_INT_VALUE; // cached value of the number of deletions + private int nMQ0Reads = UNINITIALIZED_CACHED_INT_VALUE; // cached value of the number of MQ0 reads + + /** + * Create a new version of a read backed pileup at loc, using the reads and their corresponding + * offsets. This pileup will contain a list, in order of the reads, of the piled bases at + * reads[i] for all i in offsets. Does not make a copy of the data, so it's not safe to + * go changing the reads. + * + * @param loc The genome loc to associate reads wotj + * @param reads + * @param offsets + */ public ReadBackedPileupImpl(GenomeLoc loc, List reads, List offsets) { - super(loc, reads, offsets); + this.loc = loc; + this.pileupElementTracker = readsOffsets2Pileup(reads, offsets); } - public ReadBackedPileupImpl(GenomeLoc loc, List reads, int offset) { - super(loc, reads, offset); + + /** + * Create a new version of a read backed pileup at loc without any aligned reads + */ + public ReadBackedPileupImpl(GenomeLoc loc) { + this(loc, new UnifiedPileupElementTracker()); } - public ReadBackedPileupImpl(GenomeLoc loc, List pileupElements) { - super(loc, pileupElements); - } + /** + * Create a new version of a read backed pileup at loc, using the reads and their corresponding + * offsets. This lower level constructure assumes pileup is well-formed and merely keeps a + * pointer to pileup. Don't go changing the data in pileup. + */ + public ReadBackedPileupImpl(GenomeLoc loc, List pileup) { + if (loc == null) throw new ReviewedStingException("Illegal null genomeloc in ReadBackedPileup"); + if (pileup == null) throw new ReviewedStingException("Illegal null pileup in ReadBackedPileup"); - public ReadBackedPileupImpl(GenomeLoc loc, Map pileupElementsBySample) { - super(loc, pileupElementsBySample); + this.loc = loc; + this.pileupElementTracker = new UnifiedPileupElementTracker(pileup); } /** @@ -59,25 +90,954 @@ public class ReadBackedPileupImpl extends AbstractReadBackedPileup pileup, int size, int nDeletions, int nMQ0Reads) { - super(loc, pileup, size, nDeletions, nMQ0Reads); + this(loc, pileup); } protected ReadBackedPileupImpl(GenomeLoc loc, PileupElementTracker tracker) { - super(loc, tracker); + this.loc = loc; + this.pileupElementTracker = tracker; + } + + public ReadBackedPileupImpl(GenomeLoc loc, Map pileupsBySample) { + this.loc = loc; + PerSamplePileupElementTracker tracker = new PerSamplePileupElementTracker(); + for (Map.Entry pileupEntry : pileupsBySample.entrySet()) { + tracker.addElements(pileupEntry.getKey(), pileupEntry.getValue().pileupElementTracker); + } + this.pileupElementTracker = tracker; + } + + public ReadBackedPileupImpl(GenomeLoc loc, List reads, int offset) { + this.loc = loc; + this.pileupElementTracker = readsOffsets2Pileup(reads, offset); + } + + /** + * Helper routine for converting reads and offset lists to a PileupElement list. + * + * @param reads + * @param offsets + * @return + */ + private PileupElementTracker readsOffsets2Pileup(List reads, List offsets) { + if (reads == null) throw new ReviewedStingException("Illegal null read list in UnifiedReadBackedPileup"); + if (offsets == null) throw new ReviewedStingException("Illegal null offsets list in UnifiedReadBackedPileup"); + if (reads.size() != offsets.size()) + throw new ReviewedStingException("Reads and offset lists have different sizes!"); + + UnifiedPileupElementTracker pileup = new UnifiedPileupElementTracker(); + for (int i = 0; i < reads.size(); i++) { + GATKSAMRecord read = reads.get(i); + int offset = offsets.get(i); + pileup.add(createNewPileupElement(read, offset)); // only used to create fake pileups for testing so ancillary information is not important + } + + return pileup; + } + + /** + * Helper routine for converting reads and a single offset to a PileupElement list. + * + * @param reads + * @param offset + * @return + */ + private PileupElementTracker readsOffsets2Pileup(List reads, int offset) { + if (reads == null) throw new ReviewedStingException("Illegal null read list in UnifiedReadBackedPileup"); + if (offset < 0) throw new ReviewedStingException("Illegal offset < 0 UnifiedReadBackedPileup"); + + UnifiedPileupElementTracker pileup = new UnifiedPileupElementTracker(); + for (GATKSAMRecord read : reads) { + pileup.add(createNewPileupElement(read, offset)); // only used to create fake pileups for testing so ancillary information is not important + } + + return pileup; } - @Override protected ReadBackedPileupImpl createNewPileup(GenomeLoc loc, PileupElementTracker tracker) { return new ReadBackedPileupImpl(loc, tracker); } - @Override protected PileupElement createNewPileupElement(GATKSAMRecord read, int offset) { return LocusIteratorByState.createPileupForReadAndOffset(read, offset); + } + + // -------------------------------------------------------- + // + // Special 'constructors' + // + // -------------------------------------------------------- + + /** + * Returns a new ReadBackedPileup that is free of deletion spanning reads in this pileup. Note that this + * does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy + * of the pileup (just returns this) if there are no deletions in the pileup. + * + * @return + */ + @Override + public ReadBackedPileupImpl getPileupWithoutDeletions() { + if (getNumberOfDeletions() > 0) { + if (pileupElementTracker instanceof PerSamplePileupElementTracker) { + PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; + PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker(); + + for (final String sample : tracker.getSamples()) { + PileupElementTracker perSampleElements = tracker.getElements(sample); + ReadBackedPileupImpl pileup = createNewPileup(loc, perSampleElements).getPileupWithoutDeletions(); + filteredTracker.addElements(sample, pileup.pileupElementTracker); + } + return createNewPileup(loc, filteredTracker); + + } else { + UnifiedPileupElementTracker tracker = (UnifiedPileupElementTracker) pileupElementTracker; + UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); + + for (PileupElement p : tracker) { + if (!p.isDeletion()) { + filteredTracker.add(p); + } + } + return createNewPileup(loc, filteredTracker); + } + } else { + return this; + } + } + + /** + * Returns a new ReadBackedPileup where only one read from an overlapping read + * pair is retained. If the two reads in question disagree to their basecall, + * neither read is retained. If they agree on the base, the read with the higher + * base quality observation is retained + * + * @return the newly filtered pileup + */ + @Override + public ReadBackedPileup getOverlappingFragmentFilteredPileup() { + return getOverlappingFragmentFilteredPileup(true, true); + } + + /** + * Returns a new ReadBackedPileup where only one read from an overlapping read + * pair is retained. If discardDiscordant and the two reads in question disagree to their basecall, + * neither read is retained. Otherwise, the read with the higher + * quality (base or mapping, depending on baseQualNotMapQual) observation is retained + * + * @return the newly filtered pileup + */ + @Override + public ReadBackedPileupImpl getOverlappingFragmentFilteredPileup(boolean discardDiscordant, boolean baseQualNotMapQual) { + if (pileupElementTracker instanceof PerSamplePileupElementTracker) { + PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; + PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker(); + + for (final String sample : tracker.getSamples()) { + PileupElementTracker perSampleElements = tracker.getElements(sample); + ReadBackedPileupImpl pileup = createNewPileup(loc, perSampleElements).getOverlappingFragmentFilteredPileup(discardDiscordant, baseQualNotMapQual); + filteredTracker.addElements(sample, pileup.pileupElementTracker); + } + return createNewPileup(loc, filteredTracker); + } else { + Map filteredPileup = new HashMap(); + + for (PileupElement p : pileupElementTracker) { + String readName = p.getRead().getReadName(); + + // if we've never seen this read before, life is good + if (!filteredPileup.containsKey(readName)) { + filteredPileup.put(readName, p); + } else { + PileupElement existing = filteredPileup.get(readName); + + // if the reads disagree at this position, throw them both out. Otherwise + // keep the element with the higher quality score + if (discardDiscordant && existing.getBase() != p.getBase()) { + filteredPileup.remove(readName); + } else { + if (baseQualNotMapQual) { + if (existing.getQual() < p.getQual()) + filteredPileup.put(readName, p); + } + else { + if (existing.getMappingQual() < p.getMappingQual()) + filteredPileup.put(readName, p); + } + } + } + } + + UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); + for (PileupElement filteredElement : filteredPileup.values()) + filteredTracker.add(filteredElement); + + return createNewPileup(loc, filteredTracker); + } + } + + + /** + * Returns a new ReadBackedPileup that is free of mapping quality zero reads in this pileup. Note that this + * does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy + * of the pileup (just returns this) if there are no MQ0 reads in the pileup. + * + * @return + */ + @Override + public ReadBackedPileupImpl getPileupWithoutMappingQualityZeroReads() { + if (getNumberOfMappingQualityZeroReads() > 0) { + if (pileupElementTracker instanceof PerSamplePileupElementTracker) { + PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; + PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker(); + + for (final String sample : tracker.getSamples()) { + PileupElementTracker perSampleElements = tracker.getElements(sample); + ReadBackedPileupImpl pileup = createNewPileup(loc, perSampleElements).getPileupWithoutMappingQualityZeroReads(); + filteredTracker.addElements(sample, pileup.pileupElementTracker); + } + return createNewPileup(loc, filteredTracker); + + } else { + UnifiedPileupElementTracker tracker = (UnifiedPileupElementTracker) pileupElementTracker; + UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); + + for (PileupElement p : tracker) { + if (p.getRead().getMappingQuality() > 0) { + filteredTracker.add(p); + } + } + return createNewPileup(loc, filteredTracker); + } + } else { + return this; + } + } + + public ReadBackedPileupImpl getPositiveStrandPileup() { + if (pileupElementTracker instanceof PerSamplePileupElementTracker) { + PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; + PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker(); + + for (final String sample : tracker.getSamples()) { + PileupElementTracker perSampleElements = tracker.getElements(sample); + ReadBackedPileupImpl pileup = createNewPileup(loc, perSampleElements).getPositiveStrandPileup(); + filteredTracker.addElements(sample, pileup.pileupElementTracker); + } + return createNewPileup(loc, filteredTracker); + } else { + UnifiedPileupElementTracker tracker = (UnifiedPileupElementTracker) pileupElementTracker; + UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); + + for (PileupElement p : tracker) { + if (!p.getRead().getReadNegativeStrandFlag()) { + filteredTracker.add(p); + } + } + return createNewPileup(loc, filteredTracker); + } + } + + /** + * Gets the pileup consisting of only reads on the negative strand. + * + * @return A read-backed pileup consisting only of reads on the negative strand. + */ + public ReadBackedPileupImpl getNegativeStrandPileup() { + if (pileupElementTracker instanceof PerSamplePileupElementTracker) { + PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; + PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker(); + + for (final String sample : tracker.getSamples()) { + PileupElementTracker perSampleElements = tracker.getElements(sample); + ReadBackedPileupImpl pileup = createNewPileup(loc, perSampleElements).getNegativeStrandPileup(); + filteredTracker.addElements(sample, pileup.pileupElementTracker); + } + return createNewPileup(loc, filteredTracker); + } else { + UnifiedPileupElementTracker tracker = (UnifiedPileupElementTracker) pileupElementTracker; + UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); + + for (PileupElement p : tracker) { + if (p.getRead().getReadNegativeStrandFlag()) { + filteredTracker.add(p); + } + } + return createNewPileup(loc, filteredTracker); + } + } + + /** + * Gets a pileup consisting of all those elements passed by a given filter. + * + * @param filter Filter to use when testing for elements. + * @return a pileup without the given filtered elements. + */ + public ReadBackedPileupImpl getFilteredPileup(PileupElementFilter filter) { + if (pileupElementTracker instanceof PerSamplePileupElementTracker) { + PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; + PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker(); + + for (final String sample : tracker.getSamples()) { + PileupElementTracker perSampleElements = tracker.getElements(sample); + ReadBackedPileupImpl pileup = createNewPileup(loc, perSampleElements).getFilteredPileup(filter); + filteredTracker.addElements(sample, pileup.pileupElementTracker); + } + + return createNewPileup(loc, filteredTracker); + } else { + UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); + + for (PileupElement p : pileupElementTracker) { + if (filter.allow(p)) + filteredTracker.add(p); + } + + return createNewPileup(loc, filteredTracker); + } + } + + /** + * Returns subset of this pileup that contains only bases with quality >= minBaseQ, coming from + * reads with mapping qualities >= minMapQ. This method allocates and returns a new instance of ReadBackedPileup. + * + * @param minBaseQ + * @param minMapQ + * @return + */ + @Override + public ReadBackedPileupImpl getBaseAndMappingFilteredPileup(int minBaseQ, int minMapQ) { + if (pileupElementTracker instanceof PerSamplePileupElementTracker) { + PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; + PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker(); + + for (final String sample : tracker.getSamples()) { + PileupElementTracker perSampleElements = tracker.getElements(sample); + ReadBackedPileupImpl pileup = createNewPileup(loc, perSampleElements).getBaseAndMappingFilteredPileup(minBaseQ, minMapQ); + filteredTracker.addElements(sample, pileup.pileupElementTracker); + } + + return createNewPileup(loc, filteredTracker); + } else { + UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); + + for (PileupElement p : pileupElementTracker) { + if (p.getRead().getMappingQuality() >= minMapQ && (p.isDeletion() || p.getQual() >= minBaseQ)) { + filteredTracker.add(p); + } + } + + return createNewPileup(loc, filteredTracker); + } + } + + /** + * Returns subset of this pileup that contains only bases with quality >= minBaseQ. + * This method allocates and returns a new instance of ReadBackedPileup. + * + * @param minBaseQ + * @return + */ + @Override + public ReadBackedPileup getBaseFilteredPileup(int minBaseQ) { + return getBaseAndMappingFilteredPileup(minBaseQ, -1); + } + + /** + * Returns subset of this pileup that contains only bases coming from reads with mapping quality >= minMapQ. + * This method allocates and returns a new instance of ReadBackedPileup. + * + * @param minMapQ + * @return + */ + @Override + public ReadBackedPileup getMappingFilteredPileup(int minMapQ) { + return getBaseAndMappingFilteredPileup(-1, minMapQ); + } + + /** + * Gets a list of the read groups represented in this pileup. + * + * @return + */ + @Override + public Collection getReadGroups() { + Set readGroups = new HashSet(); + for (PileupElement pileupElement : this) + readGroups.add(pileupElement.getRead().getReadGroup().getReadGroupId()); + return readGroups; + } + + /** + * Gets the pileup for a given read group. Horrendously inefficient at this point. + * + * @param targetReadGroupId Identifier for the read group. + * @return A read-backed pileup containing only the reads in the given read group. + */ + @Override + public ReadBackedPileupImpl getPileupForReadGroup(String targetReadGroupId) { + if (pileupElementTracker instanceof PerSamplePileupElementTracker) { + PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; + PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker(); + + for (final String sample : tracker.getSamples()) { + PileupElementTracker perSampleElements = tracker.getElements(sample); + ReadBackedPileupImpl pileup = createNewPileup(loc, perSampleElements).getPileupForReadGroup(targetReadGroupId); + if (pileup != null) + filteredTracker.addElements(sample, pileup.pileupElementTracker); + } + return filteredTracker.size() > 0 ? createNewPileup(loc, filteredTracker) : null; + } else { + UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); + for (PileupElement p : pileupElementTracker) { + GATKSAMRecord read = p.getRead(); + if (targetReadGroupId != null) { + if (read.getReadGroup() != null && targetReadGroupId.equals(read.getReadGroup().getReadGroupId())) + filteredTracker.add(p); + } else { + if (read.getReadGroup() == null || read.getReadGroup().getReadGroupId() == null) + filteredTracker.add(p); + } + } + return filteredTracker.size() > 0 ? createNewPileup(loc, filteredTracker) : null; + } + } + + /** + * Gets the pileup for a set of read groups. Horrendously inefficient at this point. + * + * @param rgSet List of identifiers for the read groups. + * @return A read-backed pileup containing only the reads in the given read groups. + */ + @Override + public ReadBackedPileupImpl getPileupForReadGroups(final HashSet rgSet) { + if (pileupElementTracker instanceof PerSamplePileupElementTracker) { + PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; + PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker(); + + for (final String sample : tracker.getSamples()) { + PileupElementTracker perSampleElements = tracker.getElements(sample); + ReadBackedPileupImpl pileup = createNewPileup(loc, perSampleElements).getPileupForReadGroups(rgSet); + if (pileup != null) + filteredTracker.addElements(sample, pileup.pileupElementTracker); + } + return filteredTracker.size() > 0 ? createNewPileup(loc, filteredTracker) : null; + } else { + UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); + for (PileupElement p : pileupElementTracker) { + GATKSAMRecord read = p.getRead(); + if (rgSet != null && !rgSet.isEmpty()) { + if (read.getReadGroup() != null && rgSet.contains(read.getReadGroup().getReadGroupId())) + filteredTracker.add(p); + } else { + if (read.getReadGroup() == null || read.getReadGroup().getReadGroupId() == null) + filteredTracker.add(p); + } + } + return filteredTracker.size() > 0 ? createNewPileup(loc, filteredTracker) : null; + } + } + + @Override + public ReadBackedPileupImpl getPileupForLane(String laneID) { + if (pileupElementTracker instanceof PerSamplePileupElementTracker) { + PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; + PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker(); + + for (final String sample : tracker.getSamples()) { + PileupElementTracker perSampleElements = tracker.getElements(sample); + ReadBackedPileupImpl pileup = createNewPileup(loc, perSampleElements).getPileupForLane(laneID); + if (pileup != null) + filteredTracker.addElements(sample, pileup.pileupElementTracker); + } + return filteredTracker.size() > 0 ? createNewPileup(loc, filteredTracker) : null; + } else { + UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); + for (PileupElement p : pileupElementTracker) { + GATKSAMRecord read = p.getRead(); + if (laneID != null) { + if (read.getReadGroup() != null && + (read.getReadGroup().getReadGroupId().startsWith(laneID + ".")) || // lane is the same, but sample identifier is different + (read.getReadGroup().getReadGroupId().equals(laneID))) // in case there is no sample identifier, they have to be exactly the same + filteredTracker.add(p); + } else { + if (read.getReadGroup() == null || read.getReadGroup().getReadGroupId() == null) + filteredTracker.add(p); + } + } + return filteredTracker.size() > 0 ? createNewPileup(loc, filteredTracker) : null; + } + } + + public Collection getSamples() { + if (pileupElementTracker instanceof PerSamplePileupElementTracker) { + PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; + return new HashSet(tracker.getSamples()); + } else { + Collection sampleNames = new HashSet(); + for (PileupElement p : this) { + GATKSAMRecord read = p.getRead(); + String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null; + sampleNames.add(sampleName); + } + return sampleNames; + } + } + + /** + * Returns a pileup randomly downsampled to the desiredCoverage. + * + * TODO: delete this once the experimental downsampler stabilizes + * + * @param desiredCoverage + * @return + */ + @Override + public ReadBackedPileup getDownsampledPileup(int desiredCoverage) { + if (getNumberOfElements() <= desiredCoverage) + return this; + + // randomly choose numbers corresponding to positions in the reads list + TreeSet positions = new TreeSet(); + for (int i = 0; i < desiredCoverage; /* no update */) { + if (positions.add(GenomeAnalysisEngine.getRandomGenerator().nextInt(getNumberOfElements()))) + i++; + } + + if (pileupElementTracker instanceof PerSamplePileupElementTracker) { + PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; + PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker(); + + + for (final String sample : tracker.getSamples()) { + PileupElementTracker perSampleElements = tracker.getElements(sample); + + int current = 0; + UnifiedPileupElementTracker filteredPileup = new UnifiedPileupElementTracker(); + for (PileupElement p : perSampleElements) { + if (positions.contains(current)) + filteredPileup.add(p); + current++; + + } + filteredTracker.addElements(sample, filteredPileup); + } + + return createNewPileup(loc, filteredTracker); + } else { + UnifiedPileupElementTracker tracker = (UnifiedPileupElementTracker) pileupElementTracker; + UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); + + Iterator positionIter = positions.iterator(); + + while (positionIter.hasNext()) { + int nextReadToKeep = (Integer) positionIter.next(); + filteredTracker.add(tracker.get(nextReadToKeep)); + } + + return createNewPileup(getLocation(), filteredTracker); + } + } + + @Override + public ReadBackedPileup getPileupForSamples(Collection sampleNames) { + if (pileupElementTracker instanceof PerSamplePileupElementTracker) { + PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; + PileupElementTracker filteredElements = tracker.getElements(sampleNames); + return filteredElements != null ? createNewPileup(loc, filteredElements) : null; + } else { + HashSet hashSampleNames = new HashSet(sampleNames); // to speed up the "contains" access in the for loop + UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); + for (PileupElement p : pileupElementTracker) { + GATKSAMRecord read = p.getRead(); + if (sampleNames != null) { // still checking on sampleNames because hashSampleNames will never be null. And empty means something else. + if (read.getReadGroup() != null && hashSampleNames.contains(read.getReadGroup().getSample())) + filteredTracker.add(p); + } else { + if (read.getReadGroup() == null || read.getReadGroup().getSample() == null) + filteredTracker.add(p); + } + } + return filteredTracker.size() > 0 ? createNewPileup(loc, filteredTracker) : null; + } + } + + @Override + public Map getPileupsForSamples(Collection sampleNames) { + Map result = new HashMap(); + if (pileupElementTracker instanceof PerSamplePileupElementTracker) { + PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; + for (String sample : sampleNames) { + PileupElementTracker filteredElements = tracker.getElements(sample); + if (filteredElements != null) + result.put(sample, createNewPileup(loc, filteredElements)); + } + } else { + Map> trackerMap = new HashMap>(); + + for (String sample : sampleNames) { // initialize pileups for each sample + UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); + trackerMap.put(sample, filteredTracker); + } + for (PileupElement p : pileupElementTracker) { // go through all pileup elements only once and add them to the respective sample's pileup + GATKSAMRecord read = p.getRead(); + if (read.getReadGroup() != null) { + String sample = read.getReadGroup().getSample(); + UnifiedPileupElementTracker tracker = trackerMap.get(sample); + if (tracker != null) // we only add the pileup the requested samples. Completely ignore the rest + tracker.add(p); + } + } + for (Map.Entry> entry : trackerMap.entrySet()) // create the ReadBackedPileup for each sample + result.put(entry.getKey(), createNewPileup(loc, entry.getValue())); + } + return result; + } + + + @Override + public ReadBackedPileup getPileupForSample(String sampleName) { + if (pileupElementTracker instanceof PerSamplePileupElementTracker) { + PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; + PileupElementTracker filteredElements = tracker.getElements(sampleName); + return filteredElements != null ? createNewPileup(loc, filteredElements) : null; + } else { + UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker(); + for (PileupElement p : pileupElementTracker) { + GATKSAMRecord read = p.getRead(); + if (sampleName != null) { + if (read.getReadGroup() != null && sampleName.equals(read.getReadGroup().getSample())) + filteredTracker.add(p); + } else { + if (read.getReadGroup() == null || read.getReadGroup().getSample() == null) + filteredTracker.add(p); + } + } + return filteredTracker.size() > 0 ? createNewPileup(loc, filteredTracker) : null; + } + } + + // -------------------------------------------------------- + // + // iterators + // + // -------------------------------------------------------- + + /** + * The best way to access PileupElements where you only care about the bases and quals in the pileup. + *

+ * for (PileupElement p : this) { doSomething(p); } + *

+ * Provides efficient iteration of the data. + * + * @return + */ + @Override + public Iterator iterator() { + return new Iterator() { + private final Iterator wrappedIterator = pileupElementTracker.iterator(); + + public boolean hasNext() { + return wrappedIterator.hasNext(); + } + + public PileupElement next() { + return wrappedIterator.next(); + } + + public void remove() { + throw new UnsupportedOperationException("Cannot remove from a pileup element iterator"); + } + }; + } + + /** + * The best way to access PileupElements where you only care not only about bases and quals in the pileup + * but also need access to the index of the pileup element in the pile. + * + * for (ExtendedPileupElement p : this) { doSomething(p); } + * + * Provides efficient iteration of the data. + * + * @return + */ + + /** + * Simple useful routine to count the number of deletion bases in this pileup + * + * @return + */ + @Override + public int getNumberOfDeletions() { + if ( nDeletions == UNINITIALIZED_CACHED_INT_VALUE ) { + nDeletions = 0; + for (PileupElement p : pileupElementTracker.unorderedIterable() ) { + if (p.isDeletion()) { + nDeletions++; + } + } + } + return nDeletions; + } + + @Override + public int getNumberOfMappingQualityZeroReads() { + if ( nMQ0Reads == UNINITIALIZED_CACHED_INT_VALUE ) { + nMQ0Reads = 0; + + for (PileupElement p : pileupElementTracker.unorderedIterable()) { + if (p.getRead().getMappingQuality() == 0) { + nMQ0Reads++; + } + } + } + + return nMQ0Reads; + } + + /** + * @return the number of physical elements in this pileup + */ + @Override + public int getNumberOfElements() { + return pileupElementTracker.size(); + } + + /** + * @return the number of abstract elements in this pileup + */ + @Override + public int depthOfCoverage() { + if (depthOfCoverage == UNINITIALIZED_CACHED_INT_VALUE) { + depthOfCoverage = 0; + for (PileupElement p : pileupElementTracker.unorderedIterable()) { + depthOfCoverage += p.getRepresentativeCount(); + } + } + return depthOfCoverage; + } + + /** + * @return true if there are 0 elements in the pileup, false otherwise + */ + @Override + public boolean isEmpty() { + return getNumberOfElements() == 0; + } + + + /** + * @return the location of this pileup + */ + @Override + public GenomeLoc getLocation() { + return loc; + } + + /** + * Get counts of A, C, G, T in order, which returns a int[4] vector with counts according + * to BaseUtils.simpleBaseToBaseIndex for each base. + * + * @return + */ + @Override + public int[] getBaseCounts() { + int[] counts = new int[4]; + + // TODO -- can be optimized with .unorderedIterable() + if (pileupElementTracker instanceof PerSamplePileupElementTracker) { + PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; + for (final String sample : tracker.getSamples()) { + int[] countsBySample = createNewPileup(loc, tracker.getElements(sample)).getBaseCounts(); + for (int i = 0; i < counts.length; i++) + counts[i] += countsBySample[i]; + } + } else { + for (PileupElement pile : this) { + // skip deletion sites + if (!pile.isDeletion()) { + int index = BaseUtils.simpleBaseToBaseIndex((char) pile.getBase()); + if (index != -1) + counts[index]++; + } + } + } + + return counts; + } + + @Override + public String getPileupString(Character ref) { + // In the pileup format, each line represents a genomic position, consisting of chromosome name, + // coordinate, reference base, read bases, read qualities and alignment mapping qualities. + return String.format("%s %s %c %s %s", + getLocation().getContig(), getLocation().getStart(), // chromosome name and coordinate + ref, // reference base + new String(getBases()), + getQualsString()); + } + + // -------------------------------------------------------- + // + // Convenience functions that may be slow + // + // -------------------------------------------------------- + + /** + * Returns a list of the reads in this pileup. Note this call costs O(n) and allocates fresh lists each time + * + * @return + */ + @Override + public List getReads() { + List reads = new ArrayList(getNumberOfElements()); + for (PileupElement pile : this) { + reads.add(pile.getRead()); + } + return reads; + } + + @Override + public int getNumberOfDeletionsAfterThisElement() { + int count = 0; + for (PileupElement p : pileupElementTracker.unorderedIterable()) { + if (p.isBeforeDeletionStart()) + count++; + } + return count; + } + + @Override + public int getNumberOfInsertionsAfterThisElement() { + int count = 0; + for (PileupElement p : pileupElementTracker.unorderedIterable()) { + if (p.isBeforeInsertion()) + count++; + } + return count; + + } + /** + * Returns a list of the offsets in this pileup. Note this call costs O(n) and allocates fresh lists each time + * + * @return + */ + @Override + public List getOffsets() { + List offsets = new ArrayList(getNumberOfElements()); + for (PileupElement pile : pileupElementTracker.unorderedIterable()) { + offsets.add(pile.getOffset()); + } + return offsets; + } + + /** + * Returns an array of the bases in this pileup. Note this call costs O(n) and allocates fresh array each time + * + * @return + */ + @Override + public byte[] getBases() { + byte[] v = new byte[getNumberOfElements()]; + int pos = 0; + for (PileupElement pile : pileupElementTracker) { + v[pos++] = pile.getBase(); + } + return v; + } + + /** + * Returns an array of the quals in this pileup. Note this call costs O(n) and allocates fresh array each time + * + * @return + */ + @Override + public byte[] getQuals() { + byte[] v = new byte[getNumberOfElements()]; + int pos = 0; + for (PileupElement pile : pileupElementTracker) { + v[pos++] = pile.getQual(); + } + return v; + } + + /** + * Get an array of the mapping qualities + * + * @return + */ + @Override + public byte[] getMappingQuals() { + byte[] v = new byte[getNumberOfElements()]; + int pos = 0; + for (PileupElement pile : pileupElementTracker) { + v[pos++] = (byte) pile.getRead().getMappingQuality(); + } + return v; + } + + static String quals2String(byte[] quals) { + StringBuilder qualStr = new StringBuilder(); + for (int qual : quals) { + qual = Math.min(qual, 63); // todo: fixme, this isn't a good idea + char qualChar = (char) (33 + qual); // todo: warning, this is illegal for qual > 63 + qualStr.append(qualChar); + } + + return qualStr.toString(); + } + + private String getQualsString() { + return quals2String(getQuals()); + } + + /** + * Returns a new ReadBackedPileup that is sorted by start coordinate of the reads. + * + * @return + */ + @Override + public ReadBackedPileup getStartSortedPileup() { + + final TreeSet sortedElements = new TreeSet(new Comparator() { + @Override + public int compare(PileupElement element1, PileupElement element2) { + final int difference = element1.getRead().getAlignmentStart() - element2.getRead().getAlignmentStart(); + return difference != 0 ? difference : element1.getRead().getReadName().compareTo(element2.getRead().getReadName()); + } + }); + + if (pileupElementTracker instanceof PerSamplePileupElementTracker) { + PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker; + + for (final String sample : tracker.getSamples()) { + PileupElementTracker perSampleElements = tracker.getElements(sample); + for (PileupElement pile : perSampleElements) + sortedElements.add(pile); + } + } + else { + UnifiedPileupElementTracker tracker = (UnifiedPileupElementTracker) pileupElementTracker; + for (PileupElement pile : tracker) + sortedElements.add(pile); + } + + UnifiedPileupElementTracker sortedTracker = new UnifiedPileupElementTracker(); + for (PileupElement pile : sortedElements) + sortedTracker.add(pile); + + return createNewPileup(loc, sortedTracker); + } + + @Override + public FragmentCollection toFragments() { + return FragmentUtils.create(this); + } + + @Override + public ReadBackedPileup copy() { + return new ReadBackedPileupImpl(loc, pileupElementTracker.copy()); } } diff --git a/public/java/test/org/broadinstitute/sting/utils/pileup/ReadBackedPileupUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/pileup/ReadBackedPileupUnitTest.java index 3951de93d..18fa8a302 100644 --- a/public/java/test/org/broadinstitute/sting/utils/pileup/ReadBackedPileupUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/pileup/ReadBackedPileupUnitTest.java @@ -25,12 +25,18 @@ package org.broadinstitute.sting.utils.pileup; +import net.sf.samtools.CigarElement; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMReadGroupRecord; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.testng.Assert; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.util.*; @@ -39,6 +45,17 @@ import java.util.*; * Test routines for read-backed pileup. */ public class ReadBackedPileupUnitTest { + protected static SAMFileHeader header; + protected GenomeLocParser genomeLocParser; + private GenomeLoc loc; + + @BeforeClass + public void beforeClass() { + header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000); + genomeLocParser = new GenomeLocParser(header.getSequenceDictionary()); + loc = genomeLocParser.createGenomeLoc("chr1", 1); + } + /** * Ensure that basic read group splitting works. */ @@ -195,4 +212,98 @@ public class ReadBackedPileupUnitTest { missingSamplePileup = pileup.getPileupForSample("not here"); Assert.assertNull(missingSamplePileup,"Pileup for sample 'not here' should be null but isn't"); } -} + + private static int sampleI = 0; + private class RBPCountTest { + final String sample; + final int nReads, nMapq0, nDeletions; + + private RBPCountTest(int nReads, int nMapq0, int nDeletions) { + this.sample = "sample" + sampleI++; + this.nReads = nReads; + this.nMapq0 = nMapq0; + this.nDeletions = nDeletions; + } + + private List makeReads( final int n, final int mapq, final String op ) { + final int readLength = 3; + + final List elts = new LinkedList(); + for ( int i = 0; i < n; i++ ) { + GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read", 0, 1, readLength); + read.setReadBases(Utils.dupBytes((byte) 'A', readLength)); + read.setBaseQualities(Utils.dupBytes((byte) 30, readLength)); + read.setCigarString("1M1" + op + "1M"); + read.setMappingQuality(mapq); + final int baseOffset = op.equals("M") ? 1 : 0; + final CigarElement cigarElement = read.getCigar().getCigarElement(1); + elts.add(new PileupElement(read, baseOffset, cigarElement, 1, 0)); + } + + return elts; + } + + private ReadBackedPileupImpl makePileup() { + final List elts = new LinkedList(); + + elts.addAll(makeReads(nMapq0, 0, "M")); + elts.addAll(makeReads(nDeletions, 30, "D")); + elts.addAll(makeReads(nReads - nMapq0 - nDeletions, 30, "M")); + + return new ReadBackedPileupImpl(loc, elts); + } + + @Override + public String toString() { + return "RBPCountTest{" + + "sample='" + sample + '\'' + + ", nReads=" + nReads + + ", nMapq0=" + nMapq0 + + ", nDeletions=" + nDeletions + + '}'; + } + } + + @DataProvider(name = "RBPCountingTest") + public Object[][] makeRBPCountingTest() { + final List tests = new LinkedList(); + + for ( final int nMapq : Arrays.asList(0, 10, 20) ) { + for ( final int nDeletions : Arrays.asList(0, 10, 20) ) { + for ( final int nReg : Arrays.asList(0, 10, 20) ) { + final int total = nMapq + nDeletions + nReg; + if ( total > 0 ) + tests.add(new Object[]{new RBPCountTest(total, nMapq, nDeletions)}); + } + } + } + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "RBPCountingTest") + public void testRBPCountingTestSinglePileup(RBPCountTest params) { + testRBPCounts(params.makePileup(), params); + } + + @Test(dataProvider = "RBPCountingTest") + public void testRBPCountingTestMultiSample(RBPCountTest params) { + final RBPCountTest newSample = new RBPCountTest(2, 1, 1); + final Map pileupsBySample = new HashMap(); + pileupsBySample.put(newSample.sample, newSample.makePileup()); + pileupsBySample.put(params.sample, params.makePileup()); + final ReadBackedPileup pileup = new ReadBackedPileupImpl(loc, pileupsBySample); + testRBPCounts(pileup, new RBPCountTest(params.nReads + 2, params.nMapq0 + 1, params.nDeletions + 1)); + } + + + private void testRBPCounts(final ReadBackedPileup rbp, RBPCountTest expected) { + for ( int cycles = 0; cycles < 3; cycles++ ) { + // multiple cycles to make sure caching is working + Assert.assertEquals(rbp.getNumberOfElements(), expected.nReads); + Assert.assertEquals(rbp.depthOfCoverage(), expected.nReads); + Assert.assertEquals(rbp.getNumberOfDeletions(), expected.nDeletions); + Assert.assertEquals(rbp.getNumberOfMappingQualityZeroReads(), expected.nMapq0); + } + } +} \ No newline at end of file