diff --git a/ivy.xml b/ivy.xml
index 6b60acfa3..1802c1627 100644
--- a/ivy.xml
+++ b/ivy.xml
@@ -61,6 +61,7 @@
+
diff --git a/public/java/src/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByState.java b/public/java/src/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByState.java
index 22de68a5d..fe769bead 100644
--- a/public/java/src/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByState.java
+++ b/public/java/src/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByState.java
@@ -242,39 +242,30 @@ public class LocusIteratorByState extends LocusIterator {
final Iterator iterator = readStates.iterator(sample);
final List pile = new ArrayList(readStates.size(sample));
- int size = 0; // number of elements in this sample's pileup
- int nDeletions = 0; // number of deletions in this sample's pileup
- int nMQ0Reads = 0; // number of MQ0 reads in this sample's pileup (warning: current implementation includes N bases that are MQ0)
-
while (iterator.hasNext()) {
- final AlignmentStateMachine state = iterator.next(); // state object with the read/offset information
- final GATKSAMRecord read = (GATKSAMRecord) state.getRead(); // the actual read
- final CigarOperator op = state.getCigarOperator(); // current cigar operator
+ // state object with the read/offset information
+ final AlignmentStateMachine state = iterator.next();
+ final GATKSAMRecord read = (GATKSAMRecord) state.getRead();
+ final CigarOperator op = state.getCigarOperator();
- if (op == CigarOperator.N) // N's are never added to any pileup
+ if (op == CigarOperator.N) // N's are never added to any pileup
continue;
if (!dontIncludeReadInPileup(read, location.getStart())) {
- if ( op == CigarOperator.D ) {
- if ( ! includeReadsWithDeletionAtLoci )
- continue;
- nDeletions++;
+ if ( ! includeReadsWithDeletionAtLoci && op == CigarOperator.D ) {
+ continue;
}
pile.add(state.makePileupElement());
- size++;
-
- if ( read.getMappingQuality() == 0 )
- nMQ0Reads++;
}
}
- if (pile.size() != 0) // if this pileup added at least one base, add it to the full pileup
- fullPileup.put(sample, new ReadBackedPileupImpl(location, pile, size, nDeletions, nMQ0Reads));
+ if (! pile.isEmpty() ) // if this pileup added at least one base, add it to the full pileup
+ fullPileup.put(sample, new ReadBackedPileupImpl(location, pile));
}
- updateReadStates(); // critical - must be called after we get the current state offsets and location
- if (!fullPileup.isEmpty()) // if we got reads with non-D/N over the current position, we are done
+ updateReadStates(); // critical - must be called after we get the current state offsets and location
+ if (!fullPileup.isEmpty()) // if we got reads with non-D/N over the current position, we are done
nextAlignmentContext = new AlignmentContext(location, new ReadBackedPileupImpl(location, fullPileup), hasBeenSampled);
}
}
diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java b/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java
deleted file mode 100644
index 73a11de2c..000000000
--- a/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java
+++ /dev/null
@@ -1,1064 +0,0 @@
-/*
-* Copyright (c) 2012 The Broad Institute
-*
-* Permission is hereby granted, free of charge, to any person
-* obtaining a copy of this software and associated documentation
-* files (the "Software"), to deal in the Software without
-* restriction, including without limitation the rights to use,
-* copy, modify, merge, publish, distribute, sublicense, and/or sell
-* copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following
-* conditions:
-*
-* The above copyright notice and this permission notice shall be
-* included in all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
-* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-*/
-
-package org.broadinstitute.sting.utils.pileup;
-
-import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
-import org.broadinstitute.variant.utils.BaseUtils;
-import org.broadinstitute.sting.utils.GenomeLoc;
-import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
-import org.broadinstitute.sting.utils.fragments.FragmentCollection;
-import org.broadinstitute.sting.utils.fragments.FragmentUtils;
-import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
-
-import java.util.*;
-
-/**
- * A generic implementation of read-backed pileups.
- *
- * @author mhanna
- * @version 0.1
- */
-public abstract class AbstractReadBackedPileup, PE extends PileupElement> implements ReadBackedPileup {
- protected final GenomeLoc loc;
- protected final PileupElementTracker pileupElementTracker;
-
- protected int size = 0; // cached value of the size of the pileup
- protected int abstractSize = -1; // cached value of the abstract size of the pileup
- protected int nDeletions = 0; // cached value of the number of deletions
- protected int nMQ0Reads = 0; // cached value of the number of MQ0 reads
-
- /**
- * Create a new version of a read backed pileup at loc, using the reads and their corresponding
- * offsets. This pileup will contain a list, in order of the reads, of the piled bases at
- * reads[i] for all i in offsets. Does not make a copy of the data, so it's not safe to
- * go changing the reads.
- *
- * @param loc The genome loc to associate reads wotj
- * @param reads
- * @param offsets
- */
- public AbstractReadBackedPileup(GenomeLoc loc, List reads, List offsets) {
- this.loc = loc;
- this.pileupElementTracker = readsOffsets2Pileup(reads, offsets);
- }
-
-
- /**
- * Create a new version of a read backed pileup at loc without any aligned reads
- */
- public AbstractReadBackedPileup(GenomeLoc loc) {
- this(loc, new UnifiedPileupElementTracker());
- }
-
- /**
- * Create a new version of a read backed pileup at loc, using the reads and their corresponding
- * offsets. This lower level constructure assumes pileup is well-formed and merely keeps a
- * pointer to pileup. Don't go changing the data in pileup.
- */
- public AbstractReadBackedPileup(GenomeLoc loc, List pileup) {
- if (loc == null) throw new ReviewedStingException("Illegal null genomeloc in ReadBackedPileup");
- if (pileup == null) throw new ReviewedStingException("Illegal null pileup in ReadBackedPileup");
-
- this.loc = loc;
- this.pileupElementTracker = new UnifiedPileupElementTracker(pileup);
- calculateCachedData();
- }
-
- /**
- * Optimization of above constructor where all of the cached data is provided
- *
- * @param loc
- * @param pileup
- */
- public AbstractReadBackedPileup(GenomeLoc loc, List pileup, int size, int nDeletions, int nMQ0Reads) {
- if (loc == null) throw new ReviewedStingException("Illegal null genomeloc in UnifiedReadBackedPileup");
- if (pileup == null) throw new ReviewedStingException("Illegal null pileup in UnifiedReadBackedPileup");
-
- this.loc = loc;
- this.pileupElementTracker = new UnifiedPileupElementTracker(pileup);
- this.size = size;
- this.nDeletions = nDeletions;
- this.nMQ0Reads = nMQ0Reads;
- }
-
-
- protected AbstractReadBackedPileup(GenomeLoc loc, PileupElementTracker tracker) {
- this.loc = loc;
- this.pileupElementTracker = tracker;
- calculateCachedData();
- }
-
- protected AbstractReadBackedPileup(GenomeLoc loc, Map> pileupsBySample) {
- this.loc = loc;
- PerSamplePileupElementTracker tracker = new PerSamplePileupElementTracker();
- for (Map.Entry> pileupEntry : pileupsBySample.entrySet()) {
- tracker.addElements(pileupEntry.getKey(), pileupEntry.getValue().pileupElementTracker);
- addPileupToCumulativeStats(pileupEntry.getValue());
- }
- this.pileupElementTracker = tracker;
- }
-
- public AbstractReadBackedPileup(GenomeLoc loc, List reads, int offset) {
- this.loc = loc;
- this.pileupElementTracker = readsOffsets2Pileup(reads, offset);
- }
-
- /**
- * Calculate cached sizes, nDeletion, and base counts for the pileup. This calculation is done upfront,
- * so you pay the cost at the start, but it's more efficient to do this rather than pay the cost of calling
- * sizes, nDeletion, etc. over and over potentially.
- */
- protected void calculateCachedData() {
- size = 0;
- nDeletions = 0;
- nMQ0Reads = 0;
-
- for (PileupElement p : pileupElementTracker) {
- size++;
- if (p.isDeletion()) {
- nDeletions++;
- }
- if (p.getRead().getMappingQuality() == 0) {
- nMQ0Reads++;
- }
- }
- }
-
- protected void calculateAbstractSize() {
- abstractSize = 0;
- for (PileupElement p : pileupElementTracker) {
- abstractSize += p.getRepresentativeCount();
- }
- }
-
- protected void addPileupToCumulativeStats(AbstractReadBackedPileup pileup) {
- size += pileup.getNumberOfElements();
- abstractSize = pileup.depthOfCoverage() + (abstractSize == -1 ? 0 : abstractSize);
- nDeletions += pileup.getNumberOfDeletions();
- nMQ0Reads += pileup.getNumberOfMappingQualityZeroReads();
- }
-
- /**
- * Helper routine for converting reads and offset lists to a PileupElement list.
- *
- * @param reads
- * @param offsets
- * @return
- */
- private PileupElementTracker readsOffsets2Pileup(List reads, List offsets) {
- if (reads == null) throw new ReviewedStingException("Illegal null read list in UnifiedReadBackedPileup");
- if (offsets == null) throw new ReviewedStingException("Illegal null offsets list in UnifiedReadBackedPileup");
- if (reads.size() != offsets.size())
- throw new ReviewedStingException("Reads and offset lists have different sizes!");
-
- UnifiedPileupElementTracker pileup = new UnifiedPileupElementTracker();
- for (int i = 0; i < reads.size(); i++) {
- GATKSAMRecord read = reads.get(i);
- int offset = offsets.get(i);
- pileup.add(createNewPileupElement(read, offset)); // only used to create fake pileups for testing so ancillary information is not important
- }
-
- return pileup;
- }
-
- /**
- * Helper routine for converting reads and a single offset to a PileupElement list.
- *
- * @param reads
- * @param offset
- * @return
- */
- private PileupElementTracker readsOffsets2Pileup(List reads, int offset) {
- if (reads == null) throw new ReviewedStingException("Illegal null read list in UnifiedReadBackedPileup");
- if (offset < 0) throw new ReviewedStingException("Illegal offset < 0 UnifiedReadBackedPileup");
-
- UnifiedPileupElementTracker pileup = new UnifiedPileupElementTracker();
- for (GATKSAMRecord read : reads) {
- pileup.add(createNewPileupElement(read, offset)); // only used to create fake pileups for testing so ancillary information is not important
- }
-
- return pileup;
- }
-
- protected abstract AbstractReadBackedPileup createNewPileup(GenomeLoc loc, PileupElementTracker pileupElementTracker);
-
- protected abstract PE createNewPileupElement(final GATKSAMRecord read, final int offset);
-
- // --------------------------------------------------------
- //
- // Special 'constructors'
- //
- // --------------------------------------------------------
-
- /**
- * Returns a new ReadBackedPileup that is free of deletion spanning reads in this pileup. Note that this
- * does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy
- * of the pileup (just returns this) if there are no deletions in the pileup.
- *
- * @return
- */
- @Override
- public RBP getPileupWithoutDeletions() {
- if (getNumberOfDeletions() > 0) {
- if (pileupElementTracker instanceof PerSamplePileupElementTracker) {
- PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker;
- PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker();
-
- for (final String sample : tracker.getSamples()) {
- PileupElementTracker perSampleElements = tracker.getElements(sample);
- AbstractReadBackedPileup pileup = createNewPileup(loc, perSampleElements).getPileupWithoutDeletions();
- filteredTracker.addElements(sample, pileup.pileupElementTracker);
- }
- return (RBP) createNewPileup(loc, filteredTracker);
-
- } else {
- UnifiedPileupElementTracker tracker = (UnifiedPileupElementTracker) pileupElementTracker;
- UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker();
-
- for (PE p : tracker) {
- if (!p.isDeletion()) {
- filteredTracker.add(p);
- }
- }
- return (RBP) createNewPileup(loc, filteredTracker);
- }
- } else {
- return (RBP) this;
- }
- }
-
- /**
- * Returns a new ReadBackedPileup where only one read from an overlapping read
- * pair is retained. If the two reads in question disagree to their basecall,
- * neither read is retained. If they agree on the base, the read with the higher
- * base quality observation is retained
- *
- * @return the newly filtered pileup
- */
- @Override
- public ReadBackedPileup getOverlappingFragmentFilteredPileup() {
- return getOverlappingFragmentFilteredPileup(true, true);
- }
-
- /**
- * Returns a new ReadBackedPileup where only one read from an overlapping read
- * pair is retained. If discardDiscordant and the two reads in question disagree to their basecall,
- * neither read is retained. Otherwise, the read with the higher
- * quality (base or mapping, depending on baseQualNotMapQual) observation is retained
- *
- * @return the newly filtered pileup
- */
- @Override
- public RBP getOverlappingFragmentFilteredPileup(boolean discardDiscordant, boolean baseQualNotMapQual) {
- if (pileupElementTracker instanceof PerSamplePileupElementTracker) {
- PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker;
- PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker();
-
- for (final String sample : tracker.getSamples()) {
- PileupElementTracker perSampleElements = tracker.getElements(sample);
- AbstractReadBackedPileup pileup = createNewPileup(loc, perSampleElements).getOverlappingFragmentFilteredPileup(discardDiscordant, baseQualNotMapQual);
- filteredTracker.addElements(sample, pileup.pileupElementTracker);
- }
- return (RBP) createNewPileup(loc, filteredTracker);
- } else {
- Map filteredPileup = new HashMap();
-
- for (PE p : pileupElementTracker) {
- String readName = p.getRead().getReadName();
-
- // if we've never seen this read before, life is good
- if (!filteredPileup.containsKey(readName)) {
- filteredPileup.put(readName, p);
- } else {
- PileupElement existing = filteredPileup.get(readName);
-
- // if the reads disagree at this position, throw them both out. Otherwise
- // keep the element with the higher quality score
- if (discardDiscordant && existing.getBase() != p.getBase()) {
- filteredPileup.remove(readName);
- } else {
- if (baseQualNotMapQual) {
- if (existing.getQual() < p.getQual())
- filteredPileup.put(readName, p);
- }
- else {
- if (existing.getMappingQual() < p.getMappingQual())
- filteredPileup.put(readName, p);
- }
- }
- }
- }
-
- UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker();
- for (PE filteredElement : filteredPileup.values())
- filteredTracker.add(filteredElement);
-
- return (RBP) createNewPileup(loc, filteredTracker);
- }
- }
-
-
- /**
- * Returns a new ReadBackedPileup that is free of mapping quality zero reads in this pileup. Note that this
- * does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy
- * of the pileup (just returns this) if there are no MQ0 reads in the pileup.
- *
- * @return
- */
- @Override
- public RBP getPileupWithoutMappingQualityZeroReads() {
- if (getNumberOfMappingQualityZeroReads() > 0) {
- if (pileupElementTracker instanceof PerSamplePileupElementTracker) {
- PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker;
- PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker();
-
- for (final String sample : tracker.getSamples()) {
- PileupElementTracker perSampleElements = tracker.getElements(sample);
- AbstractReadBackedPileup pileup = createNewPileup(loc, perSampleElements).getPileupWithoutMappingQualityZeroReads();
- filteredTracker.addElements(sample, pileup.pileupElementTracker);
- }
- return (RBP) createNewPileup(loc, filteredTracker);
-
- } else {
- UnifiedPileupElementTracker tracker = (UnifiedPileupElementTracker) pileupElementTracker;
- UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker();
-
- for (PE p : tracker) {
- if (p.getRead().getMappingQuality() > 0) {
- filteredTracker.add(p);
- }
- }
- return (RBP) createNewPileup(loc, filteredTracker);
- }
- } else {
- return (RBP) this;
- }
- }
-
- public RBP getPositiveStrandPileup() {
- if (pileupElementTracker instanceof PerSamplePileupElementTracker) {
- PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker;
- PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker();
-
- for (final String sample : tracker.getSamples()) {
- PileupElementTracker perSampleElements = tracker.getElements(sample);
- AbstractReadBackedPileup pileup = createNewPileup(loc, perSampleElements).getPositiveStrandPileup();
- filteredTracker.addElements(sample, pileup.pileupElementTracker);
- }
- return (RBP) createNewPileup(loc, filteredTracker);
- } else {
- UnifiedPileupElementTracker tracker = (UnifiedPileupElementTracker) pileupElementTracker;
- UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker();
-
- for (PE p : tracker) {
- if (!p.getRead().getReadNegativeStrandFlag()) {
- filteredTracker.add(p);
- }
- }
- return (RBP) createNewPileup(loc, filteredTracker);
- }
- }
-
- /**
- * Gets the pileup consisting of only reads on the negative strand.
- *
- * @return A read-backed pileup consisting only of reads on the negative strand.
- */
- public RBP getNegativeStrandPileup() {
- if (pileupElementTracker instanceof PerSamplePileupElementTracker) {
- PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker;
- PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker();
-
- for (final String sample : tracker.getSamples()) {
- PileupElementTracker perSampleElements = tracker.getElements(sample);
- AbstractReadBackedPileup pileup = createNewPileup(loc, perSampleElements).getNegativeStrandPileup();
- filteredTracker.addElements(sample, pileup.pileupElementTracker);
- }
- return (RBP) createNewPileup(loc, filteredTracker);
- } else {
- UnifiedPileupElementTracker tracker = (UnifiedPileupElementTracker) pileupElementTracker;
- UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker();
-
- for (PE p : tracker) {
- if (p.getRead().getReadNegativeStrandFlag()) {
- filteredTracker.add(p);
- }
- }
- return (RBP) createNewPileup(loc, filteredTracker);
- }
- }
-
- /**
- * Gets a pileup consisting of all those elements passed by a given filter.
- *
- * @param filter Filter to use when testing for elements.
- * @return a pileup without the given filtered elements.
- */
- public RBP getFilteredPileup(PileupElementFilter filter) {
- if (pileupElementTracker instanceof PerSamplePileupElementTracker) {
- PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker;
- PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker();
-
- for (final String sample : tracker.getSamples()) {
- PileupElementTracker perSampleElements = tracker.getElements(sample);
- AbstractReadBackedPileup pileup = createNewPileup(loc, perSampleElements).getFilteredPileup(filter);
- filteredTracker.addElements(sample, pileup.pileupElementTracker);
- }
-
- return (RBP) createNewPileup(loc, filteredTracker);
- } else {
- UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker();
-
- for (PE p : pileupElementTracker) {
- if (filter.allow(p))
- filteredTracker.add(p);
- }
-
- return (RBP) createNewPileup(loc, filteredTracker);
- }
- }
-
- /**
- * Returns subset of this pileup that contains only bases with quality >= minBaseQ, coming from
- * reads with mapping qualities >= minMapQ. This method allocates and returns a new instance of ReadBackedPileup.
- *
- * @param minBaseQ
- * @param minMapQ
- * @return
- */
- @Override
- public RBP getBaseAndMappingFilteredPileup(int minBaseQ, int minMapQ) {
- if (pileupElementTracker instanceof PerSamplePileupElementTracker) {
- PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker;
- PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker();
-
- for (final String sample : tracker.getSamples()) {
- PileupElementTracker perSampleElements = tracker.getElements(sample);
- AbstractReadBackedPileup pileup = createNewPileup(loc, perSampleElements).getBaseAndMappingFilteredPileup(minBaseQ, minMapQ);
- filteredTracker.addElements(sample, pileup.pileupElementTracker);
- }
-
- return (RBP) createNewPileup(loc, filteredTracker);
- } else {
- UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker();
-
- for (PE p : pileupElementTracker) {
- if (p.getRead().getMappingQuality() >= minMapQ && (p.isDeletion() || p.getQual() >= minBaseQ)) {
- filteredTracker.add(p);
- }
- }
-
- return (RBP) createNewPileup(loc, filteredTracker);
- }
- }
-
- /**
- * Returns subset of this pileup that contains only bases with quality >= minBaseQ.
- * This method allocates and returns a new instance of ReadBackedPileup.
- *
- * @param minBaseQ
- * @return
- */
- @Override
- public RBP getBaseFilteredPileup(int minBaseQ) {
- return getBaseAndMappingFilteredPileup(minBaseQ, -1);
- }
-
- /**
- * Returns subset of this pileup that contains only bases coming from reads with mapping quality >= minMapQ.
- * This method allocates and returns a new instance of ReadBackedPileup.
- *
- * @param minMapQ
- * @return
- */
- @Override
- public RBP getMappingFilteredPileup(int minMapQ) {
- return getBaseAndMappingFilteredPileup(-1, minMapQ);
- }
-
- /**
- * Gets a list of the read groups represented in this pileup.
- *
- * @return
- */
- @Override
- public Collection getReadGroups() {
- Set readGroups = new HashSet();
- for (PileupElement pileupElement : this)
- readGroups.add(pileupElement.getRead().getReadGroup().getReadGroupId());
- return readGroups;
- }
-
- /**
- * Gets the pileup for a given read group. Horrendously inefficient at this point.
- *
- * @param targetReadGroupId Identifier for the read group.
- * @return A read-backed pileup containing only the reads in the given read group.
- */
- @Override
- public RBP getPileupForReadGroup(String targetReadGroupId) {
- if (pileupElementTracker instanceof PerSamplePileupElementTracker) {
- PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker;
- PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker();
-
- for (final String sample : tracker.getSamples()) {
- PileupElementTracker perSampleElements = tracker.getElements(sample);
- AbstractReadBackedPileup pileup = createNewPileup(loc, perSampleElements).getPileupForReadGroup(targetReadGroupId);
- if (pileup != null)
- filteredTracker.addElements(sample, pileup.pileupElementTracker);
- }
- return filteredTracker.size() > 0 ? (RBP) createNewPileup(loc, filteredTracker) : null;
- } else {
- UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker();
- for (PE p : pileupElementTracker) {
- GATKSAMRecord read = p.getRead();
- if (targetReadGroupId != null) {
- if (read.getReadGroup() != null && targetReadGroupId.equals(read.getReadGroup().getReadGroupId()))
- filteredTracker.add(p);
- } else {
- if (read.getReadGroup() == null || read.getReadGroup().getReadGroupId() == null)
- filteredTracker.add(p);
- }
- }
- return filteredTracker.size() > 0 ? (RBP) createNewPileup(loc, filteredTracker) : null;
- }
- }
-
- /**
- * Gets the pileup for a set of read groups. Horrendously inefficient at this point.
- *
- * @param rgSet List of identifiers for the read groups.
- * @return A read-backed pileup containing only the reads in the given read groups.
- */
- @Override
- public RBP getPileupForReadGroups(final HashSet rgSet) {
- if (pileupElementTracker instanceof PerSamplePileupElementTracker) {
- PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker;
- PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker();
-
- for (final String sample : tracker.getSamples()) {
- PileupElementTracker perSampleElements = tracker.getElements(sample);
- AbstractReadBackedPileup pileup = createNewPileup(loc, perSampleElements).getPileupForReadGroups(rgSet);
- if (pileup != null)
- filteredTracker.addElements(sample, pileup.pileupElementTracker);
- }
- return filteredTracker.size() > 0 ? (RBP) createNewPileup(loc, filteredTracker) : null;
- } else {
- UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker();
- for (PE p : pileupElementTracker) {
- GATKSAMRecord read = p.getRead();
- if (rgSet != null && !rgSet.isEmpty()) {
- if (read.getReadGroup() != null && rgSet.contains(read.getReadGroup().getReadGroupId()))
- filteredTracker.add(p);
- } else {
- if (read.getReadGroup() == null || read.getReadGroup().getReadGroupId() == null)
- filteredTracker.add(p);
- }
- }
- return filteredTracker.size() > 0 ? (RBP) createNewPileup(loc, filteredTracker) : null;
- }
- }
-
- @Override
- public RBP getPileupForLane(String laneID) {
- if (pileupElementTracker instanceof PerSamplePileupElementTracker) {
- PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker;
- PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker();
-
- for (final String sample : tracker.getSamples()) {
- PileupElementTracker perSampleElements = tracker.getElements(sample);
- AbstractReadBackedPileup pileup = createNewPileup(loc, perSampleElements).getPileupForLane(laneID);
- if (pileup != null)
- filteredTracker.addElements(sample, pileup.pileupElementTracker);
- }
- return filteredTracker.size() > 0 ? (RBP) createNewPileup(loc, filteredTracker) : null;
- } else {
- UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker();
- for (PE p : pileupElementTracker) {
- GATKSAMRecord read = p.getRead();
- if (laneID != null) {
- if (read.getReadGroup() != null &&
- (read.getReadGroup().getReadGroupId().startsWith(laneID + ".")) || // lane is the same, but sample identifier is different
- (read.getReadGroup().getReadGroupId().equals(laneID))) // in case there is no sample identifier, they have to be exactly the same
- filteredTracker.add(p);
- } else {
- if (read.getReadGroup() == null || read.getReadGroup().getReadGroupId() == null)
- filteredTracker.add(p);
- }
- }
- return filteredTracker.size() > 0 ? (RBP) createNewPileup(loc, filteredTracker) : null;
- }
- }
-
- public Collection getSamples() {
- if (pileupElementTracker instanceof PerSamplePileupElementTracker) {
- PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker;
- return new HashSet(tracker.getSamples());
- } else {
- Collection sampleNames = new HashSet();
- for (PileupElement p : this) {
- GATKSAMRecord read = p.getRead();
- String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null;
- sampleNames.add(sampleName);
- }
- return sampleNames;
- }
- }
-
- /**
- * Returns a pileup randomly downsampled to the desiredCoverage.
- *
- * TODO: delete this once the experimental downsampler stabilizes
- *
- * @param desiredCoverage
- * @return
- */
- @Override
- public RBP getDownsampledPileup(int desiredCoverage) {
- if (getNumberOfElements() <= desiredCoverage)
- return (RBP) this;
-
- // randomly choose numbers corresponding to positions in the reads list
- TreeSet positions = new TreeSet();
- for (int i = 0; i < desiredCoverage; /* no update */) {
- if (positions.add(GenomeAnalysisEngine.getRandomGenerator().nextInt(size)))
- i++;
- }
-
- if (pileupElementTracker instanceof PerSamplePileupElementTracker) {
- PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker;
- PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker();
-
-
- for (final String sample : tracker.getSamples()) {
- PileupElementTracker perSampleElements = tracker.getElements(sample);
-
- int current = 0;
- UnifiedPileupElementTracker filteredPileup = new UnifiedPileupElementTracker();
- for (PE p : perSampleElements) {
- if (positions.contains(current))
- filteredPileup.add(p);
- current++;
-
- }
- filteredTracker.addElements(sample, filteredPileup);
- }
-
- return (RBP) createNewPileup(loc, filteredTracker);
- } else {
- UnifiedPileupElementTracker tracker = (UnifiedPileupElementTracker) pileupElementTracker;
- UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker();
-
- Iterator positionIter = positions.iterator();
-
- while (positionIter.hasNext()) {
- int nextReadToKeep = (Integer) positionIter.next();
- filteredTracker.add(tracker.get(nextReadToKeep));
- }
-
- return (RBP) createNewPileup(getLocation(), filteredTracker);
- }
- }
-
- @Override
- public RBP getPileupForSamples(Collection sampleNames) {
- if (pileupElementTracker instanceof PerSamplePileupElementTracker) {
- PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker;
- PileupElementTracker filteredElements = tracker.getElements(sampleNames);
- return filteredElements != null ? (RBP) createNewPileup(loc, filteredElements) : null;
- } else {
- HashSet hashSampleNames = new HashSet(sampleNames); // to speed up the "contains" access in the for loop
- UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker();
- for (PE p : pileupElementTracker) {
- GATKSAMRecord read = p.getRead();
- if (sampleNames != null) { // still checking on sampleNames because hashSampleNames will never be null. And empty means something else.
- if (read.getReadGroup() != null && hashSampleNames.contains(read.getReadGroup().getSample()))
- filteredTracker.add(p);
- } else {
- if (read.getReadGroup() == null || read.getReadGroup().getSample() == null)
- filteredTracker.add(p);
- }
- }
- return filteredTracker.size() > 0 ? (RBP) createNewPileup(loc, filteredTracker) : null;
- }
- }
-
- @Override
- public Map getPileupsForSamples(Collection sampleNames) {
- Map result = new HashMap();
- if (pileupElementTracker instanceof PerSamplePileupElementTracker) {
- PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker;
- for (String sample : sampleNames) {
- PileupElementTracker filteredElements = tracker.getElements(sample);
- if (filteredElements != null)
- result.put(sample, createNewPileup(loc, filteredElements));
- }
- } else {
- Map> trackerMap = new HashMap>();
-
- for (String sample : sampleNames) { // initialize pileups for each sample
- UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker();
- trackerMap.put(sample, filteredTracker);
- }
- for (PE p : pileupElementTracker) { // go through all pileup elements only once and add them to the respective sample's pileup
- GATKSAMRecord read = p.getRead();
- if (read.getReadGroup() != null) {
- String sample = read.getReadGroup().getSample();
- UnifiedPileupElementTracker tracker = trackerMap.get(sample);
- if (tracker != null) // we only add the pileup the requested samples. Completely ignore the rest
- tracker.add(p);
- }
- }
- for (Map.Entry> entry : trackerMap.entrySet()) // create the RBP for each sample
- result.put(entry.getKey(), createNewPileup(loc, entry.getValue()));
- }
- return result;
- }
-
-
- @Override
- public RBP getPileupForSample(String sampleName) {
- if (pileupElementTracker instanceof PerSamplePileupElementTracker) {
- PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker;
- PileupElementTracker filteredElements = tracker.getElements(sampleName);
- return filteredElements != null ? (RBP) createNewPileup(loc, filteredElements) : null;
- } else {
- UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker();
- for (PE p : pileupElementTracker) {
- GATKSAMRecord read = p.getRead();
- if (sampleName != null) {
- if (read.getReadGroup() != null && sampleName.equals(read.getReadGroup().getSample()))
- filteredTracker.add(p);
- } else {
- if (read.getReadGroup() == null || read.getReadGroup().getSample() == null)
- filteredTracker.add(p);
- }
- }
- return filteredTracker.size() > 0 ? (RBP) createNewPileup(loc, filteredTracker) : null;
- }
- }
-
- // --------------------------------------------------------
- //
- // iterators
- //
- // --------------------------------------------------------
-
- /**
- * The best way to access PileupElements where you only care about the bases and quals in the pileup.
- *
- * for (PileupElement p : this) { doSomething(p); }
- *
- * Provides efficient iteration of the data.
- *
- * @return
- */
- @Override
- public Iterator iterator() {
- return new Iterator() {
- private final Iterator wrappedIterator = pileupElementTracker.iterator();
-
- public boolean hasNext() {
- return wrappedIterator.hasNext();
- }
-
- public PileupElement next() {
- return wrappedIterator.next();
- }
-
- public void remove() {
- throw new UnsupportedOperationException("Cannot remove from a pileup element iterator");
- }
- };
- }
-
- /**
- * The best way to access PileupElements where you only care not only about bases and quals in the pileup
- * but also need access to the index of the pileup element in the pile.
- *
- * for (ExtendedPileupElement p : this) { doSomething(p); }
- *
- * Provides efficient iteration of the data.
- *
- * @return
- */
-
- /**
- * Simple useful routine to count the number of deletion bases in this pileup
- *
- * @return
- */
- @Override
- public int getNumberOfDeletions() {
- return nDeletions;
- }
-
- @Override
- public int getNumberOfMappingQualityZeroReads() {
- return nMQ0Reads;
- }
-
- /**
- * @return the number of physical elements in this pileup
- */
- @Override
- public int getNumberOfElements() {
- return size;
- }
-
- /**
- * @return the number of abstract elements in this pileup
- */
- @Override
- public int depthOfCoverage() {
- if (abstractSize == -1)
- calculateAbstractSize();
- return abstractSize;
- }
-
- /**
- * @return true if there are 0 elements in the pileup, false otherwise
- */
- @Override
- public boolean isEmpty() {
- return size == 0;
- }
-
-
- /**
- * @return the location of this pileup
- */
- @Override
- public GenomeLoc getLocation() {
- return loc;
- }
-
- /**
- * Get counts of A, C, G, T in order, which returns a int[4] vector with counts according
- * to BaseUtils.simpleBaseToBaseIndex for each base.
- *
- * @return
- */
- @Override
- public int[] getBaseCounts() {
- int[] counts = new int[4];
-
- if (pileupElementTracker instanceof PerSamplePileupElementTracker) {
- PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker;
- for (final String sample : tracker.getSamples()) {
- int[] countsBySample = createNewPileup(loc, tracker.getElements(sample)).getBaseCounts();
- for (int i = 0; i < counts.length; i++)
- counts[i] += countsBySample[i];
- }
- } else {
- for (PileupElement pile : this) {
- // skip deletion sites
- if (!pile.isDeletion()) {
- int index = BaseUtils.simpleBaseToBaseIndex((char) pile.getBase());
- if (index != -1)
- counts[index]++;
- }
- }
- }
-
- return counts;
- }
-
- @Override
- public String getPileupString(Character ref) {
- // In the pileup format, each line represents a genomic position, consisting of chromosome name,
- // coordinate, reference base, read bases, read qualities and alignment mapping qualities.
- return String.format("%s %s %c %s %s",
- getLocation().getContig(), getLocation().getStart(), // chromosome name and coordinate
- ref, // reference base
- new String(getBases()),
- getQualsString());
- }
-
- // --------------------------------------------------------
- //
- // Convenience functions that may be slow
- //
- // --------------------------------------------------------
-
- /**
- * Returns a list of the reads in this pileup. Note this call costs O(n) and allocates fresh lists each time
- *
- * @return
- */
- @Override
- public List getReads() {
- List reads = new ArrayList(getNumberOfElements());
- for (PileupElement pile : this) {
- reads.add(pile.getRead());
- }
- return reads;
- }
-
- @Override
- public int getNumberOfDeletionsAfterThisElement() {
- int count = 0;
- for (PileupElement p : this) {
- if (p.isBeforeDeletionStart())
- count++;
- }
- return count;
- }
-
- @Override
- public int getNumberOfInsertionsAfterThisElement() {
- int count = 0;
- for (PileupElement p : this) {
- if (p.isBeforeInsertion())
- count++;
- }
- return count;
-
- }
- /**
- * Returns a list of the offsets in this pileup. Note this call costs O(n) and allocates fresh lists each time
- *
- * @return
- */
- @Override
- public List getOffsets() {
- List offsets = new ArrayList(getNumberOfElements());
- for (PileupElement pile : this) {
- offsets.add(pile.getOffset());
- }
- return offsets;
- }
-
- /**
- * Returns an array of the bases in this pileup. Note this call costs O(n) and allocates fresh array each time
- *
- * @return
- */
- @Override
- public byte[] getBases() {
- byte[] v = new byte[getNumberOfElements()];
- int pos = 0;
- for (PileupElement pile : pileupElementTracker) {
- v[pos++] = pile.getBase();
- }
- return v;
- }
-
- /**
- * Returns an array of the quals in this pileup. Note this call costs O(n) and allocates fresh array each time
- *
- * @return
- */
- @Override
- public byte[] getQuals() {
- byte[] v = new byte[getNumberOfElements()];
- int pos = 0;
- for (PileupElement pile : pileupElementTracker) {
- v[pos++] = pile.getQual();
- }
- return v;
- }
-
- /**
- * Get an array of the mapping qualities
- *
- * @return
- */
- @Override
- public byte[] getMappingQuals() {
- byte[] v = new byte[getNumberOfElements()];
- int pos = 0;
- for (PileupElement pile : pileupElementTracker) {
- v[pos++] = (byte) pile.getRead().getMappingQuality();
- }
- return v;
- }
-
- static String quals2String(byte[] quals) {
- StringBuilder qualStr = new StringBuilder();
- for (int qual : quals) {
- qual = Math.min(qual, 63); // todo: fixme, this isn't a good idea
- char qualChar = (char) (33 + qual); // todo: warning, this is illegal for qual > 63
- qualStr.append(qualChar);
- }
-
- return qualStr.toString();
- }
-
- private String getQualsString() {
- return quals2String(getQuals());
- }
-
- /**
- * Returns a new ReadBackedPileup that is sorted by start coordinate of the reads.
- *
- * @return
- */
- @Override
- public RBP getStartSortedPileup() {
-
- final TreeSet sortedElements = new TreeSet(new Comparator() {
- @Override
- public int compare(PE element1, PE element2) {
- final int difference = element1.getRead().getAlignmentStart() - element2.getRead().getAlignmentStart();
- return difference != 0 ? difference : element1.getRead().getReadName().compareTo(element2.getRead().getReadName());
- }
- });
-
- if (pileupElementTracker instanceof PerSamplePileupElementTracker) {
- PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker;
-
- for (final String sample : tracker.getSamples()) {
- PileupElementTracker perSampleElements = tracker.getElements(sample);
- for (PE pile : perSampleElements)
- sortedElements.add(pile);
- }
- }
- else {
- UnifiedPileupElementTracker tracker = (UnifiedPileupElementTracker) pileupElementTracker;
- for (PE pile : tracker)
- sortedElements.add(pile);
- }
-
- UnifiedPileupElementTracker sortedTracker = new UnifiedPileupElementTracker();
- for (PE pile : sortedElements)
- sortedTracker.add(pile);
-
- return (RBP) createNewPileup(loc, sortedTracker);
- }
-
- @Override
- public FragmentCollection toFragments() {
- return FragmentUtils.create(this);
- }
-
- @Override
- public ReadBackedPileup copy() {
- return new ReadBackedPileupImpl(loc, (PileupElementTracker) pileupElementTracker.copy());
- }
-}
-
-
diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElementTracker.java b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElementTracker.java
index 0a0d4ab9c..288b033cb 100644
--- a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElementTracker.java
+++ b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElementTracker.java
@@ -25,6 +25,8 @@
package org.broadinstitute.sting.utils.pileup;
+import org.apache.commons.collections.iterators.IteratorChain;
+
import java.util.*;
/**
@@ -35,6 +37,20 @@ import java.util.*;
*/
abstract class PileupElementTracker implements Iterable {
public abstract int size();
+
+ /**
+ * Iterate through the PEs here, but in any order, which may improve performance
+ * if you don't care about the underlying order the reads are coming to you in.
+ * @return an iteratable over all pileup elements in this tracker
+ */
+ public abstract Iterable unorderedIterable();
+
+ /**
+ * Same as @see #unorderedIterable but the actual iterator itself
+ * @return
+ */
+ public Iterator unorderedIterator() { return unorderedIterable().iterator(); }
+
public abstract PileupElementTracker copy();
}
@@ -65,6 +81,7 @@ class UnifiedPileupElementTracker extends PileupElemen
}
public Iterator iterator() { return pileup.iterator(); }
+ public Iterable unorderedIterable() { return this; }
}
class PerSamplePileupElementTracker extends PileupElementTracker {
@@ -113,4 +130,25 @@ class PerSamplePileupElementTracker extends PileupElem
public int size() {
return size;
}
+
+
+ public Iterable unorderedIterable() {
+ return new Iterable() {
+ @Override
+ public Iterator iterator() {
+ return new Iterator() {
+ final private IteratorChain chain = new IteratorChain();
+
+ { // initialize the chain with the unordered iterators of the per sample pileups
+ for ( PileupElementTracker pet : pileup.values() ) {
+ chain.addIterator(pet.unorderedIterator());
+ }
+ }
+ @Override public boolean hasNext() { return chain.hasNext(); }
+ @Override public PE next() { return (PE)chain.next(); }
+ @Override public void remove() { throw new UnsupportedOperationException("Cannot remove"); }
+ };
+ }
+ };
+ }
}
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java
index fa42964b9..fe43f85bd 100644
--- a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java
+++ b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java
@@ -25,33 +25,64 @@
package org.broadinstitute.sting.utils.pileup;
+import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+import org.broadinstitute.sting.utils.fragments.FragmentCollection;
+import org.broadinstitute.sting.utils.fragments.FragmentUtils;
import org.broadinstitute.sting.utils.locusiterator.LocusIteratorByState;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
+import org.broadinstitute.variant.utils.BaseUtils;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
-public class ReadBackedPileupImpl extends AbstractReadBackedPileup implements ReadBackedPileup {
+public class ReadBackedPileupImpl implements ReadBackedPileup {
+ protected final GenomeLoc loc;
+ protected final PileupElementTracker pileupElementTracker;
- public ReadBackedPileupImpl(GenomeLoc loc) {
- super(loc);
- }
+ private final static int UNINITIALIZED_CACHED_INT_VALUE = -1;
+ /**
+ * Different then number of elements due to reduced reads
+ */
+ private int depthOfCoverage = UNINITIALIZED_CACHED_INT_VALUE;
+ private int nDeletions = UNINITIALIZED_CACHED_INT_VALUE; // cached value of the number of deletions
+ private int nMQ0Reads = UNINITIALIZED_CACHED_INT_VALUE; // cached value of the number of MQ0 reads
+
+ /**
+ * Create a new version of a read backed pileup at loc, using the reads and their corresponding
+ * offsets. This pileup will contain a list, in order of the reads, of the piled bases at
+ * reads[i] for all i in offsets. Does not make a copy of the data, so it's not safe to
+ * go changing the reads.
+ *
+ * @param loc The genome loc to associate reads wotj
+ * @param reads
+ * @param offsets
+ */
public ReadBackedPileupImpl(GenomeLoc loc, List reads, List offsets) {
- super(loc, reads, offsets);
+ this.loc = loc;
+ this.pileupElementTracker = readsOffsets2Pileup(reads, offsets);
}
- public ReadBackedPileupImpl(GenomeLoc loc, List reads, int offset) {
- super(loc, reads, offset);
+
+ /**
+ * Create a new version of a read backed pileup at loc without any aligned reads
+ */
+ public ReadBackedPileupImpl(GenomeLoc loc) {
+ this(loc, new UnifiedPileupElementTracker());
}
- public ReadBackedPileupImpl(GenomeLoc loc, List pileupElements) {
- super(loc, pileupElements);
- }
+ /**
+ * Create a new version of a read backed pileup at loc, using the reads and their corresponding
+ * offsets. This lower level constructure assumes pileup is well-formed and merely keeps a
+ * pointer to pileup. Don't go changing the data in pileup.
+ */
+ public ReadBackedPileupImpl(GenomeLoc loc, List pileup) {
+ if (loc == null) throw new ReviewedStingException("Illegal null genomeloc in ReadBackedPileup");
+ if (pileup == null) throw new ReviewedStingException("Illegal null pileup in ReadBackedPileup");
- public ReadBackedPileupImpl(GenomeLoc loc, Map pileupElementsBySample) {
- super(loc, pileupElementsBySample);
+ this.loc = loc;
+ this.pileupElementTracker = new UnifiedPileupElementTracker(pileup);
}
/**
@@ -59,25 +90,954 @@ public class ReadBackedPileupImpl extends AbstractReadBackedPileup pileup, int size, int nDeletions, int nMQ0Reads) {
- super(loc, pileup, size, nDeletions, nMQ0Reads);
+ this(loc, pileup);
}
protected ReadBackedPileupImpl(GenomeLoc loc, PileupElementTracker tracker) {
- super(loc, tracker);
+ this.loc = loc;
+ this.pileupElementTracker = tracker;
+ }
+
+ public ReadBackedPileupImpl(GenomeLoc loc, Map pileupsBySample) {
+ this.loc = loc;
+ PerSamplePileupElementTracker tracker = new PerSamplePileupElementTracker();
+ for (Map.Entry pileupEntry : pileupsBySample.entrySet()) {
+ tracker.addElements(pileupEntry.getKey(), pileupEntry.getValue().pileupElementTracker);
+ }
+ this.pileupElementTracker = tracker;
+ }
+
+ public ReadBackedPileupImpl(GenomeLoc loc, List reads, int offset) {
+ this.loc = loc;
+ this.pileupElementTracker = readsOffsets2Pileup(reads, offset);
+ }
+
+ /**
+ * Helper routine for converting reads and offset lists to a PileupElement list.
+ *
+ * @param reads
+ * @param offsets
+ * @return
+ */
+ private PileupElementTracker readsOffsets2Pileup(List reads, List offsets) {
+ if (reads == null) throw new ReviewedStingException("Illegal null read list in UnifiedReadBackedPileup");
+ if (offsets == null) throw new ReviewedStingException("Illegal null offsets list in UnifiedReadBackedPileup");
+ if (reads.size() != offsets.size())
+ throw new ReviewedStingException("Reads and offset lists have different sizes!");
+
+ UnifiedPileupElementTracker pileup = new UnifiedPileupElementTracker();
+ for (int i = 0; i < reads.size(); i++) {
+ GATKSAMRecord read = reads.get(i);
+ int offset = offsets.get(i);
+ pileup.add(createNewPileupElement(read, offset)); // only used to create fake pileups for testing so ancillary information is not important
+ }
+
+ return pileup;
+ }
+
+ /**
+ * Helper routine for converting reads and a single offset to a PileupElement list.
+ *
+ * @param reads
+ * @param offset
+ * @return
+ */
+ private PileupElementTracker readsOffsets2Pileup(List reads, int offset) {
+ if (reads == null) throw new ReviewedStingException("Illegal null read list in UnifiedReadBackedPileup");
+ if (offset < 0) throw new ReviewedStingException("Illegal offset < 0 UnifiedReadBackedPileup");
+
+ UnifiedPileupElementTracker pileup = new UnifiedPileupElementTracker();
+ for (GATKSAMRecord read : reads) {
+ pileup.add(createNewPileupElement(read, offset)); // only used to create fake pileups for testing so ancillary information is not important
+ }
+
+ return pileup;
}
- @Override
protected ReadBackedPileupImpl createNewPileup(GenomeLoc loc, PileupElementTracker tracker) {
return new ReadBackedPileupImpl(loc, tracker);
}
- @Override
protected PileupElement createNewPileupElement(GATKSAMRecord read, int offset) {
return LocusIteratorByState.createPileupForReadAndOffset(read, offset);
+ }
+
+ // --------------------------------------------------------
+ //
+ // Special 'constructors'
+ //
+ // --------------------------------------------------------
+
+ /**
+ * Returns a new ReadBackedPileup that is free of deletion spanning reads in this pileup. Note that this
+ * does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy
+ * of the pileup (just returns this) if there are no deletions in the pileup.
+ *
+ * @return
+ */
+ @Override
+ public ReadBackedPileupImpl getPileupWithoutDeletions() {
+ if (getNumberOfDeletions() > 0) {
+ if (pileupElementTracker instanceof PerSamplePileupElementTracker) {
+ PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker;
+ PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker();
+
+ for (final String sample : tracker.getSamples()) {
+ PileupElementTracker perSampleElements = tracker.getElements(sample);
+ ReadBackedPileupImpl pileup = createNewPileup(loc, perSampleElements).getPileupWithoutDeletions();
+ filteredTracker.addElements(sample, pileup.pileupElementTracker);
+ }
+ return createNewPileup(loc, filteredTracker);
+
+ } else {
+ UnifiedPileupElementTracker tracker = (UnifiedPileupElementTracker) pileupElementTracker;
+ UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker();
+
+ for (PileupElement p : tracker) {
+ if (!p.isDeletion()) {
+ filteredTracker.add(p);
+ }
+ }
+ return createNewPileup(loc, filteredTracker);
+ }
+ } else {
+ return this;
+ }
+ }
+
+ /**
+ * Returns a new ReadBackedPileup where only one read from an overlapping read
+ * pair is retained. If the two reads in question disagree to their basecall,
+ * neither read is retained. If they agree on the base, the read with the higher
+ * base quality observation is retained
+ *
+ * @return the newly filtered pileup
+ */
+ @Override
+ public ReadBackedPileup getOverlappingFragmentFilteredPileup() {
+ return getOverlappingFragmentFilteredPileup(true, true);
+ }
+
+ /**
+ * Returns a new ReadBackedPileup where only one read from an overlapping read
+ * pair is retained. If discardDiscordant and the two reads in question disagree to their basecall,
+ * neither read is retained. Otherwise, the read with the higher
+ * quality (base or mapping, depending on baseQualNotMapQual) observation is retained
+ *
+ * @return the newly filtered pileup
+ */
+ @Override
+ public ReadBackedPileupImpl getOverlappingFragmentFilteredPileup(boolean discardDiscordant, boolean baseQualNotMapQual) {
+ if (pileupElementTracker instanceof PerSamplePileupElementTracker) {
+ PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker) pileupElementTracker;
+ PerSamplePileupElementTracker filteredTracker = new PerSamplePileupElementTracker();
+
+ for (final String sample : tracker.getSamples()) {
+ PileupElementTracker perSampleElements = tracker.getElements(sample);
+ ReadBackedPileupImpl pileup = createNewPileup(loc, perSampleElements).getOverlappingFragmentFilteredPileup(discardDiscordant, baseQualNotMapQual);
+ filteredTracker.addElements(sample, pileup.pileupElementTracker);
+ }
+ return createNewPileup(loc, filteredTracker);
+ } else {
+ Map filteredPileup = new HashMap();
+
+ for (PileupElement p : pileupElementTracker) {
+ String readName = p.getRead().getReadName();
+
+ // if we've never seen this read before, life is good
+ if (!filteredPileup.containsKey(readName)) {
+ filteredPileup.put(readName, p);
+ } else {
+ PileupElement existing = filteredPileup.get(readName);
+
+ // if the reads disagree at this position, throw them both out. Otherwise
+ // keep the element with the higher quality score
+ if (discardDiscordant && existing.getBase() != p.getBase()) {
+ filteredPileup.remove(readName);
+ } else {
+ if (baseQualNotMapQual) {
+ if (existing.getQual() < p.getQual())
+ filteredPileup.put(readName, p);
+ }
+ else {
+ if (existing.getMappingQual() < p.getMappingQual())
+ filteredPileup.put(readName, p);
+ }
+ }
+ }
+ }
+
+ UnifiedPileupElementTracker filteredTracker = new UnifiedPileupElementTracker();
+ for (PileupElement filteredElement : filteredPileup.values())
+ filteredTracker.add(filteredElement);
+
+ return createNewPileup(loc, filteredTracker);
+ }
+ }
+
+
+ /**
+ * Returns a new ReadBackedPileup that is free of mapping quality zero reads in this pileup. Note that this
+ * does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy
+ * of the pileup (just returns this) if there are no MQ0 reads in the pileup.
+ *
+ * @return
+ */
+ @Override
+ public ReadBackedPileupImpl getPileupWithoutMappingQualityZeroReads() {
+ if (getNumberOfMappingQualityZeroReads() > 0) {
+ if (pileupElementTracker instanceof PerSamplePileupElementTracker) {
+ PerSamplePileupElementTracker tracker = (PerSamplePileupElementTracker