2010-06-13 09:47:02 +08:00
|
|
|
/*
|
|
|
|
|
* Copyright (c) 2010, The Broad Institute
|
|
|
|
|
*
|
|
|
|
|
* Permission is hereby granted, free of charge, to any person
|
|
|
|
|
* obtaining a copy of this software and associated documentation
|
|
|
|
|
* files (the "Software"), to deal in the Software without
|
|
|
|
|
* restriction, including without limitation the rights to use,
|
|
|
|
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
|
|
|
* copies of the Software, and to permit persons to whom the
|
|
|
|
|
* Software is furnished to do so, subject to the following
|
|
|
|
|
* conditions:
|
|
|
|
|
*
|
|
|
|
|
* The above copyright notice and this permission notice shall be
|
|
|
|
|
* included in all copies or substantial portions of the Software.
|
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
|
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
|
|
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
|
|
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
|
|
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
|
|
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
|
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
|
|
|
* OTHER DEALINGS IN THE SOFTWARE.
|
|
|
|
|
*/
|
|
|
|
|
|
2009-11-25 11:51:41 +08:00
|
|
|
package org.broadinstitute.sting.utils.pileup;
|
|
|
|
|
|
|
|
|
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
2010-06-13 09:47:02 +08:00
|
|
|
import org.broadinstitute.sting.gatk.iterators.IterableIterator;
|
2009-11-25 11:51:41 +08:00
|
|
|
import net.sf.samtools.SAMRecord;
|
|
|
|
|
|
2010-06-13 09:47:02 +08:00
|
|
|
import java.util.List;
|
2010-06-18 23:03:08 +08:00
|
|
|
import java.util.Collection;
|
2010-06-13 09:47:02 +08:00
|
|
|
|
2009-11-25 11:51:41 +08:00
|
|
|
/**
|
2010-06-13 09:47:02 +08:00
|
|
|
* A data retrieval interface for accessing parts of the pileup.
|
2009-11-25 11:51:41 +08:00
|
|
|
*
|
2010-06-13 09:47:02 +08:00
|
|
|
* @author mhanna
|
|
|
|
|
* @version 0.1
|
2009-11-25 11:51:41 +08:00
|
|
|
*/
|
2010-06-13 09:47:02 +08:00
|
|
|
public interface ReadBackedPileup extends Iterable<PileupElement> {
|
2009-11-26 04:54:44 +08:00
|
|
|
/**
|
|
|
|
|
* Returns a new ReadBackedPileup that is free of deletion spanning reads in this pileup. Note that this
|
|
|
|
|
* does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy
|
|
|
|
|
* of the pileup (just returns this) if there are no deletions in the pileup.
|
|
|
|
|
*
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
2010-06-13 09:47:02 +08:00
|
|
|
public ReadBackedPileup getPileupWithoutDeletions();
|
2009-12-01 12:50:47 +08:00
|
|
|
|
2010-03-15 23:40:09 +08:00
|
|
|
/**
|
|
|
|
|
* Returns a new ReadBackedPileup where only one read from an overlapping read
|
|
|
|
|
* pair is retained. If the two reads in question disagree to their basecall,
|
|
|
|
|
* neither read is retained. If they agree on the base, the read with the higher
|
|
|
|
|
* quality observation is retained
|
|
|
|
|
*
|
|
|
|
|
* @return the newly filtered pileup
|
|
|
|
|
*/
|
2010-06-13 09:47:02 +08:00
|
|
|
public ReadBackedPileup getOverlappingFragmentFilteredPileup();
|
2010-03-15 23:40:09 +08:00
|
|
|
|
2009-12-01 12:50:47 +08:00
|
|
|
/**
|
|
|
|
|
* Returns a new ReadBackedPileup that is free of mapping quality zero reads in this pileup. Note that this
|
|
|
|
|
* does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy
|
|
|
|
|
* of the pileup (just returns this) if there are no MQ0 reads in the pileup.
|
|
|
|
|
*
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
2010-06-13 09:47:02 +08:00
|
|
|
public ReadBackedPileup getPileupWithoutMappingQualityZeroReads();
|
2009-11-25 11:51:41 +08:00
|
|
|
|
2010-02-02 05:19:53 +08:00
|
|
|
/** Returns subset of this pileup that contains only bases with quality >= minBaseQ, coming from
|
|
|
|
|
* reads with mapping qualities >= minMapQ. This method allocates and returns a new instance of ReadBackedPileup.
|
|
|
|
|
* @param minBaseQ
|
|
|
|
|
* @param minMapQ
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
2010-06-13 09:47:02 +08:00
|
|
|
public ReadBackedPileup getBaseAndMappingFilteredPileup( int minBaseQ, int minMapQ );
|
2009-12-17 05:39:58 +08:00
|
|
|
|
2010-02-02 05:19:53 +08:00
|
|
|
/** Returns subset of this pileup that contains only bases with quality >= minBaseQ.
|
|
|
|
|
* This method allocates and returns a new instance of ReadBackedPileup.
|
|
|
|
|
* @param minBaseQ
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
2010-06-13 09:47:02 +08:00
|
|
|
public ReadBackedPileup getBaseFilteredPileup( int minBaseQ );
|
|
|
|
|
|
2010-02-02 05:19:53 +08:00
|
|
|
/** Returns subset of this pileup that contains only bases coming from reads with mapping quality >= minMapQ.
|
|
|
|
|
* This method allocates and returns a new instance of ReadBackedPileup.
|
|
|
|
|
* @param minMapQ
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
2010-06-13 09:47:02 +08:00
|
|
|
public ReadBackedPileup getMappingFilteredPileup( int minMapQ );
|
2010-02-02 05:19:53 +08:00
|
|
|
|
2009-11-26 04:54:44 +08:00
|
|
|
/**
|
|
|
|
|
* Returns a pileup randomly downsampled to the desiredCoverage.
|
|
|
|
|
*
|
|
|
|
|
* @param desiredCoverage
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
2010-06-13 09:47:02 +08:00
|
|
|
public ReadBackedPileup getDownsampledPileup(int desiredCoverage);
|
2009-11-26 04:54:44 +08:00
|
|
|
|
2010-06-28 12:56:33 +08:00
|
|
|
/**
|
|
|
|
|
* Returns true if any reads have been filtered out of the pileup due to excess DoC.
|
|
|
|
|
* @return True if reads have been filtered out. False otherwise.
|
|
|
|
|
*/
|
|
|
|
|
public boolean hasPileupBeenDownsampled();
|
|
|
|
|
|
2010-06-18 23:03:08 +08:00
|
|
|
/**
|
|
|
|
|
* Gets a collection of all the samples stored in this pileup.
|
|
|
|
|
* @return Collection of samples in this pileup.
|
|
|
|
|
*/
|
|
|
|
|
public Collection<String> getSamples();
|
|
|
|
|
|
2009-11-26 04:54:44 +08:00
|
|
|
/**
|
2010-06-13 09:47:02 +08:00
|
|
|
* Gets the particular subset of this pileup with the given sample name.
|
|
|
|
|
* @param sampleName Name of the sample to use.
|
|
|
|
|
* @return A subset of this pileup containing only reads with the given sample.
|
2009-11-26 04:54:44 +08:00
|
|
|
*/
|
2010-06-13 09:47:02 +08:00
|
|
|
public ReadBackedPileup getPileupForSample(String sampleName);
|
|
|
|
|
|
|
|
|
|
// todo -- delete or make private
|
|
|
|
|
public IterableIterator<ExtendedPileupElement> extendedForeachIterator();
|
2009-11-25 11:51:41 +08:00
|
|
|
|
2009-11-26 04:54:44 +08:00
|
|
|
/**
|
|
|
|
|
* Simple useful routine to count the number of deletion bases in this pileup
|
|
|
|
|
*
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
2010-06-13 09:47:02 +08:00
|
|
|
public int getNumberOfDeletions();
|
2009-11-26 04:54:44 +08:00
|
|
|
|
2010-06-13 09:47:02 +08:00
|
|
|
public int getNumberOfMappingQualityZeroReads();
|
2009-12-01 12:50:47 +08:00
|
|
|
|
2009-11-26 04:54:44 +08:00
|
|
|
/**
|
|
|
|
|
* @return the number of elements in this pileup
|
|
|
|
|
*/
|
2010-06-13 09:47:02 +08:00
|
|
|
public int size();
|
2009-11-25 11:51:41 +08:00
|
|
|
|
2009-11-26 04:54:44 +08:00
|
|
|
/**
|
|
|
|
|
* @return the location of this pileup
|
|
|
|
|
*/
|
2010-06-13 09:47:02 +08:00
|
|
|
public GenomeLoc getLocation();
|
2009-11-25 11:51:41 +08:00
|
|
|
|
2009-11-26 04:54:44 +08:00
|
|
|
/**
|
|
|
|
|
* Get counts of A, C, G, T in order, which returns a int[4] vector with counts according
|
|
|
|
|
* to BaseUtils.simpleBaseToBaseIndex for each base.
|
|
|
|
|
*
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
2010-06-13 09:47:02 +08:00
|
|
|
public int[] getBaseCounts();
|
2009-11-26 04:54:44 +08:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Somewhat expensive routine that returns true if any base in the pileup has secondary bases annotated
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
2010-06-13 09:47:02 +08:00
|
|
|
public boolean hasSecondaryBases();
|
2009-11-26 04:54:44 +08:00
|
|
|
|
2010-06-20 12:42:26 +08:00
|
|
|
public String getPileupString(Character ref);
|
2009-11-26 04:54:44 +08:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Returns a list of the reads in this pileup. Note this call costs O(n) and allocates fresh lists each time
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
2010-06-13 09:47:02 +08:00
|
|
|
public List<SAMRecord> getReads();
|
2009-11-25 11:51:41 +08:00
|
|
|
|
2009-11-26 04:54:44 +08:00
|
|
|
/**
|
|
|
|
|
* Returns a list of the offsets in this pileup. Note this call costs O(n) and allocates fresh lists each time
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
2010-06-13 09:47:02 +08:00
|
|
|
public List<Integer> getOffsets();
|
2009-11-25 11:51:41 +08:00
|
|
|
|
2009-11-26 04:54:44 +08:00
|
|
|
/**
|
|
|
|
|
* Returns an array of the bases in this pileup. Note this call costs O(n) and allocates fresh array each time
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
2010-06-13 09:47:02 +08:00
|
|
|
public byte[] getBases();
|
2009-11-25 11:51:41 +08:00
|
|
|
|
2009-11-26 04:54:44 +08:00
|
|
|
/**
|
|
|
|
|
* Returns an array of the secondary bases in this pileup. Note this call costs O(n) and allocates fresh array each time
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
2010-06-13 09:47:02 +08:00
|
|
|
public byte[] getSecondaryBases();
|
2009-11-25 11:51:41 +08:00
|
|
|
|
2010-06-13 09:47:02 +08:00
|
|
|
/**
|
|
|
|
|
* Returns an array of the quals in this pileup. Note this call costs O(n) and allocates fresh array each time
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
|
|
|
|
public byte[] getQuals();
|
2009-12-02 23:41:35 +08:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Get an array of the mapping qualities
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
2010-06-13 09:47:02 +08:00
|
|
|
public byte[] getMappingQuals();
|
2009-11-25 11:51:41 +08:00
|
|
|
}
|