2009-03-16 06:21:48 +08:00
|
|
|
package org.broadinstitute.sting.gatk;
|
2009-02-27 05:50:29 +08:00
|
|
|
|
2009-03-11 03:34:00 +08:00
|
|
|
import net.sf.samtools.SAMRecord;
|
2009-02-27 05:50:29 +08:00
|
|
|
|
|
|
|
|
import java.util.List;
|
2009-04-02 04:27:06 +08:00
|
|
|
import java.util.ArrayList;
|
|
|
|
|
import java.util.Iterator;
|
|
|
|
|
import java.util.TreeSet;
|
|
|
|
|
import java.util.Random;
|
2009-02-27 05:50:29 +08:00
|
|
|
|
2009-03-13 07:30:19 +08:00
|
|
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
2009-03-16 22:46:19 +08:00
|
|
|
import edu.mit.broad.picard.reference.ReferenceSequence;
|
2009-03-13 07:30:19 +08:00
|
|
|
|
2009-02-27 05:50:29 +08:00
|
|
|
/**
 * Holds the data associated with a single locus: its genomic location, the reads
 * covering it, the per-read offsets and, optionally, the reference contig.
 * Useful for forwarding locus context data on from the iterator that produces it.
 *
 * Created by IntelliJ IDEA.
 * User: mdepristo
 * Date: Feb 22, 2009
 * Time: 3:01:34 PM
 */
|
2009-03-16 22:46:19 +08:00
|
|
|
public class LocusContext {
|
|
|
|
|
private GenomeLoc loc = null;
|
|
|
|
|
private List<SAMRecord> reads = null;
|
|
|
|
|
private List<Integer> offsets = null;
|
|
|
|
|
private ReferenceSequence refContig = null;
|
2009-02-27 05:50:29 +08:00
|
|
|
|
2009-03-16 22:46:19 +08:00
|
|
|
/**
|
|
|
|
|
* Create a new LocusContext object
|
|
|
|
|
*
|
|
|
|
|
* @param loc
|
|
|
|
|
* @param reads
|
|
|
|
|
* @param offsets
|
|
|
|
|
*/
|
|
|
|
|
public LocusContext(GenomeLoc loc, List<SAMRecord> reads, List<Integer> offsets) {
|
2009-04-14 08:55:19 +08:00
|
|
|
assert loc != null;
|
|
|
|
|
assert loc.getContig() != null;
|
|
|
|
|
assert reads != null;
|
|
|
|
|
assert offsets != null;
|
|
|
|
|
|
2009-03-16 22:46:19 +08:00
|
|
|
this.loc = loc;
|
|
|
|
|
this.reads = reads;
|
|
|
|
|
this.offsets = offsets;
|
|
|
|
|
}
|
2009-02-27 05:50:29 +08:00
|
|
|
|
2009-03-16 22:46:19 +08:00
|
|
|
/**
|
|
|
|
|
* get all of the reads within this context
|
|
|
|
|
*
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
|
|
|
|
public List<SAMRecord> getReads() { return reads; }
|
2009-02-27 05:50:29 +08:00
|
|
|
|
2009-03-16 22:46:19 +08:00
|
|
|
/**
|
|
|
|
|
* Are there any reads associated with this locus?
|
|
|
|
|
*
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
|
|
|
|
public boolean hasReads() {
|
|
|
|
|
return reads != null;
|
|
|
|
|
}
|
2009-02-27 05:50:29 +08:00
|
|
|
|
2009-03-16 22:46:19 +08:00
|
|
|
/**
|
|
|
|
|
* How many reads cover this locus?
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
|
|
|
|
public int numReads() {
|
|
|
|
|
assert( reads != null );
|
|
|
|
|
return reads.size();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* get a list of the equivalent positions within in the reads at Pos
|
|
|
|
|
*
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
|
|
|
|
public List<Integer> getOffsets() {
|
|
|
|
|
return offsets;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public String getContig() { return getLocation().getContig(); }
|
|
|
|
|
public long getPosition() { return getLocation().getStart(); }
|
|
|
|
|
public GenomeLoc getLocation() { return loc; }
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Returns the entire reference sequence contig associated with these reads
|
|
|
|
|
*
|
|
|
|
|
* @return ReferenceSequence object, or null if unavailable
|
|
|
|
|
*/
|
|
|
|
|
public ReferenceSequence getReferenceContig() {
|
|
|
|
|
return refContig;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* @return True if reference sequence contig is available
|
|
|
|
|
*/
|
|
|
|
|
public boolean hasReferenceContig() {
|
|
|
|
|
return refContig != null;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Sets the reference sequence for this locus to contig
|
|
|
|
|
*
|
|
|
|
|
* @param contig
|
|
|
|
|
*/
|
|
|
|
|
public void setReferenceContig(final ReferenceSequence contig) {
|
|
|
|
|
refContig = contig;
|
|
|
|
|
}
|
2009-04-02 04:27:06 +08:00
|
|
|
|
|
|
|
|
public void downsampleToCoverage(int coverage) {
|
|
|
|
|
if ( numReads() <= coverage )
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
// randomly choose numbers corresponding to positions in the reads list
|
|
|
|
|
Random generator = new Random();
|
|
|
|
|
TreeSet positions = new TreeSet();
|
|
|
|
|
int i = 0;
|
|
|
|
|
while ( i < coverage ) {
|
|
|
|
|
if (positions.add(new Integer(generator.nextInt(reads.size()))))
|
|
|
|
|
i++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
ArrayList downsampledReads = new ArrayList();
|
|
|
|
|
Iterator positionIter = positions.iterator();
|
|
|
|
|
Iterator readsIter = reads.iterator();
|
|
|
|
|
int currentRead = 0;
|
|
|
|
|
while ( positionIter.hasNext() ) {
|
|
|
|
|
int nextReadToKeep = (Integer)positionIter.next();
|
|
|
|
|
|
|
|
|
|
// fast-forward to the right read
|
|
|
|
|
while ( currentRead < nextReadToKeep ) {
|
|
|
|
|
readsIter.next();
|
|
|
|
|
currentRead++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
downsampledReads.add(readsIter.next());
|
|
|
|
|
currentRead++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
reads = downsampledReads;
|
|
|
|
|
}
|
2009-02-27 05:50:29 +08:00
|
|
|
}
|