2009-08-05 05:01:37 +08:00
|
|
|
/*
|
|
|
|
|
* Copyright (c) 2009 The Broad Institute
|
|
|
|
|
*
|
|
|
|
|
* Permission is hereby granted, free of charge, to any person
|
|
|
|
|
* obtaining a copy of this software and associated documentation
|
|
|
|
|
* files (the "Software"), to deal in the Software without
|
|
|
|
|
* restriction, including without limitation the rights to use,
|
|
|
|
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
|
|
|
* copies of the Software, and to permit persons to whom the
|
|
|
|
|
* Software is furnished to do so, subject to the following
|
|
|
|
|
* conditions:
|
|
|
|
|
*
|
|
|
|
|
* The above copyright notice and this permission notice shall be
|
|
|
|
|
* included in all copies or substantial portions of the Software.
|
|
|
|
|
*
|
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
|
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
|
|
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
|
|
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
|
|
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
|
|
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
|
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
|
|
|
* OTHER DEALINGS IN THE SOFTWARE.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
package org.broadinstitute.sting.gatk.contexts;
|
2009-02-27 05:50:29 +08:00
|
|
|
|
2009-07-01 03:17:24 +08:00
|
|
|
import net.sf.picard.reference.ReferenceSequence;
|
2009-03-11 03:34:00 +08:00
|
|
|
import net.sf.samtools.SAMRecord;
|
2009-03-13 07:30:19 +08:00
|
|
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
2009-07-01 03:17:24 +08:00
|
|
|
|
|
|
|
|
import java.util.*;
|
2009-03-13 07:30:19 +08:00
|
|
|
|
2009-02-27 05:50:29 +08:00
|
|
|
/**
 * Bundles a genomic location with the reads that cover it and, for each read,
 * the offset of that location within the read, so that the three can be
 * passed along together.
 *
 * Created by IntelliJ IDEA.
 * User: mdepristo
 * Date: Feb 22, 2009
 * Time: 3:01:34 PM
 * To change this template use File | Settings | File Templates.
 */
|
2009-08-05 05:01:37 +08:00
|
|
|
public class AlignmentContext {
|
2009-03-16 22:46:19 +08:00
|
|
|
private GenomeLoc loc = null;
|
|
|
|
|
private List<SAMRecord> reads = null;
|
|
|
|
|
private List<Integer> offsets = null;
|
2009-02-27 05:50:29 +08:00
|
|
|
|
2009-03-16 22:46:19 +08:00
|
|
|
/**
|
2009-08-05 05:01:37 +08:00
|
|
|
* Create a new AlignmentContext object
|
2009-03-16 22:46:19 +08:00
|
|
|
*
|
|
|
|
|
* @param loc
|
|
|
|
|
* @param reads
|
|
|
|
|
* @param offsets
|
|
|
|
|
*/
|
2009-08-05 05:01:37 +08:00
|
|
|
public AlignmentContext(GenomeLoc loc, List<SAMRecord> reads, List<Integer> offsets) {
|
2009-05-28 06:02:24 +08:00
|
|
|
//assert loc != null;
|
|
|
|
|
//assert loc.getContig() != null;
|
|
|
|
|
//assert reads != null;
|
|
|
|
|
//assert offsets != null;
|
2009-04-14 08:55:19 +08:00
|
|
|
|
2009-03-16 22:46:19 +08:00
|
|
|
this.loc = loc;
|
|
|
|
|
this.reads = reads;
|
|
|
|
|
this.offsets = offsets;
|
|
|
|
|
}
|
2009-02-27 05:50:29 +08:00
|
|
|
|
2009-03-16 22:46:19 +08:00
|
|
|
/**
|
|
|
|
|
* get all of the reads within this context
|
|
|
|
|
*
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
|
|
|
|
public List<SAMRecord> getReads() { return reads; }
|
2009-02-27 05:50:29 +08:00
|
|
|
|
2009-03-16 22:46:19 +08:00
|
|
|
/**
|
|
|
|
|
* Are there any reads associated with this locus?
|
|
|
|
|
*
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
|
|
|
|
public boolean hasReads() {
|
|
|
|
|
return reads != null;
|
|
|
|
|
}
|
2009-02-27 05:50:29 +08:00
|
|
|
|
2009-03-16 22:46:19 +08:00
|
|
|
/**
|
|
|
|
|
* How many reads cover this locus?
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
|
|
|
|
public int numReads() {
|
|
|
|
|
assert( reads != null );
|
|
|
|
|
return reads.size();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* get a list of the equivalent positions within in the reads at Pos
|
|
|
|
|
*
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
|
|
|
|
public List<Integer> getOffsets() {
|
|
|
|
|
return offsets;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public String getContig() { return getLocation().getContig(); }
|
|
|
|
|
public long getPosition() { return getLocation().getStart(); }
|
|
|
|
|
public GenomeLoc getLocation() { return loc; }
|
2009-09-09 09:28:05 +08:00
|
|
|
|
|
|
|
|
//public void setLocation(GenomeLoc loc) {
|
|
|
|
|
// this.loc = loc.clone();
|
|
|
|
|
//}
|
2009-04-02 04:27:06 +08:00
|
|
|
|
|
|
|
|
public void downsampleToCoverage(int coverage) {
|
|
|
|
|
if ( numReads() <= coverage )
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
// randomly choose numbers corresponding to positions in the reads list
|
|
|
|
|
Random generator = new Random();
|
|
|
|
|
TreeSet positions = new TreeSet();
|
|
|
|
|
int i = 0;
|
|
|
|
|
while ( i < coverage ) {
|
|
|
|
|
if (positions.add(new Integer(generator.nextInt(reads.size()))))
|
|
|
|
|
i++;
|
|
|
|
|
}
|
|
|
|
|
|
2009-04-24 12:33:35 +08:00
|
|
|
ArrayList<SAMRecord> downsampledReads = new ArrayList<SAMRecord>();
|
|
|
|
|
ArrayList<Integer> downsampledOffsets = new ArrayList<Integer>();
|
2009-04-02 04:27:06 +08:00
|
|
|
Iterator positionIter = positions.iterator();
|
2009-04-24 12:33:35 +08:00
|
|
|
Iterator<SAMRecord> readsIter = reads.iterator();
|
|
|
|
|
Iterator<Integer> offsetsIter = offsets.iterator();
|
2009-04-02 04:27:06 +08:00
|
|
|
int currentRead = 0;
|
|
|
|
|
while ( positionIter.hasNext() ) {
|
|
|
|
|
int nextReadToKeep = (Integer)positionIter.next();
|
|
|
|
|
|
|
|
|
|
// fast-forward to the right read
|
|
|
|
|
while ( currentRead < nextReadToKeep ) {
|
|
|
|
|
readsIter.next();
|
2009-04-24 12:33:35 +08:00
|
|
|
offsetsIter.next();
|
2009-04-02 04:27:06 +08:00
|
|
|
currentRead++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
downsampledReads.add(readsIter.next());
|
2009-04-24 12:33:35 +08:00
|
|
|
downsampledOffsets.add(offsetsIter.next());
|
2009-04-02 04:27:06 +08:00
|
|
|
currentRead++;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
reads = downsampledReads;
|
2009-04-24 12:33:35 +08:00
|
|
|
offsets = downsampledOffsets;
|
2009-04-02 04:27:06 +08:00
|
|
|
}
|
2009-09-09 23:36:12 +08:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Returns only the reads in ac that do not contain spanning deletions of this locus
|
|
|
|
|
*
|
|
|
|
|
* @param ac
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
|
|
|
|
public static AlignmentContext withoutSpanningDeletions( AlignmentContext ac ) {
|
|
|
|
|
return subsetDeletions( ac, true );
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Returns only the reads in ac that do contain spanning deletions of this locus
|
|
|
|
|
*
|
|
|
|
|
* @param ac
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
|
|
|
|
public static AlignmentContext withSpanningDeletions( AlignmentContext ac ) {
|
|
|
|
|
return subsetDeletions( ac, false );
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private static AlignmentContext subsetDeletions( AlignmentContext ac, boolean readsWithoutDeletions ) {
|
|
|
|
|
ArrayList<SAMRecord> reads = new ArrayList<SAMRecord>(ac.getReads().size());
|
|
|
|
|
ArrayList<Integer> offsets = new ArrayList<Integer>(ac.getReads().size());
|
|
|
|
|
for ( int i = 0; i < ac.getReads().size(); i++ ) {
|
|
|
|
|
SAMRecord read = ac.getReads().get(i);
|
|
|
|
|
int offset = ac.getOffsets().get(i);
|
|
|
|
|
if ( (offset == -1 && ! readsWithoutDeletions) || (offset != -1 && readsWithoutDeletions) ) {
|
|
|
|
|
reads.add(read);
|
|
|
|
|
offsets.add(offset);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return new AlignmentContext(ac.getLocation(), reads, offsets);
|
|
|
|
|
}
|
2009-02-27 05:50:29 +08:00
|
|
|
}
|