- Fix the CleanedReadInjector to deal with -L intervals correctly.
- Some walkers don't use the ref base, so speed up traversals by not requiring it.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1652 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
7da9ff2a9e
commit
2b2df4e1ba
|
|
@ -4,7 +4,8 @@ import net.sf.samtools.SAMFileReader;
|
|||
import net.sf.samtools.SAMFileWriter;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
|
||||
|
|
@ -26,6 +27,7 @@ import java.util.*;
|
|||
* Copies reads from the input stream into the <code>outputBAM</code>, replacing those
|
||||
* reads which have been cleaned with their new clean copies.
|
||||
*/
|
||||
@Requires({DataSource.READS})
|
||||
public class CleanedReadInjector extends ReadWalker<Integer,Integer> {
|
||||
|
||||
/**
|
||||
|
|
@ -45,6 +47,11 @@ public class CleanedReadInjector extends ReadWalker<Integer,Integer> {
|
|||
*/
|
||||
private Queue<SAMRecord> cleanedReads = new LinkedList<SAMRecord>();
|
||||
|
||||
/**
|
||||
* The intervals specified by the user
|
||||
*/
|
||||
private HashMap<String, ArrayList<GenomeLoc>> intervals = null;
|
||||
|
||||
/**
|
||||
* A fast lookup table for uniquified read info
|
||||
*/
|
||||
|
|
@ -63,6 +70,21 @@ public class CleanedReadInjector extends ReadWalker<Integer,Integer> {
|
|||
cleanedReadHash.add(getUniquifiedReadName(read));
|
||||
}
|
||||
allReads.close();
|
||||
|
||||
// If there are intervals specified by the user, record them so we can make sure not
|
||||
// to emit reads outside the intervals. For now, we'll group them by chromosome to
|
||||
// make lookup a bit faster.
|
||||
if ( this.getToolkit().getArguments().intervals != null ) {
|
||||
intervals = new HashMap<String, ArrayList<GenomeLoc>>();
|
||||
List<GenomeLoc> locs = GenomeAnalysisEngine.parseIntervalRegion(this.getToolkit().getArguments().intervals);
|
||||
Iterator<GenomeLoc> iter = GenomeLocSortedSet.createSetFromList(locs).iterator();
|
||||
while ( iter.hasNext() ) {
|
||||
GenomeLoc loc = iter.next();
|
||||
if ( intervals.get(loc.getContig()) == null )
|
||||
intervals.put(loc.getContig(), new ArrayList<GenomeLoc>());
|
||||
intervals.get(loc.getContig()).add(loc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -81,9 +103,9 @@ public class CleanedReadInjector extends ReadWalker<Integer,Integer> {
|
|||
while ( firstCleanedRead != null &&
|
||||
firstCleanedRead.getReferenceIndex() <= read.getReferenceIndex() &&
|
||||
firstCleanedRead.getAlignmentStart() <= read.getAlignmentStart() ) {
|
||||
outputBAM.addAlignment(firstCleanedRead);
|
||||
cleanedReadCount++;
|
||||
cleanedReads.remove();
|
||||
if ( emit(firstCleanedRead) )
|
||||
cleanedReadCount++;
|
||||
cleanedReads.remove();
|
||||
firstCleanedRead = cleanedReads.peek();
|
||||
}
|
||||
|
||||
|
|
@ -92,6 +114,37 @@ public class CleanedReadInjector extends ReadWalker<Integer,Integer> {
|
|||
return cleanedReadCount;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determine whether to emit the given read; if so, return true.
|
||||
*/
|
||||
private boolean emit(SAMRecord read) {
|
||||
// if no intervals were specified, emit everything
|
||||
if ( intervals == null ) {
|
||||
outputBAM.addAlignment(read);
|
||||
return true;
|
||||
}
|
||||
|
||||
ArrayList<GenomeLoc> intervalList = intervals.get(read.getReferenceName());
|
||||
if ( intervalList == null )
|
||||
return false;
|
||||
|
||||
GenomeLoc readLoc = GenomeLocParser.createGenomeLoc(read);
|
||||
for ( GenomeLoc interval : intervalList ) {
|
||||
// if it overlaps an interval, then we can emit it
|
||||
if ( interval.overlapsP(readLoc) ) {
|
||||
outputBAM.addAlignment(read);
|
||||
return true;
|
||||
}
|
||||
|
||||
// once we've passed any interval that could overlap it, just quit
|
||||
if ( interval.isPast(readLoc) )
|
||||
return false;
|
||||
}
|
||||
|
||||
// it didn't overlap an interval
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize traversal with number of reads which have been replaced with a clean version.
|
||||
* @return 0 to initialize the traversal.
|
||||
|
|
|
|||
|
|
@ -1,8 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.indels;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.WalkerName;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadFilters;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.gatk.filters.Platform454Filter;
|
||||
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
|
|
@ -18,6 +16,7 @@ import java.util.List;
|
|||
// although this can easily be changed if necessary.
|
||||
|
||||
@WalkerName("IndelIntervals")
|
||||
@Requires({DataSource.READS})
|
||||
@ReadFilters({Platform454Filter.class, ZeroMappingQualityReadFilter.class})
|
||||
public class IndelIntervalWalker extends ReadWalker<IndelIntervalWalker.Interval, IndelIntervalWalker.Interval> {
|
||||
@Argument(fullName="allow454Reads", shortName="454", doc="process 454 reads", required=false)
|
||||
|
|
|
|||
|
|
@ -1,4 +1,3 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.indels;
|
||||
|
||||
import net.sf.samtools.*;
|
||||
|
|
|
|||
|
|
@ -1,4 +1,3 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.indels;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
|
|
@ -9,7 +8,7 @@ import org.broadinstitute.sting.gatk.walkers.*;
|
|||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
|
||||
@WalkerName("SNPClusters")
|
||||
@Requires(value={DataSource.REFERENCE},referenceMetaData={@RMD(name="snps",type=AllelicVariant.class)})
|
||||
@Requires(value={},referenceMetaData={@RMD(name="snps",type=AllelicVariant.class)})
|
||||
public class SNPClusterWalker extends RefWalker<GenomeLoc, GenomeLoc> {
|
||||
@Argument(fullName="windowSize", shortName="window", doc="window size for calculating clusters", required=false)
|
||||
int windowSize = 10;
|
||||
|
|
@ -60,4 +59,4 @@ public class SNPClusterWalker extends RefWalker<GenomeLoc, GenomeLoc> {
|
|||
out.println(sum);
|
||||
return value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue