Start getting the cleaner working in Walker
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@561 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
4c5f640eb7
commit
7de5da7065
|
|
@ -99,28 +99,41 @@ public class TraverseByLocusWindows extends TraversalEngine {
|
|||
ArrayList<SAMRecord> reads = new ArrayList<SAMRecord>();
|
||||
ArrayList<Integer> offsets = new ArrayList<Integer>();
|
||||
boolean done = false;
|
||||
long leftmostIndex = interval.getStart(),
|
||||
rightmostIndex = interval.getStop();
|
||||
while (filterIter.hasNext() && !done) {
|
||||
TraversalStatistics.nRecords++;
|
||||
SAMRecord read = filterIter.next();
|
||||
reads.add(read);
|
||||
offsets.add((int)(read.getAlignmentStart() - interval.getStart()));
|
||||
if (this.maxReads > 0 && TraversalStatistics.nRecords > this.maxReads) {
|
||||
if ( read.getAlignmentStart() < leftmostIndex )
|
||||
leftmostIndex = read.getAlignmentStart();
|
||||
if ( read.getAlignmentEnd() > rightmostIndex )
|
||||
rightmostIndex = read.getAlignmentEnd();
|
||||
if ( this.maxReads > 0 && TraversalStatistics.nRecords > this.maxReads ) {
|
||||
logger.warn(String.format("Maximum number of reads encountered, terminating traversal " + TraversalStatistics.nRecords));
|
||||
done = true;
|
||||
}
|
||||
}
|
||||
|
||||
LocusContext locus = new LocusContext(interval, reads, offsets);
|
||||
GenomeLoc window = new GenomeLoc(interval.getContig(), leftmostIndex, rightmostIndex);
|
||||
LocusContext locus = new LocusContext(window, reads, offsets);
|
||||
if ( DOWNSAMPLE_BY_COVERAGE )
|
||||
locus.downsampleToCoverage(downsamplingCoverage);
|
||||
|
||||
ReferenceIterator refSite = refIter.seekForward(locus.getLocation());
|
||||
ReferenceIterator refSite = refIter.seekForward(window);
|
||||
StringBuffer refBases = new StringBuffer(refSite.getBaseAsString());
|
||||
int locusLength = (int)(rightmostIndex - leftmostIndex);
|
||||
for ( int i = 0; i < locusLength; i++ ) {
|
||||
refSite = refSite.next();
|
||||
refBases.append(refSite.getBaseAsChar());
|
||||
}
|
||||
locus.setReferenceContig(refSite.getCurrentContig());
|
||||
|
||||
// Iterate forward to get all reference ordered data covering this interval
|
||||
final RefMetaDataTracker tracker = getReferenceOrderedDataAtLocus(locus.getLocation());
|
||||
|
||||
sum = walkAtinterval( walker, sum, locus, refSite, tracker );
|
||||
sum = walkAtinterval( walker, sum, locus, refBases.toString(), tracker );
|
||||
|
||||
//System.out.format("Working at %s\n", locus.getLocation().toString());
|
||||
|
||||
|
|
@ -132,24 +145,17 @@ public class TraverseByLocusWindows extends TraversalEngine {
|
|||
protected <M, T> T walkAtinterval( final LocusWindowWalker<M, T> walker,
|
||||
T sum,
|
||||
final LocusContext locus,
|
||||
final ReferenceIterator refSite,
|
||||
final String refSeq,
|
||||
final RefMetaDataTracker tracker ) {
|
||||
ReferenceIterator refSiteCopy = refSite;
|
||||
StringBuffer refBases = new StringBuffer(refSiteCopy.getBaseAsString());
|
||||
int locusLength = (int)(locus.getLocation().getStop() - locus.getLocation().getStart());
|
||||
for ( int i = 0; i < locusLength; i++ ) {
|
||||
refSiteCopy = refSiteCopy.next();
|
||||
refBases.append(refSiteCopy.getBaseAsChar());
|
||||
}
|
||||
|
||||
//logger.debug(String.format(" Reference: %s:%d %c", refSite.getCurrentContig().getName(), refSite.getPosition(), refBase));
|
||||
|
||||
//
|
||||
// Execute our contract with the walker. Call filter, map, and reduce
|
||||
//
|
||||
final boolean keepMeP = walker.filter(tracker, refBases.toString(), locus);
|
||||
final boolean keepMeP = walker.filter(tracker, refSeq, locus);
|
||||
if (keepMeP) {
|
||||
M x = walker.map(tracker, refBases.toString(), locus);
|
||||
M x = walker.map(tracker, refSeq, locus);
|
||||
sum = walker.reduce(x, sum);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -6,18 +6,41 @@ import org.broadinstitute.sting.gatk.walkers.LocusWindowWalker;
|
|||
import org.broadinstitute.sting.gatk.walkers.WalkerName;
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.playground.indels.*;
|
||||
import org.broadinstitute.sting.playground.utils.CountedObject;
|
||||
import org.broadinstitute.sting.playground.utils.CountedObjectComparatorAdapter;
|
||||
|
||||
import net.sf.samtools.*;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.TreeSet;
|
||||
import java.io.File;
|
||||
|
||||
@WalkerName("IntervalCleaner")
|
||||
public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer> {
|
||||
@Argument(fullName="maxReadLength", shortName="maxRead", required=false, defaultValue="-1")
|
||||
public int maxReadLength;
|
||||
@Argument(fullName="OutputCleaned", shortName="O", required=true, doc="Output file (sam or bam) for improved (realigned) reads")
|
||||
public String OUT;
|
||||
|
||||
public void initialize() {}
|
||||
private SAMFileWriter writer;
|
||||
|
||||
public void initialize() {
|
||||
SAMFileHeader header = getToolkit().getSamReader().getFileHeader();
|
||||
writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(header, false, new File(OUT));
|
||||
}
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, String ref, LocusContext context) {
|
||||
out.println(context.getLocation() + " (" + context.getReads().size() + ") => " + ref);
|
||||
List<SAMRecord> reads = context.getReads();
|
||||
ArrayList<SAMRecord> goodReads = new ArrayList<SAMRecord>();
|
||||
long leftmostIndex = context.getLocation().getStart();
|
||||
for ( SAMRecord read : reads ) {
|
||||
if ( read.getReadLength() <= maxReadLength )
|
||||
goodReads.add(read);
|
||||
}
|
||||
clean(goodReads, ref, context.getLocation().getStart());
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
|
@ -32,4 +55,21 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer> {
|
|||
public void onTraversalDone(Integer result) {
|
||||
out.println("Saw " + result + " intervals");
|
||||
}
|
||||
|
||||
private void clean(List<SAMRecord> reads, String reference, long leftmostIndex) {
|
||||
// total mismatches across all reads
|
||||
//int totalMismatches = 0;
|
||||
//TreeSet< CountedObject<Indel> > all_indels = new TreeSet< CountedObject<Indel> >(
|
||||
// new CountedObjectComparatorAdapter<Indel>(new IntervalComparator()));
|
||||
|
||||
for ( SAMRecord read : reads ) {
|
||||
System.out.println(read.getReadString());
|
||||
System.out.println(reference.substring(read.getAlignmentStart()-(int)leftmostIndex, read.getAlignmentEnd()-(int)leftmostIndex+1));
|
||||
//totalMismatches += AlignmentUtils.numMismatches(read, reference);
|
||||
//System.out.println(totalMismatches + "\n");
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
|
@ -130,6 +130,19 @@ public class PileBuilder implements RecordPileReceiver {
|
|||
public int getNumReadsWritten() { return total_reads_written; }
|
||||
|
||||
public void receive(Collection<SAMRecord> c) {
|
||||
int startOnRef = 1000000000; // absolute start (leftmost) position of the pile of reads on the ref
|
||||
int stopOnRef = 0; // absolute stop (rightmost) position of the pile of reads on the ref (rightmost alignment end)
|
||||
for ( SAMRecord r : c ) {
|
||||
startOnRef = Math.min(startOnRef, r.getAlignmentStart() );
|
||||
stopOnRef = Math.max(stopOnRef,r.getAlignmentEnd());
|
||||
}
|
||||
|
||||
// part of the reference covered by original alignments:
|
||||
String pileRef = referenceSequence.substring(startOnRef-1,stopOnRef);
|
||||
receive(c, pileRef, startOnRef);
|
||||
}
|
||||
|
||||
public void receive(Collection<SAMRecord> c, String pileRef, int startOnRef) {
|
||||
|
||||
//TODO: if read starts/ends with an indel (insertion, actually), we detect this as a "different" indel introduced during cleanup.
|
||||
processed_piles++;
|
||||
|
|
@ -137,17 +150,10 @@ public class PileBuilder implements RecordPileReceiver {
|
|||
|
||||
IndexedSequence[] seqs = new IndexedSequence[c.size()];
|
||||
int i = 0;
|
||||
int startOnRef = 1000000000; // absolute start (leftmost) position of the pile of reads on the ref
|
||||
int stopOnRef = 0; // absolute stop (rightmost) position of the pile of reads on the ref (rightmost alignment end)
|
||||
for ( SAMRecord r : c ) {
|
||||
seqs[i++] = new IndexedSequence(r.getReadString(),KmerSize);
|
||||
startOnRef = Math.min(startOnRef, r.getAlignmentStart() );
|
||||
stopOnRef = Math.max(stopOnRef,r.getAlignmentEnd());
|
||||
}
|
||||
|
||||
// part of the reference covered by original alignments:
|
||||
String pileRef = referenceSequence.substring(startOnRef-1,stopOnRef);
|
||||
|
||||
int totalMismatches = 0; // total mismatches across all reads
|
||||
TreeSet< CountedObject<Indel> > all_indels = new TreeSet< CountedObject<Indel> >(
|
||||
new CountedObjectComparatorAdapter<Indel>(new IntervalComparator()));
|
||||
|
|
|
|||
Loading…
Reference in New Issue