We need to sort cleaned reads ourselves (instead of letting SAMFileWriter
do it) because the SAM headers are often screwed up and claim to be "unsorted". While here, I broke off the module from the SortSamIterator in case someone else wants to use it. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@654 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
c735e1f627
commit
009e71fcd9
|
|
@ -1,21 +1,13 @@
|
|||
package org.broadinstitute.sting.gatk.iterators;
|
||||
|
||||
import org.broadinstitute.sting.utils.ComparableSAMRecord;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: mdepristo
|
||||
* Date: Mar 15, 2009
|
||||
* Time: 6:02:31 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class SortSamIterator implements Iterator<SAMRecord> {
|
||||
|
||||
Iterator<ComparableSAMRecord> it;
|
||||
|
|
@ -25,9 +17,9 @@ public class SortSamIterator implements Iterator<SAMRecord> {
|
|||
ArrayList<ComparableSAMRecord> list = new ArrayList<ComparableSAMRecord>();
|
||||
while (unsortedIter.hasNext()) {
|
||||
list.add(new ComparableSAMRecord(unsortedIter.next()));
|
||||
// choose an arbitrary length to limit sorting for now
|
||||
// limit how much can be sorted for now
|
||||
if (list.size() > maxSorts)
|
||||
throw new UnsupportedOperationException("Can not sort files with more than 100K reads on the fly!");
|
||||
throw new UnsupportedOperationException("Can not sort files with more than " + maxSorts + " reads on the fly!");
|
||||
}
|
||||
Collections.sort(list);
|
||||
it = list.iterator();
|
||||
|
|
@ -39,23 +31,4 @@ public class SortSamIterator implements Iterator<SAMRecord> {
|
|||
public void remove() {
|
||||
throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
|
||||
}
|
||||
|
||||
private class ComparableSAMRecord implements Comparable<ComparableSAMRecord> {
|
||||
|
||||
private SAMRecord record;
|
||||
|
||||
public ComparableSAMRecord(SAMRecord record) {
|
||||
this.record = record;
|
||||
}
|
||||
|
||||
public SAMRecord getRecord() {
|
||||
return record;
|
||||
}
|
||||
|
||||
public int compareTo(ComparableSAMRecord o) {
|
||||
GenomeLoc myLoc = new GenomeLoc(record);
|
||||
GenomeLoc hisLoc = new GenomeLoc(o.getRecord());
|
||||
return myLoc.compareTo(hisLoc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers;
|
||||
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.ComparableSAMRecord;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWindowWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.WalkerName;
|
||||
|
|
@ -11,8 +12,7 @@ import org.broadinstitute.sting.playground.indels.*;
|
|||
|
||||
import net.sf.samtools.*;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.*;
|
||||
import java.io.File;
|
||||
|
||||
@WalkerName("IntervalCleaner")
|
||||
|
|
@ -27,10 +27,13 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
|||
public static final int MAX_QUAL = 99;
|
||||
|
||||
private SAMFileWriter writer;
|
||||
// we need to sort the reads ourselves because SAM headers get messed up and claim to be "unsorted" sometimes
|
||||
private TreeSet<ComparableSAMRecord> readsToWrite;
|
||||
|
||||
public void initialize() {
|
||||
SAMFileHeader header = getToolkit().getSamReader().getFileHeader();
|
||||
writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(header, false, new File(OUT));
|
||||
readsToWrite = new TreeSet<ComparableSAMRecord>();
|
||||
}
|
||||
|
||||
// do we care about reads that are not part of our intervals?
|
||||
|
|
@ -54,7 +57,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
|||
read.getAlignmentStart() != SAMRecord.NO_ALIGNMENT_START )
|
||||
goodReads.add(read);
|
||||
else
|
||||
writer.addAlignment(read);
|
||||
readsToWrite.add(new ComparableSAMRecord(read));
|
||||
}
|
||||
|
||||
clean(goodReads, ref, context.getLocation().getStart());
|
||||
|
|
@ -62,6 +65,9 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
|||
//testCleanWithDeletion();
|
||||
//testCleanWithInsertion();
|
||||
|
||||
Iterator<ComparableSAMRecord> iter = readsToWrite.iterator();
|
||||
while ( iter.hasNext() )
|
||||
writer.addAlignment(iter.next().getRecord());
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
|
@ -198,13 +204,13 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
|||
// clean the appropriate reads
|
||||
for ( Pair<Integer, Integer> indexPair : bestConsensus.readIndexes )
|
||||
updateRead(bestConsensus.cigar, bestConsensus.positionOnReference, indexPair.getSecond(), altReads.get(indexPair.getFirst()), (int)leftmostIndex);
|
||||
|
||||
// write them out
|
||||
for ( SAMRecord rec : refReads )
|
||||
writer.addAlignment(rec);
|
||||
for ( AlignedRead aRec : altReads )
|
||||
writer.addAlignment(aRec.getRead());
|
||||
}
|
||||
|
||||
// write them out
|
||||
for ( SAMRecord rec : refReads )
|
||||
readsToWrite.add(new ComparableSAMRecord(rec));
|
||||
for ( AlignedRead aRec : altReads )
|
||||
readsToWrite.add(new ComparableSAMRecord(aRec.getRead()));
|
||||
}
|
||||
|
||||
private Pair<Integer, Integer> findBestOffset(String ref, AlignedRead read) {
|
||||
|
|
@ -540,18 +546,18 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
|||
}
|
||||
|
||||
// if the best alternate consensus has a smaller sum of quality score mismatches, then clean!
|
||||
if ( bestConsensus.mismatchSum < totalMismatchSum ) {
|
||||
if ( bestConsensus != null && bestConsensus.mismatchSum < totalMismatchSum ) {
|
||||
logger.info("CLEAN: " + bestConsensus.str);
|
||||
|
||||
// clean the appropriate reads
|
||||
for ( Pair<Integer, Integer> indexPair : bestConsensus.readIndexes )
|
||||
updateRead(bestConsensus.cigar, bestConsensus.positionOnReference, indexPair.getSecond(), altReads.get(indexPair.getFirst()), (int)leftmostIndex);
|
||||
|
||||
// write them out
|
||||
for ( SAMRecord rec : refReads )
|
||||
writer.addAlignment(rec);
|
||||
for ( AlignedRead aRec : altReads )
|
||||
writer.addAlignment(aRec.getRead());
|
||||
}
|
||||
|
||||
// write them out
|
||||
for ( SAMRecord rec : refReads )
|
||||
readsToWrite.add(new ComparableSAMRecord(rec));
|
||||
for ( AlignedRead aRec : altReads )
|
||||
readsToWrite.add(new ComparableSAMRecord(aRec.getRead()));
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
public class ComparableSAMRecord implements Comparable<ComparableSAMRecord> {
|
||||
|
||||
private SAMRecord record;
|
||||
|
||||
public ComparableSAMRecord(SAMRecord record) {
|
||||
this.record = record;
|
||||
}
|
||||
|
||||
public SAMRecord getRecord() {
|
||||
return record;
|
||||
}
|
||||
|
||||
public int compareTo(ComparableSAMRecord o) {
|
||||
// first sort by start position
|
||||
GenomeLoc myLoc = new GenomeLoc(record);
|
||||
GenomeLoc hisLoc = new GenomeLoc(o.getRecord());
|
||||
int comparison = myLoc.compareTo(hisLoc);
|
||||
// if the reads have the same start position, we must give a non-zero comparison
|
||||
// (because java Sets often require "consistency with equals")
|
||||
if ( comparison == 0 )
|
||||
comparison = record.getReadName().compareTo(o.getRecord().getReadName());
|
||||
return comparison;
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue