We need to sort cleaned reads ourselves (instead of letting SAMFileWriter
do it) because the SAM headers are often wrong and claim to be "unsorted". While here, I split the ComparableSAMRecord class out of SortSamIterator into its own file in case someone else wants to use it.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@654 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
c735e1f627
commit
009e71fcd9
|
|
@ -1,21 +1,13 @@
|
||||||
package org.broadinstitute.sting.gatk.iterators;
|
package org.broadinstitute.sting.gatk.iterators;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.ComparableSAMRecord;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Created by IntelliJ IDEA.
|
|
||||||
* User: mdepristo
|
|
||||||
* Date: Mar 15, 2009
|
|
||||||
* Time: 6:02:31 PM
|
|
||||||
* To change this template use File | Settings | File Templates.
|
|
||||||
*/
|
|
||||||
public class SortSamIterator implements Iterator<SAMRecord> {
|
public class SortSamIterator implements Iterator<SAMRecord> {
|
||||||
|
|
||||||
Iterator<ComparableSAMRecord> it;
|
Iterator<ComparableSAMRecord> it;
|
||||||
|
|
@ -25,9 +17,9 @@ public class SortSamIterator implements Iterator<SAMRecord> {
|
||||||
ArrayList<ComparableSAMRecord> list = new ArrayList<ComparableSAMRecord>();
|
ArrayList<ComparableSAMRecord> list = new ArrayList<ComparableSAMRecord>();
|
||||||
while (unsortedIter.hasNext()) {
|
while (unsortedIter.hasNext()) {
|
||||||
list.add(new ComparableSAMRecord(unsortedIter.next()));
|
list.add(new ComparableSAMRecord(unsortedIter.next()));
|
||||||
// choose an arbitrary length to limit sorting for now
|
// limit how much can be sorted for now
|
||||||
if (list.size() > maxSorts)
|
if (list.size() > maxSorts)
|
||||||
throw new UnsupportedOperationException("Can not sort files with more than 100K reads on the fly!");
|
throw new UnsupportedOperationException("Can not sort files with more than " + maxSorts + " reads on the fly!");
|
||||||
}
|
}
|
||||||
Collections.sort(list);
|
Collections.sort(list);
|
||||||
it = list.iterator();
|
it = list.iterator();
|
||||||
|
|
@ -39,23 +31,4 @@ public class SortSamIterator implements Iterator<SAMRecord> {
|
||||||
public void remove() {
|
public void remove() {
|
||||||
throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
|
throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
|
||||||
}
|
}
|
||||||
|
|
||||||
private class ComparableSAMRecord implements Comparable<ComparableSAMRecord> {
|
|
||||||
|
|
||||||
private SAMRecord record;
|
|
||||||
|
|
||||||
public ComparableSAMRecord(SAMRecord record) {
|
|
||||||
this.record = record;
|
|
||||||
}
|
|
||||||
|
|
||||||
public SAMRecord getRecord() {
|
|
||||||
return record;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int compareTo(ComparableSAMRecord o) {
|
|
||||||
GenomeLoc myLoc = new GenomeLoc(record);
|
|
||||||
GenomeLoc hisLoc = new GenomeLoc(o.getRecord());
|
|
||||||
return myLoc.compareTo(hisLoc);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers;
|
package org.broadinstitute.sting.playground.gatk.walkers;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.Pair;
|
import org.broadinstitute.sting.utils.Pair;
|
||||||
|
import org.broadinstitute.sting.utils.ComparableSAMRecord;
|
||||||
import org.broadinstitute.sting.gatk.refdata.*;
|
import org.broadinstitute.sting.gatk.refdata.*;
|
||||||
import org.broadinstitute.sting.gatk.walkers.LocusWindowWalker;
|
import org.broadinstitute.sting.gatk.walkers.LocusWindowWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.WalkerName;
|
import org.broadinstitute.sting.gatk.walkers.WalkerName;
|
||||||
|
|
@ -11,8 +12,7 @@ import org.broadinstitute.sting.playground.indels.*;
|
||||||
|
|
||||||
import net.sf.samtools.*;
|
import net.sf.samtools.*;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.*;
|
||||||
import java.util.List;
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
|
||||||
@WalkerName("IntervalCleaner")
|
@WalkerName("IntervalCleaner")
|
||||||
|
|
@ -27,10 +27,13 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
public static final int MAX_QUAL = 99;
|
public static final int MAX_QUAL = 99;
|
||||||
|
|
||||||
private SAMFileWriter writer;
|
private SAMFileWriter writer;
|
||||||
|
// we need to sort the reads ourselves because SAM headers get messed up and claim to be "unsorted" sometimes
|
||||||
|
private TreeSet<ComparableSAMRecord> readsToWrite;
|
||||||
|
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
SAMFileHeader header = getToolkit().getSamReader().getFileHeader();
|
SAMFileHeader header = getToolkit().getSamReader().getFileHeader();
|
||||||
writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(header, false, new File(OUT));
|
writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(header, false, new File(OUT));
|
||||||
|
readsToWrite = new TreeSet<ComparableSAMRecord>();
|
||||||
}
|
}
|
||||||
|
|
||||||
// do we care about reads that are not part of our intervals?
|
// do we care about reads that are not part of our intervals?
|
||||||
|
|
@ -54,7 +57,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
read.getAlignmentStart() != SAMRecord.NO_ALIGNMENT_START )
|
read.getAlignmentStart() != SAMRecord.NO_ALIGNMENT_START )
|
||||||
goodReads.add(read);
|
goodReads.add(read);
|
||||||
else
|
else
|
||||||
writer.addAlignment(read);
|
readsToWrite.add(new ComparableSAMRecord(read));
|
||||||
}
|
}
|
||||||
|
|
||||||
clean(goodReads, ref, context.getLocation().getStart());
|
clean(goodReads, ref, context.getLocation().getStart());
|
||||||
|
|
@ -62,6 +65,9 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
//testCleanWithDeletion();
|
//testCleanWithDeletion();
|
||||||
//testCleanWithInsertion();
|
//testCleanWithInsertion();
|
||||||
|
|
||||||
|
Iterator<ComparableSAMRecord> iter = readsToWrite.iterator();
|
||||||
|
while ( iter.hasNext() )
|
||||||
|
writer.addAlignment(iter.next().getRecord());
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -198,13 +204,13 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
// clean the appropriate reads
|
// clean the appropriate reads
|
||||||
for ( Pair<Integer, Integer> indexPair : bestConsensus.readIndexes )
|
for ( Pair<Integer, Integer> indexPair : bestConsensus.readIndexes )
|
||||||
updateRead(bestConsensus.cigar, bestConsensus.positionOnReference, indexPair.getSecond(), altReads.get(indexPair.getFirst()), (int)leftmostIndex);
|
updateRead(bestConsensus.cigar, bestConsensus.positionOnReference, indexPair.getSecond(), altReads.get(indexPair.getFirst()), (int)leftmostIndex);
|
||||||
|
}
|
||||||
|
|
||||||
// write them out
|
// write them out
|
||||||
for ( SAMRecord rec : refReads )
|
for ( SAMRecord rec : refReads )
|
||||||
writer.addAlignment(rec);
|
readsToWrite.add(new ComparableSAMRecord(rec));
|
||||||
for ( AlignedRead aRec : altReads )
|
for ( AlignedRead aRec : altReads )
|
||||||
writer.addAlignment(aRec.getRead());
|
readsToWrite.add(new ComparableSAMRecord(aRec.getRead()));
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private Pair<Integer, Integer> findBestOffset(String ref, AlignedRead read) {
|
private Pair<Integer, Integer> findBestOffset(String ref, AlignedRead read) {
|
||||||
|
|
@ -540,18 +546,18 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
}
|
}
|
||||||
|
|
||||||
// if the best alternate consensus has a smaller sum of quality score mismatches, then clean!
|
// if the best alternate consensus has a smaller sum of quality score mismatches, then clean!
|
||||||
if ( bestConsensus.mismatchSum < totalMismatchSum ) {
|
if ( bestConsensus != null && bestConsensus.mismatchSum < totalMismatchSum ) {
|
||||||
logger.info("CLEAN: " + bestConsensus.str);
|
logger.info("CLEAN: " + bestConsensus.str);
|
||||||
|
|
||||||
// clean the appropriate reads
|
// clean the appropriate reads
|
||||||
for ( Pair<Integer, Integer> indexPair : bestConsensus.readIndexes )
|
for ( Pair<Integer, Integer> indexPair : bestConsensus.readIndexes )
|
||||||
updateRead(bestConsensus.cigar, bestConsensus.positionOnReference, indexPair.getSecond(), altReads.get(indexPair.getFirst()), (int)leftmostIndex);
|
updateRead(bestConsensus.cigar, bestConsensus.positionOnReference, indexPair.getSecond(), altReads.get(indexPair.getFirst()), (int)leftmostIndex);
|
||||||
|
}
|
||||||
|
|
||||||
// write them out
|
// write them out
|
||||||
for ( SAMRecord rec : refReads )
|
for ( SAMRecord rec : refReads )
|
||||||
writer.addAlignment(rec);
|
readsToWrite.add(new ComparableSAMRecord(rec));
|
||||||
for ( AlignedRead aRec : altReads )
|
for ( AlignedRead aRec : altReads )
|
||||||
writer.addAlignment(aRec.getRead());
|
readsToWrite.add(new ComparableSAMRecord(aRec.getRead()));
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -0,0 +1,28 @@
|
||||||
|
package org.broadinstitute.sting.utils;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
|
public class ComparableSAMRecord implements Comparable<ComparableSAMRecord> {
|
||||||
|
|
||||||
|
private SAMRecord record;
|
||||||
|
|
||||||
|
public ComparableSAMRecord(SAMRecord record) {
|
||||||
|
this.record = record;
|
||||||
|
}
|
||||||
|
|
||||||
|
public SAMRecord getRecord() {
|
||||||
|
return record;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int compareTo(ComparableSAMRecord o) {
|
||||||
|
// first sort by start position
|
||||||
|
GenomeLoc myLoc = new GenomeLoc(record);
|
||||||
|
GenomeLoc hisLoc = new GenomeLoc(o.getRecord());
|
||||||
|
int comparison = myLoc.compareTo(hisLoc);
|
||||||
|
// if the reads have the same start position, we must give a non-zero comparison
|
||||||
|
// (because java Sets often require "consistency with equals")
|
||||||
|
if ( comparison == 0 )
|
||||||
|
comparison = record.getReadName().compareTo(o.getRecord().getReadName());
|
||||||
|
return comparison;
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue