Cleanup of the cleaned read injector based on Eric's feedback.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1062 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
a0a3cf2f9f
commit
dde52e33eb
|
|
@ -7,6 +7,7 @@ import net.sf.samtools.SAMFileHeader;
|
|||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.SAMFileReader.ValidationStringency;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import net.sf.samtools.util.RuntimeIOException;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.iterators.*;
|
||||
|
|
@ -420,6 +421,16 @@ public abstract class TraversalEngine {
|
|||
final SAMFileHeader header = samReader.getFileHeader();
|
||||
logger.debug(String.format("Sort order is: " + header.getSortOrder()));
|
||||
|
||||
// Kludge filename into sam file header.
|
||||
if (samReader.getFileHeader().getReadGroups().size() < 1) {
|
||||
//logger.warn("Setting header in reader " + f.getName());
|
||||
SAMReadGroupRecord rec = new SAMReadGroupRecord(samFile.getName());
|
||||
rec.setLibrary(samFile.getName());
|
||||
rec.setSample(samFile.getName());
|
||||
|
||||
samReader.getFileHeader().addReadGroup(rec);
|
||||
}
|
||||
|
||||
return samReader;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,10 +3,12 @@ package org.broadinstitute.sting.gatk.traversals;
|
|||
import org.broadinstitute.sting.gatk.walkers.LocusWindowWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReferenceIterator;
|
||||
import org.broadinstitute.sting.gatk.iterators.MergingSamRecordIterator2;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
|
|
@ -14,7 +16,10 @@ import java.util.*;
|
|||
import java.io.File;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import net.sf.picard.sam.SamFileHeaderMerger;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -86,9 +91,7 @@ public class TraverseByLocusWindows extends TraversalEngine {
|
|||
for ( GenomeLoc interval : locations ) {
|
||||
logger.debug(String.format("Processing interval %s", interval.toString()));
|
||||
|
||||
CloseableIterator<SAMRecord> readIter = samReader.queryOverlapping( interval.getContig(),
|
||||
(int)interval.getStart(),
|
||||
(int)interval.getStop());
|
||||
CloseableIterator<SAMRecord> readIter = getIteratorOverDesiredRegion( samReader, interval );
|
||||
Iterator<SAMRecord> wrappedIter = wrapReadsIterator(readIter, false);
|
||||
LocusContext locus = getLocusContext(wrappedIter, interval);
|
||||
readIter.close();
|
||||
|
|
@ -114,7 +117,7 @@ public class TraverseByLocusWindows extends TraversalEngine {
|
|||
GenomeLoc currentInterval = (locations.size() > 0 ? locations.get(0) : null);
|
||||
int locationsIndex = 0;
|
||||
ArrayList<SAMRecord> intervalReads = new ArrayList<SAMRecord>();
|
||||
Iterator<SAMRecord> readIter = samReader.iterator();
|
||||
Iterator<SAMRecord> readIter = getIteratorOverDesiredRegion(samReader,null);
|
||||
|
||||
while (readIter.hasNext()) {
|
||||
TraversalStatistics.nRecords++;
|
||||
|
|
@ -306,4 +309,19 @@ public class TraverseByLocusWindows extends TraversalEngine {
|
|||
//printProgress("intervals", interval.getLocation());
|
||||
return sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets an iterator over the specified region. Uses a special iterator that dynamically adds a header to all
|
||||
* read information.
|
||||
* @param reader SAMFileReader to query.
|
||||
* @param region Region to use.
|
||||
* @return An iterator over the desired region.
|
||||
*/
|
||||
private CloseableIterator<SAMRecord> getIteratorOverDesiredRegion( SAMFileReader reader, GenomeLoc region ) {
|
||||
SamFileHeaderMerger headerMerger = new SamFileHeaderMerger( Collections.singletonList(reader), SAMFileHeader.SortOrder.coordinate );
|
||||
MergingSamRecordIterator2 iterator = new MergingSamRecordIterator2( headerMerger, new Reads(readsFiles) );
|
||||
if( region != null )
|
||||
iterator.queryOverlapping( region.getContig(), (int)region.getStart(), (int)region.getStop() );
|
||||
return iterator;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
|||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.SAMFileWriter;
|
||||
|
|
@ -14,7 +15,8 @@ import java.util.Map;
|
|||
import java.util.HashMap;
|
||||
import java.util.Queue;
|
||||
import java.util.LinkedList;
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: Jun 10, 2009
|
||||
|
|
@ -74,7 +76,7 @@ public class CleanedReadInjector extends ReadWalker<Integer,Integer> {
|
|||
|
||||
@Override
|
||||
public void initialize() {
|
||||
intervals = new LinkedList<GenomeLoc>( GenomeAnalysisEngine.parseIntervalRegion(intervalsSource,false) );
|
||||
intervals = parseIntervals( intervalsSource );
|
||||
interval = intervals.remove();
|
||||
loadCleanedReadsOverlappingInterval( interval );
|
||||
|
||||
|
|
@ -144,6 +146,21 @@ public class CleanedReadInjector extends ReadWalker<Integer,Integer> {
|
|||
outputBAM.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Load the intervals directly from the command-line or from file, as appropriate.
|
||||
* Merge overlapping intervals.
|
||||
* @param intervalsSource Source of intervals.
|
||||
* @return a queue of sorted, merged intervals.
|
||||
*/
|
||||
private Queue parseIntervals( String intervalsSource ) {
|
||||
List<GenomeLoc> parsedIntervals = GenomeAnalysisEngine.parseIntervalRegion(intervalsSource,false);
|
||||
GenomeLocSortedSet intervalSortedSet = new GenomeLocSortedSet();
|
||||
for( GenomeLoc parsedInterval: parsedIntervals )
|
||||
intervalSortedSet.addRegion(parsedInterval);
|
||||
|
||||
return new LinkedList<GenomeLoc>( intervalSortedSet );
|
||||
}
|
||||
|
||||
/**
|
||||
* Load a list of all the reads overlapping the given interval into memory.
|
||||
* @param interval
|
||||
|
|
@ -153,7 +170,7 @@ public class CleanedReadInjector extends ReadWalker<Integer,Integer> {
|
|||
// Load in all reads mapped to this region. The cleaner will augment the read name in a way that uniquifies it.
|
||||
while( overlappingReads.hasNext() ) {
|
||||
SAMRecord read = overlappingReads.next();
|
||||
cleanedReads.put( read.getReadName(), read );
|
||||
cleanedReads.put( getUniquifiedReadName(read), read );
|
||||
}
|
||||
overlappingReads.close();
|
||||
}
|
||||
|
|
@ -163,7 +180,7 @@ public class CleanedReadInjector extends ReadWalker<Integer,Integer> {
|
|||
* @param read read to uniquify.
|
||||
* @return A (hopefully) completely unique name for the read.
|
||||
*/
|
||||
static String getUniquifiedReadName( SAMRecord read ) {
|
||||
private static String getUniquifiedReadName( SAMRecord read ) {
|
||||
return String.format("%s.%s.%s.%s",read.getAttribute("RG"),read.getReadName(),read.getFlags(),read.getReadString());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ import org.junit.Test;
|
|||
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.File;
|
||||
import java.util.Arrays;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
|
@ -39,7 +40,7 @@ public class CleanedReadInjectorTest extends BaseTest {
|
|||
* The fasta, for comparison.
|
||||
*/
|
||||
protected static IndexedFastaSequenceFile sequenceFile = null;
|
||||
|
||||
|
||||
/**
|
||||
* Initialize the fasta.
|
||||
*/
|
||||
|
|
@ -83,8 +84,7 @@ public class CleanedReadInjectorTest extends BaseTest {
|
|||
SAMRecord sourceRead = ArtificialSAMUtils.createArtificialRead(header,"read1",1,1,5);
|
||||
|
||||
SAMRecord cleanedRead = ArtificialSAMUtils.createArtificialRead(header,"read1",1,1,5);
|
||||
cleanedRead.setReadName(CleanedReadInjector.getUniquifiedReadName(sourceRead));
|
||||
cleanedRead.setReadString("AAAAA");
|
||||
cleanedRead.setBaseQualities(getMockBaseQualityString((byte)1,cleanedRead.getReadLength()));
|
||||
ArtificialSAMFileReader cleanedReads = new ArtificialSAMFileReader(cleanedRead);
|
||||
|
||||
ArtificialSAMFileWriter output = new ArtificialSAMFileWriter();
|
||||
|
|
@ -103,8 +103,7 @@ public class CleanedReadInjectorTest extends BaseTest {
|
|||
SAMRecord sourceRead = ArtificialSAMUtils.createArtificialRead(header,"read1",1,1,5);
|
||||
|
||||
SAMRecord cleanedRead = ArtificialSAMUtils.createArtificialRead(header,"read1",1,1,5);
|
||||
cleanedRead.setReadName(CleanedReadInjector.getUniquifiedReadName(sourceRead));
|
||||
cleanedRead.setReadString("AAAAA");
|
||||
cleanedRead.setBaseQualities(getMockBaseQualityString((byte)1,cleanedRead.getReadLength()));
|
||||
ArtificialSAMFileReader cleanedReads = new ArtificialSAMFileReader(cleanedRead);
|
||||
|
||||
ArtificialSAMFileWriter output = new ArtificialSAMFileWriter();
|
||||
|
|
@ -127,8 +126,7 @@ public class CleanedReadInjectorTest extends BaseTest {
|
|||
ArtificialSAMUtils.createArtificialRead(header,"read5",1,5,5) };
|
||||
|
||||
SAMRecord cleanedRead = ArtificialSAMUtils.createArtificialRead(header,"read1",1,1,1);
|
||||
cleanedRead.setReadName(CleanedReadInjector.getUniquifiedReadName(sourceReads[0]));
|
||||
cleanedRead.setReadString("AAAAA");
|
||||
cleanedRead.setBaseQualities(getMockBaseQualityString((byte)1,cleanedRead.getReadLength()));
|
||||
ArtificialSAMFileReader cleanedReads = new ArtificialSAMFileReader(cleanedRead);
|
||||
|
||||
ArtificialSAMFileWriter output = new ArtificialSAMFileWriter();
|
||||
|
|
@ -154,8 +152,7 @@ public class CleanedReadInjectorTest extends BaseTest {
|
|||
ArtificialSAMUtils.createArtificialRead(header,"read3",1,3,5) };
|
||||
|
||||
SAMRecord cleanedRead = ArtificialSAMUtils.createArtificialRead(header,"read1",1,3,1);
|
||||
cleanedRead.setReadName(CleanedReadInjector.getUniquifiedReadName(sourceReads[0]));
|
||||
cleanedRead.setReadString("AAAAA");
|
||||
cleanedRead.setBaseQualities(getMockBaseQualityString((byte)1,cleanedRead.getReadLength()));
|
||||
ArtificialSAMFileReader cleanedReads = new ArtificialSAMFileReader(cleanedRead);
|
||||
|
||||
ArtificialSAMFileWriter output = new ArtificialSAMFileWriter();
|
||||
|
|
@ -176,8 +173,7 @@ public class CleanedReadInjectorTest extends BaseTest {
|
|||
SAMRecord sourceRead = ArtificialSAMUtils.createArtificialRead(header,"read1",1,1,5);
|
||||
|
||||
SAMRecord cleanedRead = ArtificialSAMUtils.createArtificialRead(header,"read1",1,1,5);
|
||||
cleanedRead.setReadName(CleanedReadInjector.getUniquifiedReadName(sourceRead));
|
||||
cleanedRead.setReadString("AAAAA");
|
||||
cleanedRead.setBaseQualities(getMockBaseQualityString((byte)1,cleanedRead.getReadLength()));
|
||||
ArtificialSAMFileReader cleanedReads = new ArtificialSAMFileReader(cleanedRead);
|
||||
|
||||
ArtificialSAMFileWriter output = new ArtificialSAMFileWriter();
|
||||
|
|
@ -202,8 +198,7 @@ public class CleanedReadInjectorTest extends BaseTest {
|
|||
for( int i = 0; i < sourceReads.length; i++ ) {
|
||||
try {
|
||||
cleanedReads[i] = (SAMRecord)sourceReads[i].clone();
|
||||
cleanedReads[i].setReadName(CleanedReadInjector.getUniquifiedReadName(sourceReads[i]));
|
||||
cleanedReads[i].setReadString("AAAAA");
|
||||
cleanedReads[i].setBaseQualities(getMockBaseQualityString((byte)1,cleanedReads[i].getReadLength()));
|
||||
}
|
||||
catch( CloneNotSupportedException ex ) {
|
||||
throw new StingException("Unable to clone samrecord", ex);
|
||||
|
|
@ -252,4 +247,10 @@ public class CleanedReadInjectorTest extends BaseTest {
|
|||
return accum;
|
||||
}
|
||||
|
||||
private byte[] getMockBaseQualityString( byte value, int length ) {
|
||||
byte[] baseQualities = new byte[length];
|
||||
Arrays.fill(baseQualities,value);
|
||||
return baseQualities;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue