Cleanup of the cleaned read injector based on Eric's feedback.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1062 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
a0a3cf2f9f
commit
dde52e33eb
|
|
@ -7,6 +7,7 @@ import net.sf.samtools.SAMFileHeader;
|
||||||
import net.sf.samtools.SAMFileReader;
|
import net.sf.samtools.SAMFileReader;
|
||||||
import net.sf.samtools.SAMFileReader.ValidationStringency;
|
import net.sf.samtools.SAMFileReader.ValidationStringency;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import net.sf.samtools.SAMReadGroupRecord;
|
||||||
import net.sf.samtools.util.RuntimeIOException;
|
import net.sf.samtools.util.RuntimeIOException;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.gatk.iterators.*;
|
import org.broadinstitute.sting.gatk.iterators.*;
|
||||||
|
|
@ -420,6 +421,16 @@ public abstract class TraversalEngine {
|
||||||
final SAMFileHeader header = samReader.getFileHeader();
|
final SAMFileHeader header = samReader.getFileHeader();
|
||||||
logger.debug(String.format("Sort order is: " + header.getSortOrder()));
|
logger.debug(String.format("Sort order is: " + header.getSortOrder()));
|
||||||
|
|
||||||
|
// Kludge filename into sam file header.
|
||||||
|
if (samReader.getFileHeader().getReadGroups().size() < 1) {
|
||||||
|
//logger.warn("Setting header in reader " + f.getName());
|
||||||
|
SAMReadGroupRecord rec = new SAMReadGroupRecord(samFile.getName());
|
||||||
|
rec.setLibrary(samFile.getName());
|
||||||
|
rec.setSample(samFile.getName());
|
||||||
|
|
||||||
|
samReader.getFileHeader().addReadGroup(rec);
|
||||||
|
}
|
||||||
|
|
||||||
return samReader;
|
return samReader;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -3,10 +3,12 @@ package org.broadinstitute.sting.gatk.traversals;
|
||||||
import org.broadinstitute.sting.gatk.walkers.LocusWindowWalker;
|
import org.broadinstitute.sting.gatk.walkers.LocusWindowWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||||
import org.broadinstitute.sting.gatk.LocusContext;
|
import org.broadinstitute.sting.gatk.LocusContext;
|
||||||
|
import org.broadinstitute.sting.gatk.Reads;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.iterators.ReferenceIterator;
|
import org.broadinstitute.sting.gatk.iterators.ReferenceIterator;
|
||||||
|
import org.broadinstitute.sting.gatk.iterators.MergingSamRecordIterator2;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
|
||||||
|
|
@ -14,7 +16,10 @@ import java.util.*;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import net.sf.samtools.SAMFileReader;
|
||||||
|
import net.sf.samtools.SAMFileHeader;
|
||||||
import net.sf.samtools.util.CloseableIterator;
|
import net.sf.samtools.util.CloseableIterator;
|
||||||
|
import net.sf.picard.sam.SamFileHeaderMerger;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
|
|
@ -86,9 +91,7 @@ public class TraverseByLocusWindows extends TraversalEngine {
|
||||||
for ( GenomeLoc interval : locations ) {
|
for ( GenomeLoc interval : locations ) {
|
||||||
logger.debug(String.format("Processing interval %s", interval.toString()));
|
logger.debug(String.format("Processing interval %s", interval.toString()));
|
||||||
|
|
||||||
CloseableIterator<SAMRecord> readIter = samReader.queryOverlapping( interval.getContig(),
|
CloseableIterator<SAMRecord> readIter = getIteratorOverDesiredRegion( samReader, interval );
|
||||||
(int)interval.getStart(),
|
|
||||||
(int)interval.getStop());
|
|
||||||
Iterator<SAMRecord> wrappedIter = wrapReadsIterator(readIter, false);
|
Iterator<SAMRecord> wrappedIter = wrapReadsIterator(readIter, false);
|
||||||
LocusContext locus = getLocusContext(wrappedIter, interval);
|
LocusContext locus = getLocusContext(wrappedIter, interval);
|
||||||
readIter.close();
|
readIter.close();
|
||||||
|
|
@ -114,7 +117,7 @@ public class TraverseByLocusWindows extends TraversalEngine {
|
||||||
GenomeLoc currentInterval = (locations.size() > 0 ? locations.get(0) : null);
|
GenomeLoc currentInterval = (locations.size() > 0 ? locations.get(0) : null);
|
||||||
int locationsIndex = 0;
|
int locationsIndex = 0;
|
||||||
ArrayList<SAMRecord> intervalReads = new ArrayList<SAMRecord>();
|
ArrayList<SAMRecord> intervalReads = new ArrayList<SAMRecord>();
|
||||||
Iterator<SAMRecord> readIter = samReader.iterator();
|
Iterator<SAMRecord> readIter = getIteratorOverDesiredRegion(samReader,null);
|
||||||
|
|
||||||
while (readIter.hasNext()) {
|
while (readIter.hasNext()) {
|
||||||
TraversalStatistics.nRecords++;
|
TraversalStatistics.nRecords++;
|
||||||
|
|
@ -306,4 +309,19 @@ public class TraverseByLocusWindows extends TraversalEngine {
|
||||||
//printProgress("intervals", interval.getLocation());
|
//printProgress("intervals", interval.getLocation());
|
||||||
return sum;
|
return sum;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets an iterator over the specified region. Uses a special iterator that dynamically adds a header to all
|
||||||
|
* read information.
|
||||||
|
* @param reader SAMFileReader to query.
|
||||||
|
* @param region Region to use.
|
||||||
|
* @return An iterator over the desired region.
|
||||||
|
*/
|
||||||
|
private CloseableIterator<SAMRecord> getIteratorOverDesiredRegion( SAMFileReader reader, GenomeLoc region ) {
|
||||||
|
SamFileHeaderMerger headerMerger = new SamFileHeaderMerger( Collections.singletonList(reader), SAMFileHeader.SortOrder.coordinate );
|
||||||
|
MergingSamRecordIterator2 iterator = new MergingSamRecordIterator2( headerMerger, new Reads(readsFiles) );
|
||||||
|
if( region != null )
|
||||||
|
iterator.queryOverlapping( region.getContig(), (int)region.getStart(), (int)region.getStop() );
|
||||||
|
return iterator;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import net.sf.samtools.SAMFileReader;
|
import net.sf.samtools.SAMFileReader;
|
||||||
import net.sf.samtools.SAMFileWriter;
|
import net.sf.samtools.SAMFileWriter;
|
||||||
|
|
@ -14,7 +15,8 @@ import java.util.Map;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Queue;
|
import java.util.Queue;
|
||||||
import java.util.LinkedList;
|
import java.util.LinkedList;
|
||||||
import java.io.File;
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
/**
|
/**
|
||||||
* User: hanna
|
* User: hanna
|
||||||
* Date: Jun 10, 2009
|
* Date: Jun 10, 2009
|
||||||
|
|
@ -74,7 +76,7 @@ public class CleanedReadInjector extends ReadWalker<Integer,Integer> {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
intervals = new LinkedList<GenomeLoc>( GenomeAnalysisEngine.parseIntervalRegion(intervalsSource,false) );
|
intervals = parseIntervals( intervalsSource );
|
||||||
interval = intervals.remove();
|
interval = intervals.remove();
|
||||||
loadCleanedReadsOverlappingInterval( interval );
|
loadCleanedReadsOverlappingInterval( interval );
|
||||||
|
|
||||||
|
|
@ -144,6 +146,21 @@ public class CleanedReadInjector extends ReadWalker<Integer,Integer> {
|
||||||
outputBAM.close();
|
outputBAM.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Load the intervals directly from the command-line or from file, as appropriate.
|
||||||
|
* Merge overlapping intervals.
|
||||||
|
* @param intervalsSource Source of intervals.
|
||||||
|
* @return a queue of sorted, merged intervals.
|
||||||
|
*/
|
||||||
|
private Queue parseIntervals( String intervalsSource ) {
|
||||||
|
List<GenomeLoc> parsedIntervals = GenomeAnalysisEngine.parseIntervalRegion(intervalsSource,false);
|
||||||
|
GenomeLocSortedSet intervalSortedSet = new GenomeLocSortedSet();
|
||||||
|
for( GenomeLoc parsedInterval: parsedIntervals )
|
||||||
|
intervalSortedSet.addRegion(parsedInterval);
|
||||||
|
|
||||||
|
return new LinkedList<GenomeLoc>( intervalSortedSet );
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Load a list of all the reads overlapping the given interval into memory.
|
* Load a list of all the reads overlapping the given interval into memory.
|
||||||
* @param interval
|
* @param interval
|
||||||
|
|
@ -153,7 +170,7 @@ public class CleanedReadInjector extends ReadWalker<Integer,Integer> {
|
||||||
// Load in all reads mapped to this region. The cleaner will augment the read name in a way that uniquifies it.
|
// Load in all reads mapped to this region. The cleaner will augment the read name in a way that uniquifies it.
|
||||||
while( overlappingReads.hasNext() ) {
|
while( overlappingReads.hasNext() ) {
|
||||||
SAMRecord read = overlappingReads.next();
|
SAMRecord read = overlappingReads.next();
|
||||||
cleanedReads.put( read.getReadName(), read );
|
cleanedReads.put( getUniquifiedReadName(read), read );
|
||||||
}
|
}
|
||||||
overlappingReads.close();
|
overlappingReads.close();
|
||||||
}
|
}
|
||||||
|
|
@ -163,7 +180,7 @@ public class CleanedReadInjector extends ReadWalker<Integer,Integer> {
|
||||||
* @param read read to uniquify.
|
* @param read read to uniquify.
|
||||||
* @return A (hopefully) completely unique name for the read.
|
* @return A (hopefully) completely unique name for the read.
|
||||||
*/
|
*/
|
||||||
static String getUniquifiedReadName( SAMRecord read ) {
|
private static String getUniquifiedReadName( SAMRecord read ) {
|
||||||
return String.format("%s.%s.%s.%s",read.getAttribute("RG"),read.getReadName(),read.getFlags(),read.getReadString());
|
return String.format("%s.%s.%s.%s",read.getAttribute("RG"),read.getReadName(),read.getFlags(),read.getReadString());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,7 @@ import org.junit.Test;
|
||||||
|
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
import net.sf.samtools.SAMFileHeader;
|
import net.sf.samtools.SAMFileHeader;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
@ -83,8 +84,7 @@ public class CleanedReadInjectorTest extends BaseTest {
|
||||||
SAMRecord sourceRead = ArtificialSAMUtils.createArtificialRead(header,"read1",1,1,5);
|
SAMRecord sourceRead = ArtificialSAMUtils.createArtificialRead(header,"read1",1,1,5);
|
||||||
|
|
||||||
SAMRecord cleanedRead = ArtificialSAMUtils.createArtificialRead(header,"read1",1,1,5);
|
SAMRecord cleanedRead = ArtificialSAMUtils.createArtificialRead(header,"read1",1,1,5);
|
||||||
cleanedRead.setReadName(CleanedReadInjector.getUniquifiedReadName(sourceRead));
|
cleanedRead.setBaseQualities(getMockBaseQualityString((byte)1,cleanedRead.getReadLength()));
|
||||||
cleanedRead.setReadString("AAAAA");
|
|
||||||
ArtificialSAMFileReader cleanedReads = new ArtificialSAMFileReader(cleanedRead);
|
ArtificialSAMFileReader cleanedReads = new ArtificialSAMFileReader(cleanedRead);
|
||||||
|
|
||||||
ArtificialSAMFileWriter output = new ArtificialSAMFileWriter();
|
ArtificialSAMFileWriter output = new ArtificialSAMFileWriter();
|
||||||
|
|
@ -103,8 +103,7 @@ public class CleanedReadInjectorTest extends BaseTest {
|
||||||
SAMRecord sourceRead = ArtificialSAMUtils.createArtificialRead(header,"read1",1,1,5);
|
SAMRecord sourceRead = ArtificialSAMUtils.createArtificialRead(header,"read1",1,1,5);
|
||||||
|
|
||||||
SAMRecord cleanedRead = ArtificialSAMUtils.createArtificialRead(header,"read1",1,1,5);
|
SAMRecord cleanedRead = ArtificialSAMUtils.createArtificialRead(header,"read1",1,1,5);
|
||||||
cleanedRead.setReadName(CleanedReadInjector.getUniquifiedReadName(sourceRead));
|
cleanedRead.setBaseQualities(getMockBaseQualityString((byte)1,cleanedRead.getReadLength()));
|
||||||
cleanedRead.setReadString("AAAAA");
|
|
||||||
ArtificialSAMFileReader cleanedReads = new ArtificialSAMFileReader(cleanedRead);
|
ArtificialSAMFileReader cleanedReads = new ArtificialSAMFileReader(cleanedRead);
|
||||||
|
|
||||||
ArtificialSAMFileWriter output = new ArtificialSAMFileWriter();
|
ArtificialSAMFileWriter output = new ArtificialSAMFileWriter();
|
||||||
|
|
@ -127,8 +126,7 @@ public class CleanedReadInjectorTest extends BaseTest {
|
||||||
ArtificialSAMUtils.createArtificialRead(header,"read5",1,5,5) };
|
ArtificialSAMUtils.createArtificialRead(header,"read5",1,5,5) };
|
||||||
|
|
||||||
SAMRecord cleanedRead = ArtificialSAMUtils.createArtificialRead(header,"read1",1,1,1);
|
SAMRecord cleanedRead = ArtificialSAMUtils.createArtificialRead(header,"read1",1,1,1);
|
||||||
cleanedRead.setReadName(CleanedReadInjector.getUniquifiedReadName(sourceReads[0]));
|
cleanedRead.setBaseQualities(getMockBaseQualityString((byte)1,cleanedRead.getReadLength()));
|
||||||
cleanedRead.setReadString("AAAAA");
|
|
||||||
ArtificialSAMFileReader cleanedReads = new ArtificialSAMFileReader(cleanedRead);
|
ArtificialSAMFileReader cleanedReads = new ArtificialSAMFileReader(cleanedRead);
|
||||||
|
|
||||||
ArtificialSAMFileWriter output = new ArtificialSAMFileWriter();
|
ArtificialSAMFileWriter output = new ArtificialSAMFileWriter();
|
||||||
|
|
@ -154,8 +152,7 @@ public class CleanedReadInjectorTest extends BaseTest {
|
||||||
ArtificialSAMUtils.createArtificialRead(header,"read3",1,3,5) };
|
ArtificialSAMUtils.createArtificialRead(header,"read3",1,3,5) };
|
||||||
|
|
||||||
SAMRecord cleanedRead = ArtificialSAMUtils.createArtificialRead(header,"read1",1,3,1);
|
SAMRecord cleanedRead = ArtificialSAMUtils.createArtificialRead(header,"read1",1,3,1);
|
||||||
cleanedRead.setReadName(CleanedReadInjector.getUniquifiedReadName(sourceReads[0]));
|
cleanedRead.setBaseQualities(getMockBaseQualityString((byte)1,cleanedRead.getReadLength()));
|
||||||
cleanedRead.setReadString("AAAAA");
|
|
||||||
ArtificialSAMFileReader cleanedReads = new ArtificialSAMFileReader(cleanedRead);
|
ArtificialSAMFileReader cleanedReads = new ArtificialSAMFileReader(cleanedRead);
|
||||||
|
|
||||||
ArtificialSAMFileWriter output = new ArtificialSAMFileWriter();
|
ArtificialSAMFileWriter output = new ArtificialSAMFileWriter();
|
||||||
|
|
@ -176,8 +173,7 @@ public class CleanedReadInjectorTest extends BaseTest {
|
||||||
SAMRecord sourceRead = ArtificialSAMUtils.createArtificialRead(header,"read1",1,1,5);
|
SAMRecord sourceRead = ArtificialSAMUtils.createArtificialRead(header,"read1",1,1,5);
|
||||||
|
|
||||||
SAMRecord cleanedRead = ArtificialSAMUtils.createArtificialRead(header,"read1",1,1,5);
|
SAMRecord cleanedRead = ArtificialSAMUtils.createArtificialRead(header,"read1",1,1,5);
|
||||||
cleanedRead.setReadName(CleanedReadInjector.getUniquifiedReadName(sourceRead));
|
cleanedRead.setBaseQualities(getMockBaseQualityString((byte)1,cleanedRead.getReadLength()));
|
||||||
cleanedRead.setReadString("AAAAA");
|
|
||||||
ArtificialSAMFileReader cleanedReads = new ArtificialSAMFileReader(cleanedRead);
|
ArtificialSAMFileReader cleanedReads = new ArtificialSAMFileReader(cleanedRead);
|
||||||
|
|
||||||
ArtificialSAMFileWriter output = new ArtificialSAMFileWriter();
|
ArtificialSAMFileWriter output = new ArtificialSAMFileWriter();
|
||||||
|
|
@ -202,8 +198,7 @@ public class CleanedReadInjectorTest extends BaseTest {
|
||||||
for( int i = 0; i < sourceReads.length; i++ ) {
|
for( int i = 0; i < sourceReads.length; i++ ) {
|
||||||
try {
|
try {
|
||||||
cleanedReads[i] = (SAMRecord)sourceReads[i].clone();
|
cleanedReads[i] = (SAMRecord)sourceReads[i].clone();
|
||||||
cleanedReads[i].setReadName(CleanedReadInjector.getUniquifiedReadName(sourceReads[i]));
|
cleanedReads[i].setBaseQualities(getMockBaseQualityString((byte)1,cleanedReads[i].getReadLength()));
|
||||||
cleanedReads[i].setReadString("AAAAA");
|
|
||||||
}
|
}
|
||||||
catch( CloneNotSupportedException ex ) {
|
catch( CloneNotSupportedException ex ) {
|
||||||
throw new StingException("Unable to clone samrecord", ex);
|
throw new StingException("Unable to clone samrecord", ex);
|
||||||
|
|
@ -252,4 +247,10 @@ public class CleanedReadInjectorTest extends BaseTest {
|
||||||
return accum;
|
return accum;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private byte[] getMockBaseQualityString( byte value, int length ) {
|
||||||
|
byte[] baseQualities = new byte[length];
|
||||||
|
Arrays.fill(baseQualities,value);
|
||||||
|
return baseQualities;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue