LocusIteratorByState operates natively on GATKSAMRecords now
-- Updated code to reflect this new typing
This commit is contained in:
parent
94cb50d3d6
commit
e88dae2758
|
|
@ -29,12 +29,14 @@ import net.sf.picard.util.PeekableIterator;
|
|||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
||||
import org.broadinstitute.sting.gatk.iterators.GATKSAMIterator;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.locusiterator.LocusIterator;
|
||||
import org.broadinstitute.sting.utils.locusiterator.LocusIteratorByState;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.Iterator;
|
||||
|
|
@ -70,7 +72,7 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
|
|||
/**
|
||||
* Hold the read iterator so that it can be closed later.
|
||||
*/
|
||||
private final StingSAMIterator readIterator;
|
||||
private final GATKSAMIterator readIterator;
|
||||
|
||||
/**
|
||||
* The data source for reads. Will probably come directly from the BAM file.
|
||||
|
|
@ -107,12 +109,12 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
|
|||
|
||||
public WindowMaker(Shard shard, GenomeLocParser genomeLocParser, StingSAMIterator iterator, List<GenomeLoc> intervals, Collection<String> sampleNames) {
|
||||
this.sourceInfo = shard.getReadProperties();
|
||||
this.readIterator = iterator;
|
||||
this.readIterator = new GATKSAMIterator(iterator);
|
||||
|
||||
// Use the legacy version of LocusIteratorByState if legacy downsampling was requested:
|
||||
if ( sourceInfo.getDownsamplingMethod().useLegacyDownsampler )
|
||||
throw new IllegalArgumentException("legacy downsampler no longer supported in the window maker");
|
||||
this.libs = new LocusIteratorByState(iterator,sourceInfo,genomeLocParser,sampleNames);
|
||||
this.libs = new LocusIteratorByState(readIterator,sourceInfo,genomeLocParser,sampleNames);
|
||||
this.sourceIterator = new PeekableIterator<AlignmentContext>(libs);
|
||||
|
||||
this.intervalIterator = intervals.size()>0 ? new PeekableIterator<GenomeLoc>(intervals.iterator()) : null;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,57 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.iterators;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
|
||||
* Temporarily hack to convert SAMRecords to GATKSAMRecords
|
||||
*
|
||||
* User: depristo
|
||||
* Date: 1/11/13
|
||||
* Time: 1:19 PM
|
||||
*/
|
||||
public class GATKSAMIterator implements CloseableIterator<GATKSAMRecord>, Iterable<GATKSAMRecord> {
|
||||
final CloseableIterator<SAMRecord> it;
|
||||
|
||||
public GATKSAMIterator(final CloseableIterator<SAMRecord> it) {
|
||||
this.it = it;
|
||||
}
|
||||
|
||||
public GATKSAMIterator(final StingSAMIterator it) {
|
||||
this.it = it;
|
||||
}
|
||||
|
||||
@Override public boolean hasNext() { return it.hasNext(); }
|
||||
@Override public GATKSAMRecord next() { return (GATKSAMRecord)it.next(); }
|
||||
@Override public void remove() { it.remove(); }
|
||||
@Override public void close() { it.close(); }
|
||||
@Override public Iterator<GATKSAMRecord> iterator() { return this; }
|
||||
}
|
||||
|
|
@ -84,9 +84,9 @@ public class TraverseActiveRegionsOptimized<M,T> extends TraverseActiveRegions<M
|
|||
// Grab all the previously unseen reads from this pileup and add them to the massive read list
|
||||
// Note that this must occur before we leave because we are outside the intervals because
|
||||
// reads may occur outside our intervals but overlap them in the future
|
||||
final Collection<SAMRecord> reads = locusView.getLIBS().transferReadsFromAllPreviousPileups();
|
||||
for( final SAMRecord read : reads ) {
|
||||
notifyOfCurrentPosition((GATKSAMRecord)read);
|
||||
final Collection<GATKSAMRecord> reads = locusView.getLIBS().transferReadsFromAllPreviousPileups();
|
||||
for( final GATKSAMRecord read : reads ) {
|
||||
notifyOfCurrentPosition(read);
|
||||
// most of the time maybeDuplicatedReads is empty
|
||||
// TODO -- I believe that because of the ordering of reads that as soon as we don't find a read in the
|
||||
// TODO -- potential list of duplicates we can clear the hashset
|
||||
|
|
|
|||
|
|
@ -31,7 +31,6 @@ import com.google.java.contract.Requires;
|
|||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
@ -87,8 +86,8 @@ public class AlignmentStateMachine {
|
|||
private int offsetIntoCurrentCigarElement;
|
||||
|
||||
@Requires({"read != null", "read.getAlignmentStart() != -1", "read.getCigar() != null"})
|
||||
public AlignmentStateMachine(final SAMRecord read) {
|
||||
this.read = (GATKSAMRecord)read;
|
||||
public AlignmentStateMachine(final GATKSAMRecord read) {
|
||||
this.read = read;
|
||||
this.cigar = read.getCigar();
|
||||
this.nCigarElements = cigar.numCigarElements();
|
||||
initializeAsLeftEdge();
|
||||
|
|
@ -110,7 +109,7 @@ public class AlignmentStateMachine {
|
|||
* @return a non-null GATKSAMRecord
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
public SAMRecord getRead() {
|
||||
public GATKSAMRecord getRead() {
|
||||
return read;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,193 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.locusiterator;
|
||||
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import net.sf.samtools.SAMRecordIterator;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.CommandLineProgram;
|
||||
import org.broadinstitute.sting.commandline.Input;
|
||||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.iterators.GATKSAMIterator;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSamRecordFactory;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Caliper microbenchmark of fragment pileup
|
||||
*/
|
||||
public class LIBSPerformance extends CommandLineProgram {
|
||||
private static Logger logger = Logger.getLogger(LIBSPerformance.class);
|
||||
|
||||
@Input(fullName = "input_file", shortName = "I", doc = "SAM or BAM file(s)", required = true)
|
||||
public File samFile = null;
|
||||
|
||||
@Input(fullName = "reference_sequence", shortName = "R", doc = "Reference sequence file", required = true)
|
||||
public File referenceFile = null;
|
||||
|
||||
@Argument(fullName = "L", shortName = "L", doc = "Query location", required = false)
|
||||
public String location = null;
|
||||
|
||||
|
||||
@Override
|
||||
public int execute() throws IOException {
|
||||
final IndexedFastaSequenceFile reference = new CachingIndexedFastaSequenceFile(referenceFile);
|
||||
final GenomeLocParser genomeLocParser = new GenomeLocParser(reference);
|
||||
|
||||
final SAMFileReader reader = new SAMFileReader(samFile);
|
||||
reader.setSAMRecordFactory(new GATKSamRecordFactory());
|
||||
|
||||
SAMRecordIterator rawIterator;
|
||||
if ( location == null )
|
||||
rawIterator = reader.iterator();
|
||||
else {
|
||||
final GenomeLoc loc = genomeLocParser.parseGenomeLoc(location);
|
||||
rawIterator = reader.query(loc.getContig(), loc.getStart(), loc.getStop(), false);
|
||||
}
|
||||
|
||||
final GATKSAMIterator iterator = new GATKSAMIterator(rawIterator);
|
||||
|
||||
final Set<String> samples = new HashSet<String>();
|
||||
for ( final SAMReadGroupRecord rg : reader.getFileHeader().getReadGroups() )
|
||||
samples.add(rg.getSample());
|
||||
|
||||
final LIBSDownsamplingInfo ds = new LIBSDownsamplingInfo(false, -1);
|
||||
|
||||
final LocusIteratorByState libs =
|
||||
new LocusIteratorByState(
|
||||
iterator,
|
||||
ds,
|
||||
true,
|
||||
genomeLocParser,
|
||||
samples,
|
||||
false);
|
||||
|
||||
int bp = 0;
|
||||
while ( libs.hasNext() ) {
|
||||
AlignmentContext context = libs.next();
|
||||
if ( ++bp % 100000 == 0 )
|
||||
logger.info(bp + " iterations at " + context.getLocation());
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// private void syntheticTests() {
|
||||
// final int readLength = 101;
|
||||
// final int nReads = 10000;
|
||||
// final int locus = 1;
|
||||
//
|
||||
// SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
|
||||
// final GenomeLocParser genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
|
||||
//
|
||||
// int nIterations = 0;
|
||||
// for ( final String cigar : Arrays.asList("101M", "50M10I40M", "50M10D40M") ) {
|
||||
// GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read", 0, locus, readLength);
|
||||
// read.setReadBases(Utils.dupBytes((byte) 'A', readLength));
|
||||
// final byte[] quals = new byte[readLength];
|
||||
// for ( int i = 0; i < readLength; i++ )
|
||||
// quals[i] = (byte)(i % QualityUtils.MAX_QUAL_SCORE);
|
||||
// read.setBaseQualities(quals);
|
||||
// read.setCigarString(cigar);
|
||||
//
|
||||
// for ( int j = 0; j < nReads; j++ ) {
|
||||
// for ( int i = 0; i < rep; i++ ) {
|
||||
// switch ( op ) {
|
||||
// case NEW_STATE:
|
||||
// {
|
||||
// final AlignmentStateMachine alignmentStateMachine = new AlignmentStateMachine(read);
|
||||
// while ( alignmentStateMachine.stepForwardOnGenome() != null ) {
|
||||
// nIterations++;
|
||||
// }
|
||||
// }
|
||||
// break;
|
||||
//// case OLD_STATE:
|
||||
//// {
|
||||
//// final SAMRecordAlignmentState alignmentStateMachine = new SAMRecordAlignmentState(read);
|
||||
//// while ( alignmentStateMachine.stepForwardOnGenome() != null ) {
|
||||
//// alignmentStateMachine.getRead();
|
||||
//// nIterations++;
|
||||
//// }
|
||||
//// }
|
||||
//// break;
|
||||
// case NEW_LIBS:
|
||||
// {
|
||||
// final List<GATKSAMRecord> reads = Collections.nCopies(30, read);
|
||||
// final org.broadinstitute.sting.utils.locusiterator.LocusIteratorByState libs =
|
||||
// new org.broadinstitute.sting.utils.locusiterator.LocusIteratorByState(
|
||||
// new LocusIteratorByStateBaseTest.FakeCloseableIterator<GATKSAMRecord>(reads.iterator()),
|
||||
// LocusIteratorByStateBaseTest.createTestReadProperties(),
|
||||
// genomeLocParser,
|
||||
// LocusIteratorByState.sampleListForSAMWithoutReadGroups());
|
||||
//
|
||||
// while ( libs.hasNext() ) {
|
||||
// AlignmentContext context = libs.next();
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// System.out.printf("iterations %d%n", nIterations);
|
||||
// }
|
||||
|
||||
/**
|
||||
* Required main method implementation.
|
||||
* @param argv Command-line argument text.
|
||||
* @throws Exception on error.
|
||||
*/
|
||||
public static void main(String[] argv) throws Exception {
|
||||
int returnCode = 0;
|
||||
try {
|
||||
LIBSPerformance instance = new LIBSPerformance();
|
||||
start(instance, argv);
|
||||
returnCode = 0;
|
||||
} catch(Exception ex) {
|
||||
returnCode = 1;
|
||||
ex.printStackTrace();
|
||||
throw ex;
|
||||
} finally {
|
||||
System.exit(returnCode);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -28,7 +28,6 @@ package org.broadinstitute.sting.utils.locusiterator;
|
|||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
|
|
@ -51,7 +50,7 @@ import java.util.*;
|
|||
*
|
||||
* There are a few constraints on required and ensured by LIBS:
|
||||
*
|
||||
* -- Requires the Iterator<SAMRecord> to returns reads in coordinate sorted order, consistent with the ordering
|
||||
* -- Requires the Iterator<GATKSAMRecord> to returns reads in coordinate sorted order, consistent with the ordering
|
||||
* defined by the SAM file format. That that for performance reasons this constraint isn't actually enforced.
|
||||
* The behavior of LIBS is undefined in the case where the reads are badly ordered.
|
||||
* -- The reads in the ReadBackedPileup are themselves in the order of appearance of the reads from the iterator.
|
||||
|
|
@ -126,7 +125,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
* list of samples may contain a null element, and all reads without read groups will
|
||||
* be mapped to this null sample
|
||||
*/
|
||||
public LocusIteratorByState(final Iterator<SAMRecord> samIterator,
|
||||
public LocusIteratorByState(final Iterator<GATKSAMRecord> samIterator,
|
||||
final ReadProperties readInformation,
|
||||
final GenomeLocParser genomeLocParser,
|
||||
final Collection<String> samples) {
|
||||
|
|
@ -151,7 +150,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
* be mapped to this null sample
|
||||
* @param maintainUniqueReadsList if true, we will keep the unique reads from off the samIterator and make them
|
||||
* available via the transferReadsFromAllPreviousPileups interface
|
||||
*/ protected LocusIteratorByState(final Iterator<SAMRecord> samIterator,
|
||||
*/ protected LocusIteratorByState(final Iterator<GATKSAMRecord> samIterator,
|
||||
final LIBSDownsamplingInfo downsamplingInfo,
|
||||
final boolean includeReadsWithDeletionAtLoci,
|
||||
final GenomeLocParser genomeLocParser,
|
||||
|
|
@ -310,7 +309,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
* of submitted reads, if enabled.
|
||||
*
|
||||
* The purpose of this function is allow users of LIBS to keep track of all of the reads pulled off the
|
||||
* underlying SAMRecord iterator and that appeared at any point in the list of SAMRecordAlignmentState for
|
||||
* underlying GATKSAMRecord iterator and that appeared at any point in the list of SAMRecordAlignmentState for
|
||||
* any reads. This function is intended to allow users to efficiently reconstruct the unique set of reads
|
||||
* used across all pileups. This is necessary for LIBS to handle because attempting to do
|
||||
* so from the pileups coming out of LIBS is extremely expensive.
|
||||
|
|
@ -322,7 +321,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
* @return the current list
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
public List<SAMRecord> transferReadsFromAllPreviousPileups() {
|
||||
public List<GATKSAMRecord> transferReadsFromAllPreviousPileups() {
|
||||
return readStates.transferSubmittedReads();
|
||||
}
|
||||
|
||||
|
|
@ -331,7 +330,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
* @return a non-null list
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
protected List<SAMRecord> getReadsFromAllPreviousPileups() {
|
||||
protected List<GATKSAMRecord> getReadsFromAllPreviousPileups() {
|
||||
return readStates.getSubmittedReads();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -28,9 +28,9 @@ package org.broadinstitute.sting.utils.locusiterator;
|
|||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.picard.util.PeekableIterator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.downsampling.Downsampler;
|
||||
import org.broadinstitute.sting.gatk.downsampling.LevelingDownsampler;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -50,30 +50,30 @@ import java.util.*;
|
|||
*/
|
||||
class ReadStateManager {
|
||||
private final List<String> samples;
|
||||
private final PeekableIterator<SAMRecord> iterator;
|
||||
private final SamplePartitioner samplePartitioner;
|
||||
private final PeekableIterator<GATKSAMRecord> iterator;
|
||||
private final SamplePartitioner<GATKSAMRecord> samplePartitioner;
|
||||
private final Map<String, PerSampleReadStateManager> readStatesBySample = new HashMap<String, PerSampleReadStateManager>();
|
||||
|
||||
private LinkedList<SAMRecord> submittedReads;
|
||||
private LinkedList<GATKSAMRecord> submittedReads;
|
||||
private final boolean keepSubmittedReads;
|
||||
|
||||
private int totalReadStates = 0;
|
||||
|
||||
public ReadStateManager(final Iterator<SAMRecord> source,
|
||||
public ReadStateManager(final Iterator<GATKSAMRecord> source,
|
||||
final List<String> samples,
|
||||
final LIBSDownsamplingInfo LIBSDownsamplingInfo,
|
||||
final boolean keepSubmittedReads) {
|
||||
this.samples = samples;
|
||||
this.iterator = new PeekableIterator<SAMRecord>(source);
|
||||
this.iterator = new PeekableIterator<GATKSAMRecord>(source);
|
||||
|
||||
this.keepSubmittedReads = keepSubmittedReads;
|
||||
this.submittedReads = new LinkedList<SAMRecord>();
|
||||
this.submittedReads = new LinkedList<GATKSAMRecord>();
|
||||
|
||||
for (final String sample : samples) {
|
||||
readStatesBySample.put(sample, new PerSampleReadStateManager(LIBSDownsamplingInfo));
|
||||
}
|
||||
|
||||
samplePartitioner = new SamplePartitioner(LIBSDownsamplingInfo, samples);
|
||||
samplePartitioner = new SamplePartitioner<GATKSAMRecord>(LIBSDownsamplingInfo, samples);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -138,12 +138,12 @@ class ReadStateManager {
|
|||
}
|
||||
|
||||
// fast testing of position
|
||||
private boolean readIsPastCurrentPosition(SAMRecord read) {
|
||||
private boolean readIsPastCurrentPosition(GATKSAMRecord read) {
|
||||
if (isEmpty())
|
||||
return false;
|
||||
else {
|
||||
AlignmentStateMachine state = getFirst();
|
||||
SAMRecord ourRead = state.getRead();
|
||||
final AlignmentStateMachine state = getFirst();
|
||||
final GATKSAMRecord ourRead = state.getRead();
|
||||
return read.getReferenceIndex() > ourRead.getReferenceIndex() || read.getAlignmentStart() > state.getGenomePosition();
|
||||
}
|
||||
}
|
||||
|
|
@ -172,7 +172,7 @@ class ReadStateManager {
|
|||
samplePartitioner.doneSubmittingReads();
|
||||
|
||||
for (final String sample : samples) {
|
||||
Collection<SAMRecord> newReads = samplePartitioner.getReadsForSample(sample);
|
||||
final Collection<GATKSAMRecord> newReads = samplePartitioner.getReadsForSample(sample);
|
||||
PerSampleReadStateManager statesBySample = readStatesBySample.get(sample);
|
||||
addReadsToSample(statesBySample, newReads);
|
||||
}
|
||||
|
|
@ -185,7 +185,7 @@ class ReadStateManager {
|
|||
* @param read a non-null read
|
||||
*/
|
||||
@Requires("read != null")
|
||||
protected void submitRead(final SAMRecord read) {
|
||||
protected void submitRead(final GATKSAMRecord read) {
|
||||
if ( keepSubmittedReads )
|
||||
submittedReads.add(read);
|
||||
samplePartitioner.submitRead(read);
|
||||
|
|
@ -213,11 +213,11 @@ class ReadStateManager {
|
|||
"result != null",
|
||||
"result != submittedReads" // result and previous submitted reads are not == objects
|
||||
})
|
||||
public List<SAMRecord> transferSubmittedReads() {
|
||||
public List<GATKSAMRecord> transferSubmittedReads() {
|
||||
if ( ! keepSubmittedReads ) throw new UnsupportedOperationException("cannot transferSubmittedReads if you aren't keeping them");
|
||||
|
||||
final List<SAMRecord> prevSubmittedReads = submittedReads;
|
||||
this.submittedReads = new LinkedList<SAMRecord>();
|
||||
final List<GATKSAMRecord> prevSubmittedReads = submittedReads;
|
||||
this.submittedReads = new LinkedList<GATKSAMRecord>();
|
||||
|
||||
return prevSubmittedReads;
|
||||
}
|
||||
|
|
@ -244,7 +244,7 @@ class ReadStateManager {
|
|||
* @return a non-null list of reads that have been submitted to this ReadStateManager
|
||||
*/
|
||||
@Ensures({"result != null","keepSubmittedReads || result.isEmpty()"})
|
||||
protected List<SAMRecord> getSubmittedReads() {
|
||||
protected List<GATKSAMRecord> getSubmittedReads() {
|
||||
return submittedReads;
|
||||
}
|
||||
|
||||
|
|
@ -254,13 +254,13 @@ class ReadStateManager {
|
|||
* @param readStates The list of read states to add this collection of reads.
|
||||
* @param reads Reads to add. Selected reads will be pulled from this source.
|
||||
*/
|
||||
private void addReadsToSample(final PerSampleReadStateManager readStates, final Collection<SAMRecord> reads) {
|
||||
private void addReadsToSample(final PerSampleReadStateManager readStates, final Collection<GATKSAMRecord> reads) {
|
||||
if (reads.isEmpty())
|
||||
return;
|
||||
|
||||
Collection<AlignmentStateMachine> newReadStates = new LinkedList<AlignmentStateMachine>();
|
||||
|
||||
for (SAMRecord read : reads) {
|
||||
for (GATKSAMRecord read : reads) {
|
||||
AlignmentStateMachine state = new AlignmentStateMachine(read);
|
||||
if ( state.stepForwardOnGenome() != null )
|
||||
// explicitly filter out reads that are all insertions / soft clips
|
||||
|
|
|
|||
|
|
@ -37,35 +37,35 @@ import java.util.*;
|
|||
*
|
||||
* Note: stores reads by sample ID string, not by sample object
|
||||
*/
|
||||
class SamplePartitioner {
|
||||
private Map<String, Downsampler<SAMRecord>> readsBySample;
|
||||
class SamplePartitioner<T extends SAMRecord> {
|
||||
private Map<String, Downsampler<T>> readsBySample;
|
||||
|
||||
public SamplePartitioner(final LIBSDownsamplingInfo LIBSDownsamplingInfo, final List<String> samples) {
|
||||
readsBySample = new HashMap<String, Downsampler<SAMRecord>>(samples.size());
|
||||
readsBySample = new HashMap<String, Downsampler<T>>(samples.size());
|
||||
for ( String sample : samples ) {
|
||||
readsBySample.put(sample, createDownsampler(LIBSDownsamplingInfo));
|
||||
}
|
||||
}
|
||||
|
||||
private Downsampler<SAMRecord> createDownsampler(final LIBSDownsamplingInfo LIBSDownsamplingInfo) {
|
||||
private Downsampler<T> createDownsampler(final LIBSDownsamplingInfo LIBSDownsamplingInfo) {
|
||||
return LIBSDownsamplingInfo.isPerformDownsampling()
|
||||
? new ReservoirDownsampler<SAMRecord>(LIBSDownsamplingInfo.getToCoverage())
|
||||
: new PassThroughDownsampler<SAMRecord>();
|
||||
? new ReservoirDownsampler<T>(LIBSDownsamplingInfo.getToCoverage())
|
||||
: new PassThroughDownsampler<T>();
|
||||
}
|
||||
|
||||
public void submitRead(SAMRecord read) {
|
||||
public void submitRead(T read) {
|
||||
String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null;
|
||||
if (readsBySample.containsKey(sampleName))
|
||||
readsBySample.get(sampleName).submit(read);
|
||||
}
|
||||
|
||||
public void doneSubmittingReads() {
|
||||
for ( Map.Entry<String, Downsampler<SAMRecord>> perSampleReads : readsBySample.entrySet() ) {
|
||||
for ( Map.Entry<String, Downsampler<T>> perSampleReads : readsBySample.entrySet() ) {
|
||||
perSampleReads.getValue().signalEndOfInput();
|
||||
}
|
||||
}
|
||||
|
||||
public Collection<SAMRecord> getReadsForSample(String sampleName) {
|
||||
public Collection<T> getReadsForSample(String sampleName) {
|
||||
if ( ! readsBySample.containsKey(sampleName) )
|
||||
throw new NoSuchElementException("Sample name not found");
|
||||
|
||||
|
|
@ -73,7 +73,7 @@ class SamplePartitioner {
|
|||
}
|
||||
|
||||
public void reset() {
|
||||
for ( Map.Entry<String, Downsampler<SAMRecord>> perSampleReads : readsBySample.entrySet() ) {
|
||||
for ( Map.Entry<String, Downsampler<T>> perSampleReads : readsBySample.entrySet() ) {
|
||||
perSampleReads.getValue().clear();
|
||||
perSampleReads.getValue().reset();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -335,13 +335,13 @@ public class ArtificialSAMUtils {
|
|||
*
|
||||
* @return a collection of stackSize reads all sharing the above properties
|
||||
*/
|
||||
public static List<SAMRecord> createReadStream( final int nReadsPerLocus,
|
||||
public static List<GATKSAMRecord> createReadStream( final int nReadsPerLocus,
|
||||
final int nLoci,
|
||||
final SAMFileHeader header,
|
||||
final int alignmentStart,
|
||||
final int length ) {
|
||||
final String baseName = "read";
|
||||
List<SAMRecord> reads = new ArrayList<SAMRecord>(nReadsPerLocus*nLoci);
|
||||
List<GATKSAMRecord> reads = new ArrayList<GATKSAMRecord>(nReadsPerLocus*nLoci);
|
||||
for ( int locus = 0; locus < nLoci; locus++ ) {
|
||||
for ( int readI = 0; readI < nReadsPerLocus; readI++ ) {
|
||||
for ( final SAMReadGroupRecord rg : header.getReadGroups() ) {
|
||||
|
|
|
|||
|
|
@ -1,110 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.locusiterator;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Caliper microbenchmark of fragment pileup
|
||||
*/
|
||||
public class AlignmentStateMachinePerformance {
|
||||
final static int readLength = 101;
|
||||
final static int nReads = 10000;
|
||||
final static int locus = 1;
|
||||
|
||||
private enum Op {
|
||||
NEW_STATE, OLD_STATE, NEW_LIBS
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
final int rep = Integer.valueOf(args[0]);
|
||||
final Op op = Op.valueOf(args[1]);
|
||||
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
|
||||
final GenomeLocParser genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
|
||||
|
||||
int nIterations = 0;
|
||||
for ( final String cigar : Arrays.asList("101M", "50M10I40M", "50M10D40M") ) {
|
||||
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read", 0, locus, readLength);
|
||||
read.setReadBases(Utils.dupBytes((byte) 'A', readLength));
|
||||
final byte[] quals = new byte[readLength];
|
||||
for ( int i = 0; i < readLength; i++ )
|
||||
quals[i] = (byte)(i % QualityUtils.MAX_QUAL_SCORE);
|
||||
read.setBaseQualities(quals);
|
||||
read.setCigarString(cigar);
|
||||
|
||||
for ( int j = 0; j < nReads; j++ ) {
|
||||
for ( int i = 0; i < rep; i++ ) {
|
||||
switch ( op ) {
|
||||
case NEW_STATE:
|
||||
{
|
||||
final AlignmentStateMachine alignmentStateMachine = new AlignmentStateMachine(read);
|
||||
while ( alignmentStateMachine.stepForwardOnGenome() != null ) {
|
||||
nIterations++;
|
||||
}
|
||||
}
|
||||
break;
|
||||
// case OLD_STATE:
|
||||
// {
|
||||
// final SAMRecordAlignmentState alignmentStateMachine = new SAMRecordAlignmentState(read);
|
||||
// while ( alignmentStateMachine.stepForwardOnGenome() != null ) {
|
||||
// alignmentStateMachine.getRead();
|
||||
// nIterations++;
|
||||
// }
|
||||
// }
|
||||
// break;
|
||||
case NEW_LIBS:
|
||||
{
|
||||
final List<SAMRecord> reads = Collections.nCopies(30, (SAMRecord) read);
|
||||
final org.broadinstitute.sting.utils.locusiterator.LocusIteratorByState libs =
|
||||
new org.broadinstitute.sting.utils.locusiterator.LocusIteratorByState(
|
||||
new LocusIteratorByStateBaseTest.FakeCloseableIterator<SAMRecord>(reads.iterator()),
|
||||
LocusIteratorByStateBaseTest.createTestReadProperties(),
|
||||
genomeLocParser,
|
||||
LocusIteratorByState.sampleListForSAMWithoutReadGroups());
|
||||
|
||||
while ( libs.hasNext() ) {
|
||||
AlignmentContext context = libs.next();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
System.out.printf("iterations %d%n", nIterations);
|
||||
}
|
||||
}
|
||||
|
|
@ -46,7 +46,7 @@ public class LocusIteratorBenchmark extends SimpleBenchmark {
|
|||
protected SAMFileHeader header;
|
||||
protected GenomeLocParser genomeLocParser;
|
||||
|
||||
List<SAMRecord> reads = new LinkedList<SAMRecord>();
|
||||
List<GATKSAMRecord> reads = new LinkedList<GATKSAMRecord>();
|
||||
final int readLength = 101;
|
||||
final int nReads = 10000;
|
||||
final int locus = 1;
|
||||
|
|
@ -104,7 +104,7 @@ public class LocusIteratorBenchmark extends SimpleBenchmark {
|
|||
for ( int i = 0; i < rep; i++ ) {
|
||||
final org.broadinstitute.sting.utils.locusiterator.LocusIteratorByState libs =
|
||||
new org.broadinstitute.sting.utils.locusiterator.LocusIteratorByState(
|
||||
new LocusIteratorByStateBaseTest.FakeCloseableIterator<SAMRecord>(reads.iterator()),
|
||||
new LocusIteratorByStateBaseTest.FakeCloseableIterator<GATKSAMRecord>(reads.iterator()),
|
||||
LocusIteratorByStateBaseTest.createTestReadProperties(),
|
||||
genomeLocParser,
|
||||
LocusIteratorByState.sampleListForSAMWithoutReadGroups());
|
||||
|
|
@ -128,7 +128,7 @@ public class LocusIteratorBenchmark extends SimpleBenchmark {
|
|||
|
||||
public void timeAlignmentStateMachine(int rep) {
|
||||
for ( int i = 0; i < rep; i++ ) {
|
||||
for ( final SAMRecord read : reads ) {
|
||||
for ( final GATKSAMRecord read : reads ) {
|
||||
final AlignmentStateMachine alignmentStateMachine = new AlignmentStateMachine(read);
|
||||
while ( alignmentStateMachine.stepForwardOnGenome() != null ) {
|
||||
;
|
||||
|
|
|
|||
|
|
@ -57,9 +57,9 @@ public class LocusIteratorByStateBaseTest extends BaseTest {
|
|||
genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
|
||||
}
|
||||
|
||||
protected LocusIteratorByState makeLTBS(List<SAMRecord> reads,
|
||||
protected LocusIteratorByState makeLTBS(List<GATKSAMRecord> reads,
|
||||
ReadProperties readAttributes) {
|
||||
return new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()),
|
||||
return new LocusIteratorByState(new FakeCloseableIterator<GATKSAMRecord>(reads.iterator()),
|
||||
readAttributes,
|
||||
genomeLocParser,
|
||||
LocusIteratorByState.sampleListForSAMWithoutReadGroups());
|
||||
|
|
@ -85,7 +85,7 @@ public class LocusIteratorByStateBaseTest extends BaseTest {
|
|||
keepReads);
|
||||
}
|
||||
|
||||
protected static class FakeCloseableIterator<T> implements CloseableIterator<T> {
|
||||
public static class FakeCloseableIterator<T> implements CloseableIterator<T> {
|
||||
Iterator<T> iterator;
|
||||
|
||||
public FakeCloseableIterator(Iterator<T> it) {
|
||||
|
|
|
|||
|
|
@ -61,27 +61,27 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
|||
// create a test version of the Reads object
|
||||
ReadProperties readAttributes = createTestReadProperties();
|
||||
|
||||
SAMRecord r1 = ArtificialSAMUtils.createArtificialRead(header,"r1",0,1,10);
|
||||
GATKSAMRecord r1 = ArtificialSAMUtils.createArtificialRead(header,"r1",0,1,10);
|
||||
r1.setReadBases(bases1);
|
||||
r1.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20});
|
||||
r1.setCigarString("10M");
|
||||
|
||||
SAMRecord r2 = ArtificialSAMUtils.createArtificialRead(header,"r2",0,1,10);
|
||||
GATKSAMRecord r2 = ArtificialSAMUtils.createArtificialRead(header,"r2",0,1,10);
|
||||
r2.setReadBases(bases2);
|
||||
r2.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20,20,20});
|
||||
r2.setCigarString("3=1X5=1X");
|
||||
|
||||
SAMRecord r3 = ArtificialSAMUtils.createArtificialRead(header,"r3",0,1,10);
|
||||
GATKSAMRecord r3 = ArtificialSAMUtils.createArtificialRead(header,"r3",0,1,10);
|
||||
r3.setReadBases(bases2);
|
||||
r3.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20,20,20});
|
||||
r3.setCigarString("3=1X5M1X");
|
||||
|
||||
SAMRecord r4 = ArtificialSAMUtils.createArtificialRead(header,"r4",0,1,10);
|
||||
GATKSAMRecord r4 = ArtificialSAMUtils.createArtificialRead(header,"r4",0,1,10);
|
||||
r4.setReadBases(bases2);
|
||||
r4.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20});
|
||||
r4.setCigarString("10M");
|
||||
|
||||
List<SAMRecord> reads = Arrays.asList(r1, r2, r3, r4);
|
||||
List<GATKSAMRecord> reads = Arrays.asList(r1, r2, r3, r4);
|
||||
|
||||
// create the iterator by state with the fake reads and fake records
|
||||
li = makeLTBS(reads,readAttributes);
|
||||
|
|
@ -101,22 +101,22 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
|||
// create a test version of the Reads object
|
||||
ReadProperties readAttributes = createTestReadProperties();
|
||||
|
||||
SAMRecord before = ArtificialSAMUtils.createArtificialRead(header,"before",0,1,10);
|
||||
GATKSAMRecord before = ArtificialSAMUtils.createArtificialRead(header,"before",0,1,10);
|
||||
before.setReadBases(bases);
|
||||
before.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20});
|
||||
before.setCigarString("10M");
|
||||
|
||||
SAMRecord during = ArtificialSAMUtils.createArtificialRead(header,"during",0,2,10);
|
||||
GATKSAMRecord during = ArtificialSAMUtils.createArtificialRead(header,"during",0,2,10);
|
||||
during.setReadBases(indelBases);
|
||||
during.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20,20,20});
|
||||
during.setCigarString("4M2I6M");
|
||||
|
||||
SAMRecord after = ArtificialSAMUtils.createArtificialRead(header,"after",0,3,10);
|
||||
GATKSAMRecord after = ArtificialSAMUtils.createArtificialRead(header,"after",0,3,10);
|
||||
after.setReadBases(bases);
|
||||
after.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20});
|
||||
after.setCigarString("10M");
|
||||
|
||||
List<SAMRecord> reads = Arrays.asList(before, during, after);
|
||||
List<GATKSAMRecord> reads = Arrays.asList(before, during, after);
|
||||
|
||||
// create the iterator by state with the fake reads and fake records
|
||||
li = makeLTBS(reads,readAttributes);
|
||||
|
|
@ -146,12 +146,12 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
|||
// create a test version of the Reads object
|
||||
ReadProperties readAttributes = createTestReadProperties();
|
||||
|
||||
SAMRecord indelOnlyRead = ArtificialSAMUtils.createArtificialRead(header, "indelOnly", 0, firstLocus, 76);
|
||||
GATKSAMRecord indelOnlyRead = ArtificialSAMUtils.createArtificialRead(header, "indelOnly", 0, firstLocus, 76);
|
||||
indelOnlyRead.setReadBases(Utils.dupBytes((byte)'A',76));
|
||||
indelOnlyRead.setBaseQualities(Utils.dupBytes((byte) '@', 76));
|
||||
indelOnlyRead.setCigarString("76I");
|
||||
|
||||
List<SAMRecord> reads = Arrays.asList(indelOnlyRead);
|
||||
List<GATKSAMRecord> reads = Arrays.asList(indelOnlyRead);
|
||||
|
||||
// create the iterator by state with the fake reads and fake records
|
||||
li = makeLTBS(reads, readAttributes);
|
||||
|
|
@ -174,22 +174,22 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
|||
public void testWholeIndelRead() {
|
||||
final int firstLocus = 44367788, secondLocus = firstLocus + 1;
|
||||
|
||||
SAMRecord leadingRead = ArtificialSAMUtils.createArtificialRead(header,"leading",0,firstLocus,76);
|
||||
GATKSAMRecord leadingRead = ArtificialSAMUtils.createArtificialRead(header,"leading",0,firstLocus,76);
|
||||
leadingRead.setReadBases(Utils.dupBytes((byte)'A',76));
|
||||
leadingRead.setBaseQualities(Utils.dupBytes((byte)'@',76));
|
||||
leadingRead.setCigarString("1M75I");
|
||||
|
||||
SAMRecord indelOnlyRead = ArtificialSAMUtils.createArtificialRead(header,"indelOnly",0,secondLocus,76);
|
||||
GATKSAMRecord indelOnlyRead = ArtificialSAMUtils.createArtificialRead(header,"indelOnly",0,secondLocus,76);
|
||||
indelOnlyRead.setReadBases(Utils.dupBytes((byte) 'A', 76));
|
||||
indelOnlyRead.setBaseQualities(Utils.dupBytes((byte)'@',76));
|
||||
indelOnlyRead.setCigarString("76I");
|
||||
|
||||
SAMRecord fullMatchAfterIndel = ArtificialSAMUtils.createArtificialRead(header,"fullMatch",0,secondLocus,76);
|
||||
GATKSAMRecord fullMatchAfterIndel = ArtificialSAMUtils.createArtificialRead(header,"fullMatch",0,secondLocus,76);
|
||||
fullMatchAfterIndel.setReadBases(Utils.dupBytes((byte)'A',76));
|
||||
fullMatchAfterIndel.setBaseQualities(Utils.dupBytes((byte)'@',76));
|
||||
fullMatchAfterIndel.setCigarString("75I1M");
|
||||
|
||||
List<SAMRecord> reads = Arrays.asList(leadingRead, indelOnlyRead, fullMatchAfterIndel);
|
||||
List<GATKSAMRecord> reads = Arrays.asList(leadingRead, indelOnlyRead, fullMatchAfterIndel);
|
||||
|
||||
// create the iterator by state with the fake reads and fake records
|
||||
li = makeLTBS(reads, createTestReadProperties());
|
||||
|
|
@ -225,12 +225,12 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
|||
public void testWholeIndelReadRepresentedTest() {
|
||||
final int firstLocus = 44367788, secondLocus = firstLocus + 1;
|
||||
|
||||
SAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header,"read1",0,secondLocus,1);
|
||||
GATKSAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header,"read1",0,secondLocus,1);
|
||||
read1.setReadBases(Utils.dupBytes((byte) 'A', 1));
|
||||
read1.setBaseQualities(Utils.dupBytes((byte) '@', 1));
|
||||
read1.setCigarString("1I");
|
||||
|
||||
List<SAMRecord> reads = Arrays.asList(read1);
|
||||
List<GATKSAMRecord> reads = Arrays.asList(read1);
|
||||
|
||||
// create the iterator by state with the fake reads and fake records
|
||||
li = makeLTBS(reads, createTestReadProperties());
|
||||
|
|
@ -246,7 +246,7 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
|||
// Assert.assertEquals(pe.getBasesOfImmediatelyFollowingInsertion(), "A");
|
||||
}
|
||||
|
||||
SAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header,"read2",0,secondLocus,10);
|
||||
GATKSAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header,"read2",0,secondLocus,10);
|
||||
read2.setReadBases(Utils.dupBytes((byte) 'A', 10));
|
||||
read2.setBaseQualities(Utils.dupBytes((byte) '@', 10));
|
||||
read2.setCigarString("10I");
|
||||
|
|
@ -302,7 +302,7 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
|||
@Test(enabled = true && ! DEBUG, dataProvider = "IndelLengthAndBasesTest")
|
||||
public void testIndelLengthAndBasesTest(GATKSAMRecord read, final CigarOperator op, final int eventSize, final String eventBases) {
|
||||
// create the iterator by state with the fake reads and fake records
|
||||
li = makeLTBS(Arrays.asList((SAMRecord)read), createTestReadProperties());
|
||||
li = makeLTBS(Arrays.asList((GATKSAMRecord)read), createTestReadProperties());
|
||||
|
||||
Assert.assertTrue(li.hasNext());
|
||||
|
||||
|
|
@ -354,7 +354,7 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
|||
public void testLIBS(LIBSTest params) {
|
||||
// create the iterator by state with the fake reads and fake records
|
||||
final GATKSAMRecord read = params.makeRead();
|
||||
li = makeLTBS(Arrays.asList((SAMRecord)read), createTestReadProperties());
|
||||
li = makeLTBS(Arrays.asList((GATKSAMRecord)read), createTestReadProperties());
|
||||
final LIBS_position tester = new LIBS_position(read);
|
||||
|
||||
int bpVisited = 0;
|
||||
|
|
@ -458,14 +458,14 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
|||
final DownsamplingMethod downsampler = downsample
|
||||
? new DownsamplingMethod(DownsampleType.BY_SAMPLE, maxDownsampledCoverage, null, false)
|
||||
: new DownsamplingMethod(DownsampleType.NONE, null, null, false);
|
||||
final List<SAMRecord> reads = ArtificialSAMUtils.createReadStream(nReadsPerLocus, nLoci, header, 1, readLength);
|
||||
li = new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()),
|
||||
final List<GATKSAMRecord> reads = ArtificialSAMUtils.createReadStream(nReadsPerLocus, nLoci, header, 1, readLength);
|
||||
li = new LocusIteratorByState(new FakeCloseableIterator<GATKSAMRecord>(reads.iterator()),
|
||||
createTestReadProperties(downsampler, keepReads),
|
||||
genomeLocParser,
|
||||
samples);
|
||||
|
||||
final Set<SAMRecord> seenSoFar = new HashSet<SAMRecord>();
|
||||
final Set<SAMRecord> keptReads = new HashSet<SAMRecord>();
|
||||
final Set<GATKSAMRecord> seenSoFar = new HashSet<GATKSAMRecord>();
|
||||
final Set<GATKSAMRecord> keptReads = new HashSet<GATKSAMRecord>();
|
||||
int bpVisited = 0;
|
||||
while ( li.hasNext() ) {
|
||||
bpVisited++;
|
||||
|
|
@ -482,11 +482,11 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
|||
|
||||
seenSoFar.addAll(p.getReads());
|
||||
if ( keepReads && grabReadsAfterEachCycle ) {
|
||||
final List<SAMRecord> locusReads = li.transferReadsFromAllPreviousPileups();
|
||||
final List<GATKSAMRecord> locusReads = li.transferReadsFromAllPreviousPileups();
|
||||
|
||||
// the number of reads starting here
|
||||
int nReadsStartingHere = 0;
|
||||
for ( final SAMRecord read : p.getReads() )
|
||||
for ( final GATKSAMRecord read : p.getReads() )
|
||||
if ( read.getAlignmentStart() == alignmentContext.getPosition() )
|
||||
nReadsStartingHere++;
|
||||
|
||||
|
|
@ -499,7 +499,7 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
|||
keptReads.addAll(locusReads);
|
||||
|
||||
// check that all reads we've seen so far are in our keptReads
|
||||
for ( final SAMRecord read : seenSoFar ) {
|
||||
for ( final GATKSAMRecord read : seenSoFar ) {
|
||||
Assert.assertTrue(keptReads.contains(read), "A read that appeared in a pileup wasn't found in the kept reads: " + read);
|
||||
}
|
||||
}
|
||||
|
|
@ -524,8 +524,8 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
|||
|
||||
// check that the order of reads is the same as in our read list
|
||||
for ( int i = 0; i < reads.size(); i++ ) {
|
||||
final SAMRecord inputRead = reads.get(i);
|
||||
final SAMRecord keptRead = reads.get(i);
|
||||
final GATKSAMRecord inputRead = reads.get(i);
|
||||
final GATKSAMRecord keptRead = reads.get(i);
|
||||
Assert.assertSame(keptRead, inputRead, "Input reads and kept reads differ at position " + i);
|
||||
}
|
||||
} else {
|
||||
|
|
@ -534,13 +534,13 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
|||
|
||||
// check uniqueness
|
||||
final Set<String> readNames = new HashSet<String>();
|
||||
for ( final SAMRecord read : keptReads ) {
|
||||
for ( final GATKSAMRecord read : keptReads ) {
|
||||
Assert.assertFalse(readNames.contains(read.getReadName()), "Found duplicate reads in the kept reads");
|
||||
readNames.add(read.getReadName());
|
||||
}
|
||||
|
||||
// check that all reads we've seen are in our keptReads
|
||||
for ( final SAMRecord read : seenSoFar ) {
|
||||
for ( final GATKSAMRecord read : seenSoFar ) {
|
||||
Assert.assertTrue(keptReads.contains(read), "A read that appeared in a pileup wasn't found in the kept reads: " + read);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ package org.broadinstitute.sting.utils.locusiterator;
|
|||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
|
@ -63,7 +64,7 @@ public class ReadStateManagerUnitTest extends LocusIteratorByStateBaseTest {
|
|||
|
||||
public void run() {
|
||||
final List<String> samples = LocusIteratorByState.sampleListForSAMWithoutReadGroups();
|
||||
final Iterator<SAMRecord> iterator = new LinkedList<SAMRecord>().iterator();
|
||||
final Iterator<GATKSAMRecord> iterator = new LinkedList<GATKSAMRecord>().iterator();
|
||||
ReadStateManager readStateManager = new ReadStateManager(iterator, samples, LIBSDownsamplingInfo.NO_DOWNSAMPLING, false);
|
||||
ReadStateManager.PerSampleReadStateManager perSampleReadStateManager = readStateManager.new PerSampleReadStateManager(LIBSDownsamplingInfo.NO_DOWNSAMPLING);
|
||||
|
||||
|
|
@ -146,10 +147,10 @@ public class ReadStateManagerUnitTest extends LocusIteratorByStateBaseTest {
|
|||
int alignmentStart = 1;
|
||||
|
||||
for ( int readsThisStack : readCountsPerAlignmentStart ) {
|
||||
ArrayList<SAMRecord> stackReads = new ArrayList<SAMRecord>(ArtificialSAMUtils.createStackOfIdenticalArtificialReads(readsThisStack, header, "foo", 0, alignmentStart, MathUtils.randomIntegerInRange(50, 100)));
|
||||
ArrayList<GATKSAMRecord> stackReads = new ArrayList<GATKSAMRecord>(ArtificialSAMUtils.createStackOfIdenticalArtificialReads(readsThisStack, header, "foo", 0, alignmentStart, MathUtils.randomIntegerInRange(50, 100)));
|
||||
ArrayList<AlignmentStateMachine> stackRecordStates = new ArrayList<AlignmentStateMachine>();
|
||||
|
||||
for ( SAMRecord read : stackReads ) {
|
||||
for ( GATKSAMRecord read : stackReads ) {
|
||||
stackRecordStates.add(new AlignmentStateMachine(read));
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue