Massive expansion of ActiveRegionTraversal unit tests, resulting in several bugfixes to ART
-- UnitTests now include combinational tiling of reads within and spanning shard boundaries -- ART now properly handles shard transitions, and does so efficiently without requiring hash sets or other collections of reads -- Updating HC and CountReadsInActiveRegions integration tests
This commit is contained in:
parent
ddcb33fcf8
commit
2a42b47e4a
|
|
@ -68,12 +68,12 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSample() {
|
||||
HCTest(CEUTRIO_BAM, "", "1e2671557b01ad0497557097282965fc");
|
||||
HCTest(CEUTRIO_BAM, "", "b8f7b741445ce6b6ea491c794ce75c17");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSample() {
|
||||
HCTest(NA12878_BAM, "", "2bd237a7e1e63eebe755dbe7963e430a");
|
||||
HCTest(NA12878_BAM, "", "a2c63f6e6e51a01019bdbd23125bdb15");
|
||||
}
|
||||
|
||||
@Test(enabled = false)
|
||||
|
|
@ -84,7 +84,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGA() {
|
||||
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
|
||||
"a938cdd7262968597fc8eb6c1c0a69f1");
|
||||
"c679ae7f04bdfda896b5c046d35e043c");
|
||||
}
|
||||
|
||||
private void HCTestComplexGGA(String bam, String args, String md5) {
|
||||
|
|
@ -102,7 +102,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
|
||||
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
|
||||
"d590c8d6d5e58d685401b65a23846893");
|
||||
"1a034b7eb572e1b6f659d6e5d57b3e76");
|
||||
}
|
||||
|
||||
private void HCTestComplexVariants(String bam, String args, String md5) {
|
||||
|
|
@ -135,7 +135,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSampleIndelQualityScores() {
|
||||
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "50a26224b9e863ee47a0619eb54a0323");
|
||||
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "29f1125df5ab27cc937a144ae08ac735");
|
||||
}
|
||||
|
||||
// That problem bam came from a user on the forum and it spotted a problem where the ReadClipper
|
||||
|
|
@ -146,7 +146,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void HCTestProblematicReadsModifiedInActiveRegions() {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("4439496472eb1e2f5c91b30ba525be37"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("8b1b8d1bd7feac1503fc4ffa6236cff7"));
|
||||
executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.traversals;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.WalkerManager;
|
||||
|
|
@ -47,24 +48,36 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
|||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Created with IntelliJ IDEA.
|
||||
* Implement active region traversal
|
||||
*
|
||||
* User: depristo
|
||||
* Date: 1/9/13
|
||||
* Time: 4:45 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*
|
||||
* Live region:
|
||||
*
|
||||
* The ART tracks a thing called the live region. The live region is a position on a specific contig
|
||||
* of the alignment start of the last read we processed during this traversal. Because the
|
||||
* read stream is sorted, future reads must occurs in the the live region. Therefore the the dead region
|
||||
* (everything to the left of the live boundary) cannot have any more read data. The live / dead
|
||||
* regions are used to decide when we can safely call map on active regions, as only active regions
|
||||
* contained completely within the dead region (including extensions) have a complete set of read data
|
||||
* in the collected read list. All of the data related to the live region is captured by the local
|
||||
* variable spanOfLastReadSeen
|
||||
*
|
||||
*/
|
||||
public class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,ActiveRegionWalker<M,T>,LocusShardDataProvider> {
|
||||
protected final static Logger logger = Logger.getLogger(TraversalEngine.class);
|
||||
protected final static boolean DEBUG = false;
|
||||
|
||||
// set by the tranversal
|
||||
private int activeRegionExtension = -1;
|
||||
private int maxRegionSize = -1;
|
||||
|
||||
/**
|
||||
* our log, which we want to capture anything from this class
|
||||
*/
|
||||
protected final static Logger logger = Logger.getLogger(TraversalEngine.class);
|
||||
protected final LinkedList<ActiveRegion> workQueue = new LinkedList<ActiveRegion>();
|
||||
private final LinkedList<ActiveRegion> workQueue = new LinkedList<ActiveRegion>();
|
||||
|
||||
private LinkedList<GATKSAMRecord> myReads = new LinkedList<GATKSAMRecord>();
|
||||
private GenomeLoc spanOfLastReadSeen = null;
|
||||
|
||||
protected int getActiveRegionExtension() {
|
||||
return activeRegionExtension;
|
||||
|
|
@ -79,6 +92,11 @@ public class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,ActiveRegio
|
|||
return "active regions";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "TraverseActiveRegions";
|
||||
}
|
||||
|
||||
@Override
|
||||
public void initialize(GenomeAnalysisEngine engine, Walker walker, ProgressMeter progressMeter) {
|
||||
super.initialize(engine, walker, progressMeter);
|
||||
|
|
@ -153,23 +171,58 @@ public class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,ActiveRegio
|
|||
}
|
||||
}
|
||||
|
||||
private LinkedList<GATKSAMRecord> myReads = new LinkedList<GATKSAMRecord>();
|
||||
private Shard lastShard = null;
|
||||
/**
|
||||
* Did read appear in the last shard?
|
||||
*
|
||||
* When we transition across shard boundaries we see duplicate reads because
|
||||
* each shard contains the reads that *overlap* the shard. So if we just finished
|
||||
* shard 1-1000 and are now in 1001-2000 we'll see duplicate reads from 1001
|
||||
* that overlapped 1-1000. This function tests read to determine if we would have
|
||||
* seen it before by asking if read.getAlignmentStart() is less than the
|
||||
* stop position of the last seen read at the start of the traversal. The reason
|
||||
* we need to use the location of the last read at the start of the traversal
|
||||
* is that we update the lastRead during the traversal, and we only want to filter
|
||||
* out reads whose start is before the last read of the previous shard, not the
|
||||
* current shard.
|
||||
*
|
||||
* @param locOfLastReadAtTraversalStart the location of the last read seen at the start of the traversal
|
||||
* @param read the read we want to test if it's already been seen in the last shard
|
||||
* @return true if read would have appeared in the last shard, false otherwise
|
||||
*/
|
||||
protected boolean appearedInLastShard(final GenomeLoc locOfLastReadAtTraversalStart, final GATKSAMRecord read) {
|
||||
if ( locOfLastReadAtTraversalStart == null )
|
||||
// we're in the first shard, so obviously the answer is no
|
||||
return false;
|
||||
else {
|
||||
// otherwise check to see if the alignment occurred in the previous shard
|
||||
return read.getAlignmentStart() <= locOfLastReadAtTraversalStart.getStart()
|
||||
// we're on the same contig
|
||||
&& read.getReferenceIndex() == locOfLastReadAtTraversalStart.getContigIndex();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------------------
|
||||
//
|
||||
// Actual traverse function
|
||||
//
|
||||
// -------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Is the current shard on a new contig w.r.t. the previous shard?
|
||||
* @param currentShard the current shard we are processing
|
||||
* @return true if the last shard was on a different contig than the current shard
|
||||
*/
|
||||
private boolean onNewContig(final Shard currentShard) {
|
||||
return spanOfLastSeenRead() != null
|
||||
&& spanOfLastSeenRead().getContigIndex() != currentShard.getLocation().getContigIndex();
|
||||
}
|
||||
|
||||
@Override
|
||||
public T traverse( final ActiveRegionWalker<M,T> walker,
|
||||
final LocusShardDataProvider dataProvider,
|
||||
T sum) {
|
||||
if ( DEBUG ) logger.warn(String.format("TraverseActiveRegions.traverse: Shard is %s", dataProvider));
|
||||
|
||||
final HashSet<GATKSAMRecord> maybeDuplicatedReads = new HashSet<GATKSAMRecord>();
|
||||
// TODO -- there's got to be a better way to know this
|
||||
if ( lastShard != dataProvider.getShard() ) {
|
||||
maybeDuplicatedReads.addAll(myReads);
|
||||
logger.info("Crossing shard boundary requires us to check for duplicates against " + maybeDuplicatedReads.size() + " reads");
|
||||
if ( DEBUG ) logger.warn("Clearing myReads");
|
||||
}
|
||||
lastShard = dataProvider.getShard();
|
||||
logger.debug(String.format("TraverseActiveRegions.traverse: Shard is %s", dataProvider));
|
||||
|
||||
final LocusView locusView = new AllLocusView(dataProvider);
|
||||
|
||||
|
|
@ -181,6 +234,12 @@ public class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,ActiveRegio
|
|||
ReferenceOrderedView referenceOrderedDataView = getReferenceOrderedView(walker, dataProvider, locusView);
|
||||
|
||||
// We keep processing while the next reference location is within the interval
|
||||
final GenomeLoc locOfLastReadAtTraversalStart = spanOfLastSeenRead();
|
||||
|
||||
// if we've moved onto a new contig, process all of the active regions
|
||||
if ( onNewContig(dataProvider.getShard()) )
|
||||
sum = processActiveRegions(walker, sum, true);
|
||||
|
||||
GenomeLoc prevLoc = null;
|
||||
while( locusView.hasNext() ) {
|
||||
final AlignmentContext locus = locusView.next();
|
||||
|
|
@ -191,15 +250,12 @@ public class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,ActiveRegio
|
|||
// reads may occur outside our intervals but overlap them in the future
|
||||
final Collection<GATKSAMRecord> reads = locusView.getLIBS().transferReadsFromAllPreviousPileups();
|
||||
for( final GATKSAMRecord read : reads ) {
|
||||
notifyOfCurrentPosition(read);
|
||||
// most of the time maybeDuplicatedReads is empty
|
||||
// TODO -- I believe that because of the ordering of reads that as soon as we don't find a read in the
|
||||
// TODO -- potential list of duplicates we can clear the hashset
|
||||
if ( ! maybeDuplicatedReads.isEmpty() && maybeDuplicatedReads.contains(read) ) {
|
||||
if ( appearedInLastShard(locOfLastReadAtTraversalStart, read) ) {
|
||||
if ( DEBUG ) logger.warn("Skipping duplicated " + read.getReadName());
|
||||
} else {
|
||||
if ( DEBUG ) logger.warn("Adding read " + read.getReadName() + " at " + engine.getGenomeLocParser().createGenomeLoc(read) + " from provider " + dataProvider);
|
||||
myReads.add((GATKSAMRecord)read);
|
||||
rememberLastReadLocation(read);
|
||||
myReads.add(read);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -257,28 +313,87 @@ public class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,ActiveRegio
|
|||
return sum;
|
||||
}
|
||||
|
||||
private GenomeLoc startOfLiveRegion = null;
|
||||
|
||||
protected void notifyOfCurrentPosition(final GATKSAMRecord read) {
|
||||
notifyOfCurrentPosition(engine.getGenomeLocParser().createGenomeLoc(read));
|
||||
/**
|
||||
* Special function called in LinearMicroScheduler to empty out the work queue.
|
||||
* Ugly for now but will be cleaned up when we push this functionality more into the engine
|
||||
*/
|
||||
public T endTraversal(final Walker<M, T> walker, T sum) {
|
||||
return processActiveRegions((ActiveRegionWalker<M, T>)walker, sum, true);
|
||||
}
|
||||
|
||||
protected void notifyOfCurrentPosition(final GenomeLoc currentLocation) {
|
||||
if ( startOfLiveRegion == null )
|
||||
startOfLiveRegion = currentLocation;
|
||||
else
|
||||
startOfLiveRegion = startOfLiveRegion.max(currentLocation.getStartLocation());
|
||||
// -------------------------------------------------------------------------------------
|
||||
//
|
||||
// Functions to manage and interact with the live / dead zone
|
||||
//
|
||||
// -------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Update the live region to reflect that the last read we've seen in the traversal is read
|
||||
*
|
||||
* Requires that sequential calls always be provided reads in coordinate sorted order
|
||||
*
|
||||
* @param read the last read we've seen during the traversal
|
||||
*/
|
||||
protected void rememberLastReadLocation(final GATKSAMRecord read) {
|
||||
final GenomeLoc currentLocation = engine.getGenomeLocParser().createGenomeLoc(read);
|
||||
if ( spanOfLastReadSeen == null )
|
||||
spanOfLastReadSeen = currentLocation;
|
||||
else {
|
||||
if ( currentLocation.isBefore(spanOfLastReadSeen) )
|
||||
throw new IllegalStateException("Updating last read seen in the traversal with read " + read + " with span " + currentLocation + " but this occurs before the previously seen read " + spanOfLastReadSeen);
|
||||
spanOfLastReadSeen = currentLocation;
|
||||
}
|
||||
}
|
||||
|
||||
protected GenomeLoc getStartOfLiveRegion() {
|
||||
return startOfLiveRegion;
|
||||
/**
|
||||
* Get a GenomeLoc indicating the start (heading to the right) of the live ART region.
|
||||
* @return the left-most position of the live region on the genome
|
||||
*/
|
||||
protected GenomeLoc spanOfLastSeenRead() {
|
||||
return spanOfLastReadSeen;
|
||||
}
|
||||
|
||||
protected boolean regionCompletelyWithinDeadZone(final GenomeLoc region, final boolean includeExtension) {
|
||||
return (region.getStop() < (getStartOfLiveRegion().getStart() - (includeExtension ? getActiveRegionExtension() : 0)))
|
||||
|| ! region.onSameContig(getStartOfLiveRegion());
|
||||
/**
|
||||
* Is the active region completely within the traversal's dead zone?
|
||||
*
|
||||
* @param region the region we want to test
|
||||
* @return true if the extended location of region is completely within the current dead zone, false otherwise
|
||||
*/
|
||||
protected boolean regionCompletelyWithinDeadZone(final ActiveRegion region) {
|
||||
return region.getExtendedLoc().getStop() < spanOfLastSeenRead().getStart()
|
||||
|| ! region.getExtendedLoc().onSameContig(spanOfLastSeenRead());
|
||||
}
|
||||
|
||||
/**
|
||||
* Is the read dead? That is, can it no longer be in any future active region, and therefore can be discarded?
|
||||
*
|
||||
* read: start |--------> stop ------ stop + extension
|
||||
* region: start |-----------------| end
|
||||
*
|
||||
* Since the regions are coming in order, read could potentially be contained in a future interval if
|
||||
* stop + activeRegionExtension >= end. If, on the other hand, stop + extension is < the end
|
||||
* of this region, then we can discard it, since any future region could only include reads
|
||||
* up to end + 1 - extension.
|
||||
*
|
||||
* Note that this function doesn't care about the dead zone. We're assuming that by
|
||||
* actually calling this function with an active region that region is already in the dead zone,
|
||||
* so checking that the read is in the dead zone doesn't make sense.
|
||||
*
|
||||
* @param read the read we're testing
|
||||
* @param activeRegion the current active region
|
||||
* @return true if the read is dead, false other
|
||||
*/
|
||||
@Requires({"read != null", "activeRegion != null"})
|
||||
private boolean readCannotOccurInAnyMoreActiveRegions(final GATKSAMRecord read, final ActiveRegion activeRegion) {
|
||||
return read.getAlignmentEnd() + getActiveRegionExtension() < activeRegion.getLocation().getStop();
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------------------
|
||||
//
|
||||
// Functions to process active regions that are ready for map / reduce calls
|
||||
//
|
||||
// -------------------------------------------------------------------------------------
|
||||
|
||||
private T processActiveRegions(final ActiveRegionWalker<M, T> walker, T sum, final boolean forceRegionsToBeActive) {
|
||||
if( walker.activeRegionOutStream != null ) {
|
||||
writeActiveRegionsToStream(walker);
|
||||
|
|
@ -292,11 +407,10 @@ public class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,ActiveRegio
|
|||
// Since we've traversed sufficiently past this point (or this contig!) in the workQueue we can unload those regions and process them
|
||||
// TODO can implement parallel traversal here
|
||||
while( workQueue.peek() != null ) {
|
||||
final GenomeLoc extendedLoc = workQueue.peek().getExtendedLoc();
|
||||
if ( forceRegionsToBeActive || regionCompletelyWithinDeadZone(extendedLoc, false) ) {
|
||||
final ActiveRegion activeRegion = workQueue.remove();
|
||||
if ( DEBUG ) logger.warn("Processing active region " + activeRegion + " dead zone " + getStartOfLiveRegion());
|
||||
sum = processActiveRegion( activeRegion, sum, walker );
|
||||
final ActiveRegion activeRegion = workQueue.peek();
|
||||
if ( forceRegionsToBeActive || regionCompletelyWithinDeadZone(activeRegion) ) {
|
||||
if ( DEBUG ) logger.warn("Processing active region " + activeRegion + " dead zone " + spanOfLastSeenRead());
|
||||
sum = processActiveRegion( workQueue.remove(), sum, walker );
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
|
|
@ -305,15 +419,6 @@ public class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,ActiveRegio
|
|||
return sum;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "TraverseActiveRegions";
|
||||
}
|
||||
|
||||
private boolean readIsDead(final GATKSAMRecord read, final GenomeLoc readLoc, final ActiveRegion activeRegion) {
|
||||
return readLoc.getStop() < activeRegion.getLocation().getStart() && regionCompletelyWithinDeadZone(readLoc, true);
|
||||
}
|
||||
|
||||
protected T processActiveRegion(final ActiveRegion activeRegion, final T sum, final ActiveRegionWalker<M, T> walker) {
|
||||
final Iterator<GATKSAMRecord> liveReads = myReads.iterator();
|
||||
while ( liveReads.hasNext() ) {
|
||||
|
|
@ -325,7 +430,7 @@ public class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,ActiveRegio
|
|||
activeRegion.add(read);
|
||||
|
||||
if ( ! walker.wantsNonPrimaryReads() ) {
|
||||
if ( DEBUG ) logger.warn("Removing read " + read.getReadName() + " at " + readLoc + " with dead zone start " + getStartOfLiveRegion());
|
||||
if ( DEBUG ) logger.warn("Removing read " + read.getReadName() + " at " + readLoc + " with dead zone start " + spanOfLastSeenRead());
|
||||
liveReads.remove();
|
||||
killed = true;
|
||||
}
|
||||
|
|
@ -333,8 +438,8 @@ public class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,ActiveRegio
|
|||
activeRegion.add( read );
|
||||
}
|
||||
|
||||
if ( ! killed && readIsDead(read, readLoc, activeRegion) ) {
|
||||
if ( DEBUG ) logger.warn("Removing read " + read.getReadName() + " at " + readLoc + " with dead zone start " + getStartOfLiveRegion());
|
||||
if ( ! killed && readCannotOccurInAnyMoreActiveRegions(read, activeRegion) ) {
|
||||
if ( DEBUG ) logger.warn("Removing read " + read.getReadName() + " at " + readLoc + " with dead zone start " + spanOfLastSeenRead());
|
||||
liveReads.remove();
|
||||
}
|
||||
}
|
||||
|
|
@ -343,13 +448,4 @@ public class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,ActiveRegio
|
|||
final M x = walker.map(activeRegion, null);
|
||||
return walker.reduce( x, sum );
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Special function called in LinearMicroScheduler to empty out the work queue.
|
||||
* Ugly for now but will be cleaned up when we push this functionality more into the engine
|
||||
*/
|
||||
public T endTraversal(final Walker<M, T> walker, T sum) {
|
||||
return processActiveRegions((ActiveRegionWalker<M, T>)walker, sum, true);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,7 +25,9 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.sam;
|
||||
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import net.sf.samtools.*;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.NGSPlatform;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -51,6 +53,9 @@ import java.util.List;
|
|||
public class ArtificialBAMBuilder {
|
||||
public final static int BAM_SHARD_SIZE = 16384;
|
||||
|
||||
private final IndexedFastaSequenceFile reference;
|
||||
private final GenomeLocParser parser;
|
||||
|
||||
final int nReadsPerLocus;
|
||||
final int nLoci;
|
||||
|
||||
|
|
@ -66,14 +71,39 @@ public class ArtificialBAMBuilder {
|
|||
|
||||
SAMFileHeader header;
|
||||
|
||||
public ArtificialBAMBuilder(int nReadsPerLocus, int nLoci) {
|
||||
public ArtificialBAMBuilder(final IndexedFastaSequenceFile reference, int nReadsPerLocus, int nLoci) {
|
||||
this.nReadsPerLocus = nReadsPerLocus;
|
||||
this.nLoci = nLoci;
|
||||
|
||||
this.reference = reference;
|
||||
this.parser = new GenomeLocParser(reference);
|
||||
createAndSetHeader(1);
|
||||
}
|
||||
|
||||
public ArtificialBAMBuilder(int nReadsPerLocus, int nLoci) {
|
||||
this(ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000).getSequenceDictionary(), nReadsPerLocus, nLoci);
|
||||
}
|
||||
|
||||
public ArtificialBAMBuilder(final SAMSequenceDictionary dict, int nReadsPerLocus, int nLoci) {
|
||||
this.nReadsPerLocus = nReadsPerLocus;
|
||||
this.nLoci = nLoci;
|
||||
this.reference = null;
|
||||
this.parser = new GenomeLocParser(dict);
|
||||
createAndSetHeader(1);
|
||||
}
|
||||
|
||||
public IndexedFastaSequenceFile getReference() {
|
||||
return reference;
|
||||
}
|
||||
|
||||
public GenomeLocParser getGenomeLocParser() {
|
||||
return parser;
|
||||
}
|
||||
|
||||
public ArtificialBAMBuilder createAndSetHeader(final int nSamples) {
|
||||
this.header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000);
|
||||
this.header = new SAMFileHeader();
|
||||
header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
|
||||
header.setSequenceDictionary(parser.getContigs());
|
||||
samples.clear();
|
||||
|
||||
for ( int i = 0; i < nSamples; i++ ) {
|
||||
|
|
@ -156,6 +186,11 @@ public class ArtificialBAMBuilder {
|
|||
public SAMFileHeader getHeader() { return header; }
|
||||
public ArtificialBAMBuilder setHeader(SAMFileHeader header) { this.header = header; return this; }
|
||||
|
||||
public int getAlignmentEnd() {
|
||||
return alignmentStart + nLoci * (skipNLoci + 1) + readLength;
|
||||
}
|
||||
|
||||
|
||||
public int getNSamples() { return samples.size(); }
|
||||
|
||||
public int expectedNumberOfReads() {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,104 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.traversals;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* ActiveRegionWalker for unit testing
|
||||
*
|
||||
* User: depristo
|
||||
* Date: 1/15/13
|
||||
* Time: 1:28 PM
|
||||
*/
|
||||
class DummyActiveRegionWalker extends ActiveRegionWalker<Integer, Integer> {
|
||||
private final double prob;
|
||||
private EnumSet<ActiveRegionReadState> states = super.desiredReadStates();
|
||||
private GenomeLocSortedSet activeRegions = null;
|
||||
|
||||
protected List<GenomeLoc> isActiveCalls = new ArrayList<GenomeLoc>();
|
||||
protected Map<GenomeLoc, ActiveRegion> mappedActiveRegions = new LinkedHashMap<GenomeLoc, ActiveRegion>();
|
||||
|
||||
public DummyActiveRegionWalker() {
|
||||
this(1.0);
|
||||
}
|
||||
|
||||
public DummyActiveRegionWalker(double constProb) {
|
||||
this.prob = constProb;
|
||||
}
|
||||
|
||||
public DummyActiveRegionWalker(EnumSet<ActiveRegionReadState> wantStates) {
|
||||
this(1.0);
|
||||
this.states = wantStates;
|
||||
}
|
||||
|
||||
public DummyActiveRegionWalker(GenomeLocSortedSet activeRegions) {
|
||||
this(1.0);
|
||||
this.activeRegions = activeRegions;
|
||||
}
|
||||
|
||||
public void setStates(EnumSet<ActiveRegionReadState> states) {
|
||||
this.states = states;
|
||||
}
|
||||
|
||||
@Override
|
||||
public EnumSet<ActiveRegionReadState> desiredReadStates() {
|
||||
return states;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ActivityProfileResult isActive(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
isActiveCalls.add(ref.getLocus());
|
||||
final double p = activeRegions == null || activeRegions.overlaps(ref.getLocus()) ? prob : 0.0;
|
||||
return new ActivityProfileResult(ref.getLocus(), p);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer map(ActiveRegion activeRegion, RefMetaDataTracker metaDataTracker) {
|
||||
mappedActiveRegions.put(activeRegion.getLocation(), activeRegion);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer reduceInit() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
|
@ -30,33 +30,26 @@ import net.sf.samtools.*;
|
|||
import org.broadinstitute.sting.commandline.Tags;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.*;
|
||||
import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
|
||||
import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.*;
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.executive.WindowMaker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.DataProvider;
|
||||
|
|
@ -80,54 +73,6 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
private final static boolean ENFORCE_CONTRACTS = false;
|
||||
private final static boolean DEBUG = false;
|
||||
|
||||
private class DummyActiveRegionWalker extends ActiveRegionWalker<Integer, Integer> {
|
||||
private final double prob;
|
||||
private EnumSet<ActiveRegionReadState> states = super.desiredReadStates();
|
||||
|
||||
protected List<GenomeLoc> isActiveCalls = new ArrayList<GenomeLoc>();
|
||||
protected Map<GenomeLoc, ActiveRegion> mappedActiveRegions = new HashMap<GenomeLoc, ActiveRegion>();
|
||||
|
||||
public DummyActiveRegionWalker() {
|
||||
this.prob = 1.0;
|
||||
}
|
||||
|
||||
public DummyActiveRegionWalker(double constProb) {
|
||||
this.prob = constProb;
|
||||
}
|
||||
|
||||
public DummyActiveRegionWalker(EnumSet<ActiveRegionReadState> wantStates) {
|
||||
this.prob = 1.0;
|
||||
this.states = wantStates;
|
||||
}
|
||||
|
||||
@Override
|
||||
public EnumSet<ActiveRegionReadState> desiredReadStates() {
|
||||
return states;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ActivityProfileResult isActive(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
isActiveCalls.add(ref.getLocus());
|
||||
return new ActivityProfileResult(ref.getLocus(), prob);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer map(ActiveRegion activeRegion, RefMetaDataTracker metaDataTracker) {
|
||||
mappedActiveRegions.put(activeRegion.getLocation(), activeRegion);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer reduceInit() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "TraversalEngineProvider")
|
||||
public Object[][] makeTraversals() {
|
||||
final List<Object[]> traversals = new LinkedList<Object[]>();
|
||||
|
|
@ -297,7 +242,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
@Test(enabled = true && ! DEBUG, dataProvider = "TraversalEngineProvider")
|
||||
@Test(enabled = true, dataProvider = "TraversalEngineProvider")
|
||||
public void testPrimaryReadMapping(TraverseActiveRegions t) {
|
||||
DummyActiveRegionWalker walker = new DummyActiveRegionWalker();
|
||||
|
||||
|
|
@ -340,7 +285,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
verifyReadMapping(region, "simple20");
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "TraversalEngineProvider")
|
||||
@Test(enabled = true && ! DEBUG, dataProvider = "TraversalEngineProvider")
|
||||
public void testNonPrimaryReadMapping(TraverseActiveRegions t) {
|
||||
DummyActiveRegionWalker walker = new DummyActiveRegionWalker(
|
||||
EnumSet.of(ActiveRegionReadState.PRIMARY, ActiveRegionReadState.NONPRIMARY));
|
||||
|
|
@ -456,7 +401,11 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
private Map<GenomeLoc, ActiveRegion> getActiveRegions(TraverseActiveRegions t, DummyActiveRegionWalker walker, List<GenomeLoc> intervals) {
|
||||
for (LocusShardDataProvider dataProvider : createDataProviders(t, walker, intervals, testBAM))
|
||||
return getActiveRegions(t, walker, intervals, testBAM);
|
||||
}
|
||||
|
||||
private Map<GenomeLoc, ActiveRegion> getActiveRegions(TraverseActiveRegions t, DummyActiveRegionWalker walker, List<GenomeLoc> intervals, final String bam) {
|
||||
for (LocusShardDataProvider dataProvider : createDataProviders(t, walker, intervals, bam))
|
||||
t.traverse(walker, dataProvider, 0);
|
||||
|
||||
t.endTraversal(walker, 0);
|
||||
|
|
@ -516,14 +465,15 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
record.setCigar(cigar);
|
||||
record.setReadString(new String(new char[len]).replace("\0", "A"));
|
||||
record.setBaseQualities(new byte[len]);
|
||||
record.setReadGroup(new GATKSAMReadGroupRecord(header.getReadGroup("test")));
|
||||
|
||||
return record;
|
||||
}
|
||||
|
||||
private List<LocusShardDataProvider> createDataProviders(TraverseActiveRegions t, final Walker walker, List<GenomeLoc> intervals, String bamFile) {
|
||||
private List<LocusShardDataProvider> createDataProviders(TraverseActiveRegions traverseActiveRegions, final Walker walker, List<GenomeLoc> intervals, String bamFile) {
|
||||
GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
|
||||
engine.setGenomeLocParser(genomeLocParser);
|
||||
t.initialize(engine, walker);
|
||||
traverseActiveRegions.initialize(engine, walker);
|
||||
|
||||
Collection<SAMReaderID> samFiles = new ArrayList<SAMReaderID>();
|
||||
SAMReaderID readerID = new SAMReaderID(new File(bamFile), new Tags());
|
||||
|
|
@ -539,13 +489,144 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
new ArrayList<ReadTransformer>(),
|
||||
false, (byte)30, false, true);
|
||||
|
||||
final Set<String> samples = SampleUtils.getSAMFileSamples(dataSource.getHeader());
|
||||
|
||||
List<LocusShardDataProvider> providers = new ArrayList<LocusShardDataProvider>();
|
||||
for (Shard shard : dataSource.createShardIteratorOverIntervals(new GenomeLocSortedSet(genomeLocParser, intervals), new LocusShardBalancer())) {
|
||||
for (WindowMaker.WindowMakerIterator window : new WindowMaker(shard, genomeLocParser, dataSource.seek(shard), shard.getGenomeLocs())) {
|
||||
for (WindowMaker.WindowMakerIterator window : new WindowMaker(shard, genomeLocParser, dataSource.seek(shard), shard.getGenomeLocs(), samples)) {
|
||||
providers.add(new LocusShardDataProvider(shard, shard.getReadProperties(), genomeLocParser, window.getLocus(), window, reference, new ArrayList<ReferenceOrderedDataSource>()));
|
||||
}
|
||||
}
|
||||
|
||||
return providers;
|
||||
}
|
||||
|
||||
@DataProvider(name = "CombinatorialARTTilingProvider")
|
||||
public Object[][] makeCombinatorialARTTilingProvider() {
|
||||
final List<Object[]> tests = new LinkedList<Object[]>();
|
||||
|
||||
final List<Integer> starts = Arrays.asList(
|
||||
1, // very start of the chromosome
|
||||
ArtificialBAMBuilder.BAM_SHARD_SIZE - 100, // right before the shard boundary
|
||||
ArtificialBAMBuilder.BAM_SHARD_SIZE + 100 // right after the shard boundary
|
||||
);
|
||||
|
||||
final List<EnumSet<ActiveRegionReadState>> allReadStates = Arrays.asList(
|
||||
EnumSet.of(ActiveRegionReadState.PRIMARY),
|
||||
EnumSet.of(ActiveRegionReadState.PRIMARY, ActiveRegionReadState.NONPRIMARY),
|
||||
EnumSet.of(ActiveRegionReadState.PRIMARY, ActiveRegionReadState.NONPRIMARY, ActiveRegionReadState.EXTENDED)
|
||||
);
|
||||
|
||||
final int maxTests = Integer.MAX_VALUE;
|
||||
int nTests = 0;
|
||||
for ( final int readLength : Arrays.asList(10, 100) ) {
|
||||
for ( final int skips : Arrays.asList(0, 1, 10) ) {
|
||||
for ( final int start : starts ) {
|
||||
for ( final int nReadsPerLocus : Arrays.asList(1, 2) ) {
|
||||
for ( final int nLoci : Arrays.asList(1, 1000) ) {
|
||||
for ( EnumSet<ActiveRegionReadState> readStates : allReadStates ) {
|
||||
final ArtificialBAMBuilder bamBuilder = new ArtificialBAMBuilder(reference, nReadsPerLocus, nLoci);
|
||||
bamBuilder.setReadLength(readLength);
|
||||
bamBuilder.setSkipNLoci(skips);
|
||||
bamBuilder.setAlignmentStart(start);
|
||||
|
||||
for ( final GenomeLocSortedSet activeRegions : enumerateActiveRegions(bamBuilder.getAlignmentStart(), bamBuilder.getAlignmentEnd())) {
|
||||
nTests++;
|
||||
if ( nTests < maxTests ) // && nTests == 1238 )
|
||||
tests.add(new Object[]{nTests, activeRegions, readStates, bamBuilder});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
private Collection<GenomeLocSortedSet> enumerateActiveRegions(final int start, final int stop) {
|
||||
// should basically cut up entire region into equal sized chunks, of
|
||||
// size 10, 20, 50, 100, etc, alternating skipping pieces so they are inactive
|
||||
// Need to make sure we include some edge cases:
|
||||
final List<GenomeLocSortedSet> activeRegions = new LinkedList<GenomeLocSortedSet>();
|
||||
|
||||
for ( final int stepSize : Arrays.asList(11, 29, 53, 97) ) {
|
||||
for ( final boolean startWithActive : Arrays.asList(true, false) ) {
|
||||
activeRegions.add(makeActiveRegionMask(start, stop, stepSize, startWithActive));
|
||||
}
|
||||
}
|
||||
|
||||
// active region is the whole interval
|
||||
activeRegions.add(new GenomeLocSortedSet(genomeLocParser, genomeLocParser.createGenomeLoc("1", start, stop)));
|
||||
|
||||
// active region extends up to the end of the data, but doesn't include start
|
||||
activeRegions.add(new GenomeLocSortedSet(genomeLocParser, genomeLocParser.createGenomeLoc("1", start+10, stop)));
|
||||
|
||||
return activeRegions;
|
||||
}
|
||||
|
||||
private GenomeLocSortedSet makeActiveRegionMask(final int start, final int stop, final int stepSize, final boolean startWithActive) {
|
||||
final GenomeLocSortedSet active = new GenomeLocSortedSet(genomeLocParser);
|
||||
|
||||
boolean includeRegion = startWithActive;
|
||||
for ( int left = start; left < stop; left += stepSize) {
|
||||
final int right = left + stepSize;
|
||||
final GenomeLoc region = genomeLocParser.createGenomeLoc("1", left, right);
|
||||
if ( includeRegion )
|
||||
active.add(region);
|
||||
includeRegion = ! includeRegion;
|
||||
}
|
||||
|
||||
return active;
|
||||
}
|
||||
|
||||
|
||||
@Test(enabled = true, dataProvider = "CombinatorialARTTilingProvider")
|
||||
public void testARTReadsInActiveRegions(final int id, final GenomeLocSortedSet activeRegions, final EnumSet<ActiveRegionReadState> readStates, final ArtificialBAMBuilder bamBuilder) {
|
||||
logger.warn("Running testARTReadsInActiveRegions id=" + id + " locs " + activeRegions + " against bam " + bamBuilder);
|
||||
final List<GenomeLoc> intervals = Arrays.asList(
|
||||
genomeLocParser.createGenomeLoc("1", bamBuilder.getAlignmentStart(), bamBuilder.getAlignmentEnd())
|
||||
);
|
||||
|
||||
final DummyActiveRegionWalker walker = new DummyActiveRegionWalker(activeRegions);
|
||||
walker.setStates(readStates);
|
||||
|
||||
final TraverseActiveRegions traversal = new TraverseActiveRegions<Integer, Integer>();
|
||||
final Map<GenomeLoc, ActiveRegion> activeRegionsMap = getActiveRegions(traversal, walker, intervals, bamBuilder.makeTemporarilyBAMFile().toString());
|
||||
|
||||
final Set<String> alreadySeenReads = new HashSet<String>(); // for use with the primary / non-primary
|
||||
for ( final ActiveRegion region : activeRegionsMap.values() ) {
|
||||
int nReadsExpectedInRegion = 0;
|
||||
for ( final GATKSAMRecord read : bamBuilder.makeReads() ) {
|
||||
final GenomeLoc readLoc = genomeLocParser.createGenomeLoc(read);
|
||||
final Set<String> readNamesInRegion = readNamesInRegion(region);
|
||||
|
||||
boolean shouldBeInRegion = readStates.contains(ActiveRegionReadState.EXTENDED)
|
||||
? region.getExtendedLoc().overlapsP(readLoc)
|
||||
: region.getLocation().overlapsP(readLoc);
|
||||
|
||||
if ( ! readStates.contains(ActiveRegionReadState.NONPRIMARY) ) {
|
||||
if ( alreadySeenReads.contains(read.getReadName()) )
|
||||
shouldBeInRegion = false;
|
||||
else if ( shouldBeInRegion )
|
||||
alreadySeenReads.add(read.getReadName());
|
||||
}
|
||||
|
||||
Assert.assertEquals(readNamesInRegion.contains(read.getReadName()), shouldBeInRegion, "Region " + region +
|
||||
" failed contains read check: read " + read + " with span " + readLoc + " should be in region is " + shouldBeInRegion + " but I got the opposite");
|
||||
|
||||
nReadsExpectedInRegion += shouldBeInRegion ? 1 : 0;
|
||||
}
|
||||
|
||||
Assert.assertEquals(region.size(), nReadsExpectedInRegion, "There are more reads in active region " + region + "than expected");
|
||||
}
|
||||
}
|
||||
|
||||
private Set<String> readNamesInRegion(final ActiveRegion region) {
|
||||
final Set<String> readNames = new LinkedHashSet<String>(region.getReads().size());
|
||||
for ( final SAMRecord read : region.getReads() )
|
||||
readNames.add(read.getReadName());
|
||||
return readNames;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -54,6 +54,32 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
|||
private static final boolean DEBUG = false;
|
||||
protected LocusIteratorByState li;
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testUnmappedAndAllIReadsPassThrough() {
|
||||
final int readLength = 10;
|
||||
GATKSAMRecord mapped1 = ArtificialSAMUtils.createArtificialRead(header,"mapped1",0,1,readLength);
|
||||
GATKSAMRecord mapped2 = ArtificialSAMUtils.createArtificialRead(header,"mapped2",0,1,readLength);
|
||||
GATKSAMRecord unmapped = ArtificialSAMUtils.createArtificialRead(header,"unmapped",0,1,readLength);
|
||||
GATKSAMRecord allI = ArtificialSAMUtils.createArtificialRead(header,"allI",0,1,readLength);
|
||||
|
||||
unmapped.setReadUnmappedFlag(true);
|
||||
unmapped.setCigarString("*");
|
||||
allI.setCigarString(readLength + "I");
|
||||
|
||||
List<GATKSAMRecord> reads = Arrays.asList(mapped1, unmapped, allI, mapped2);
|
||||
|
||||
// create the iterator by state with the fake reads and fake records
|
||||
li = makeLTBS(reads,createTestReadProperties(DownsamplingMethod.NONE, true));
|
||||
|
||||
Assert.assertTrue(li.hasNext());
|
||||
AlignmentContext context = li.next();
|
||||
ReadBackedPileup pileup = context.getBasePileup();
|
||||
Assert.assertEquals(pileup.depthOfCoverage(), 2, "Should see only 2 reads in pileup, even with unmapped and all I reads");
|
||||
|
||||
final List<GATKSAMRecord> rawReads = li.transferReadsFromAllPreviousPileups();
|
||||
Assert.assertEquals(rawReads, reads, "Input and transferred read lists should be the same, and include the unmapped and all I reads");
|
||||
}
|
||||
|
||||
@Test(enabled = true && ! DEBUG)
|
||||
public void testXandEQOperators() {
|
||||
final byte[] bases1 = new byte[] {'A','A','A','A','A','A','A','A','A','A'};
|
||||
|
|
@ -451,7 +477,7 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest {
|
|||
? new DownsamplingMethod(DownsampleType.BY_SAMPLE, downsampleTo, null, false)
|
||||
: new DownsamplingMethod(DownsampleType.NONE, null, null, false);
|
||||
|
||||
final ArtificialBAMBuilder bamBuilder = new ArtificialBAMBuilder(nReadsPerLocus, nLoci);
|
||||
final ArtificialBAMBuilder bamBuilder = new ArtificialBAMBuilder(header.getSequenceDictionary(), nReadsPerLocus, nLoci);
|
||||
bamBuilder.createAndSetHeader(nSamples).setReadLength(readLength).setAlignmentStart(1);
|
||||
|
||||
final List<GATKSAMRecord> reads = bamBuilder.makeReads();
|
||||
|
|
|
|||
|
|
@ -47,8 +47,8 @@ import java.util.List;
|
|||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ArtificialBAMBuilderUnitTest extends BaseTest {
|
||||
@DataProvider(name = "CombinatorialARTTilingProvider")
|
||||
public Object[][] makeCombinatorialARTTilingProvider() {
|
||||
@DataProvider(name = "ArtificialBAMBuilderUnitTestProvider")
|
||||
public Object[][] makeArtificialBAMBuilderUnitTestProvider() {
|
||||
final List<Object[]> tests = new LinkedList<Object[]>();
|
||||
|
||||
final List<Integer> starts = Arrays.asList(
|
||||
|
|
@ -79,7 +79,7 @@ public class ArtificialBAMBuilderUnitTest extends BaseTest {
|
|||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "CombinatorialARTTilingProvider")
|
||||
@Test(dataProvider = "ArtificialBAMBuilderUnitTestProvider")
|
||||
public void testBamProvider(final ArtificialBAMBuilder bamBuilder, int readLength, int skips, int start, int nSamples, int nReadsPerLocus, int nLoci) {
|
||||
Assert.assertEquals(bamBuilder.getReadLength(), readLength);
|
||||
Assert.assertEquals(bamBuilder.getSkipNLoci(), skips);
|
||||
|
|
|
|||
Loading…
Reference in New Issue