Add tests to ensure that all insertion reads appear in the active region traversal

This commit is contained in:
Mark DePristo 2013-01-16 16:25:11 -05:00
parent 3c476a92a2
commit 738c24a3b1
2 changed files with 73 additions and 4 deletions

View File

@ -32,8 +32,7 @@ import org.broadinstitute.sting.utils.NGSPlatform;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.*;
/**
* Easy to use creator of artificial BAM files for testing
@ -64,6 +63,8 @@ public class ArtificialBAMBuilder {
int readLength = 10;
private final ArrayList<String> samples = new ArrayList<String>();
private LinkedList<GATKSAMRecord> additionalReads = new LinkedList<GATKSAMRecord>();
final SAMFileWriterFactory factory = new SAMFileWriterFactory();
{
factory.setCreateIndex(true);
@ -118,6 +119,14 @@ public class ArtificialBAMBuilder {
return this;
}
public void addReads(final GATKSAMRecord readToAdd) {
additionalReads.add(readToAdd);
}
public void addReads(final Collection<GATKSAMRecord> readsToAdd) {
additionalReads.addAll(readsToAdd);
}
public List<String> getSamples() {
return samples;
}
@ -145,6 +154,11 @@ public class ArtificialBAMBuilder {
}
}
if ( ! additionalReads.isEmpty() ) {
reads.addAll(additionalReads);
Collections.sort(reads, new SAMRecordCoordinateComparator());
}
return reads;
}

View File

@ -501,6 +501,12 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
return providers;
}
// ---------------------------------------------------------------------------------------------------------
//
// Combinatorial tests to ensure reads are going into the right regions
//
// ---------------------------------------------------------------------------------------------------------
@DataProvider(name = "CombinatorialARTTilingProvider")
public Object[][] makeCombinatorialARTTilingProvider() {
final List<Object[]> tests = new LinkedList<Object[]>();
@ -582,7 +588,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
}
@Test(enabled = true, dataProvider = "CombinatorialARTTilingProvider")
@Test(enabled = true && ! DEBUG, dataProvider = "CombinatorialARTTilingProvider")
public void testARTReadsInActiveRegions(final int id, final GenomeLocSortedSet activeRegions, final EnumSet<ActiveRegionReadState> readStates, final ArtificialBAMBuilder bamBuilder) {
logger.warn("Running testARTReadsInActiveRegions id=" + id + " locs " + activeRegions + " against bam " + bamBuilder);
final List<GenomeLoc> intervals = Arrays.asList(
@ -597,10 +603,10 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
final Set<String> alreadySeenReads = new HashSet<String>(); // for use with the primary / non-primary
for ( final ActiveRegion region : activeRegionsMap.values() ) {
final Set<String> readNamesInRegion = readNamesInRegion(region);
int nReadsExpectedInRegion = 0;
for ( final GATKSAMRecord read : bamBuilder.makeReads() ) {
final GenomeLoc readLoc = genomeLocParser.createGenomeLoc(read);
final Set<String> readNamesInRegion = readNamesInRegion(region);
boolean shouldBeInRegion = readStates.contains(ActiveRegionReadState.EXTENDED)
? region.getExtendedLoc().overlapsP(readLoc)
@ -629,4 +635,53 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
readNames.add(read.getReadName());
return readNames;
}
// ---------------------------------------------------------------------------------------------------------
//
// Make sure all insertion reads are properly included in the active regions
//
// ---------------------------------------------------------------------------------------------------------
@Test
public void ensureAllInsertionReadsAreInActiveRegions() {
final int readLength = 10;
final int start = 20;
final int nReadsPerLocus = 10;
final int nLoci = 3;
final ArtificialBAMBuilder bamBuilder = new ArtificialBAMBuilder(reference, nReadsPerLocus, nLoci);
bamBuilder.setReadLength(readLength);
bamBuilder.setAlignmentStart(start);
// note that the position must be +1 as the read's all I cigar puts the end 1 bp before start, leaving it out of the region
GATKSAMRecord allI = ArtificialSAMUtils.createArtificialRead(bamBuilder.getHeader(),"allI",0,start+1,readLength);
allI.setCigarString(readLength + "I");
allI.setReadGroup(new GATKSAMReadGroupRecord(bamBuilder.getHeader().getReadGroups().get(0)));
bamBuilder.addReads(allI);
final GenomeLocSortedSet activeRegions = new GenomeLocSortedSet(bamBuilder.getGenomeLocParser());
activeRegions.add(bamBuilder.getGenomeLocParser().createGenomeLoc("1", 10, 30));
final List<GenomeLoc> intervals = Arrays.asList(
genomeLocParser.createGenomeLoc("1", bamBuilder.getAlignmentStart(), bamBuilder.getAlignmentEnd())
);
final DummyActiveRegionWalker walker = new DummyActiveRegionWalker(activeRegions);
final TraverseActiveRegions traversal = new TraverseActiveRegions<Integer, Integer>();
final Map<GenomeLoc, ActiveRegion> activeRegionsMap = getActiveRegions(traversal, walker, intervals, bamBuilder.makeTemporarilyBAMFile().toString());
final ActiveRegion region = activeRegionsMap.values().iterator().next();
int nReadsExpectedInRegion = 0;
final Set<String> readNamesInRegion = readNamesInRegion(region);
for ( final GATKSAMRecord read : bamBuilder.makeReads() ) {
Assert.assertTrue(readNamesInRegion.contains(read.getReadName()),
"Region " + region + " should contain read " + read + " with cigar " + read.getCigarString() + " but it wasn't");
nReadsExpectedInRegion++;
}
Assert.assertEquals(region.size(), nReadsExpectedInRegion, "There are more reads in active region " + region + "than expected");
}
}