Merge branch 'master' of github.com:broadinstitute/gsa-unstable

This commit is contained in:
Eric Banks 2013-01-16 22:49:56 -05:00
commit ded659232b
3 changed files with 97 additions and 9 deletions

View File

@ -184,6 +184,16 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
@Argument(fullName="downsampleRegion", shortName="dr", doc="coverage, per-sample, to downsample each active region to", required = false)
protected int DOWNSAMPLE_PER_SAMPLE_PER_REGION = 1000;
/**
* If this flag is provided, the haplotype caller will include unmapped reads in the assembly and calling
* when these reads occur in the region being analyzed. Typically, for paired end analyses, one pair of the
* read can map, but if its pair is too divergent then it may be unmapped and placed next to its mate, taking
* the mates contig and alignment start. If this flag is provided the haplotype caller will see such reads,
* and may make use of them in assembly and calling, where possible.
*/
@Argument(fullName="includeUmappedReads", shortName="unmapped", doc="If provided, unmapped reads with chromosomal coordinates (i.e., those placed to their maps) will be included in the assembly and calling", required = false)
protected boolean includeUnmappedReads = false;
@Argument(fullName="useAllelesTrigger", shortName="allelesTrigger", doc = "If specified, use additional trigger on variants found in an external alleles file", required=false)
protected boolean USE_ALLELES_TRIGGER = false;
@ -354,11 +364,20 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
// enable non primary and extended reads in the active region
@Override
public EnumSet<ActiveRegionReadState> desiredReadStates() {
return EnumSet.of(
ActiveRegionReadState.PRIMARY,
ActiveRegionReadState.NONPRIMARY,
ActiveRegionReadState.EXTENDED
);
if ( includeUnmappedReads ) {
throw new UserException.BadArgumentValue("includeUmappedReads", "is not yet functional");
// return EnumSet.of(
// ActiveRegionReadState.PRIMARY,
// ActiveRegionReadState.NONPRIMARY,
// ActiveRegionReadState.EXTENDED,
// ActiveRegionReadState.UNMAPPED
// );
} else
return EnumSet.of(
ActiveRegionReadState.PRIMARY,
ActiveRegionReadState.NONPRIMARY,
ActiveRegionReadState.EXTENDED
);
}
@Override

View File

@ -32,8 +32,7 @@ import org.broadinstitute.sting.utils.NGSPlatform;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.*;
/**
* Easy to use creator of artificial BAM files for testing
@ -64,6 +63,8 @@ public class ArtificialBAMBuilder {
int readLength = 10;
private final ArrayList<String> samples = new ArrayList<String>();
private LinkedList<GATKSAMRecord> additionalReads = new LinkedList<GATKSAMRecord>();
final SAMFileWriterFactory factory = new SAMFileWriterFactory();
{
factory.setCreateIndex(true);
@ -118,6 +119,14 @@ public class ArtificialBAMBuilder {
return this;
}
public void addReads(final GATKSAMRecord readToAdd) {
additionalReads.add(readToAdd);
}
public void addReads(final Collection<GATKSAMRecord> readsToAdd) {
additionalReads.addAll(readsToAdd);
}
public List<String> getSamples() {
return samples;
}
@ -145,6 +154,11 @@ public class ArtificialBAMBuilder {
}
}
if ( ! additionalReads.isEmpty() ) {
reads.addAll(additionalReads);
Collections.sort(reads, new SAMRecordCoordinateComparator());
}
return reads;
}

View File

@ -501,6 +501,12 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
return providers;
}
// ---------------------------------------------------------------------------------------------------------
//
// Combinatorial tests to ensure reads are going into the right regions
//
// ---------------------------------------------------------------------------------------------------------
@DataProvider(name = "CombinatorialARTTilingProvider")
public Object[][] makeCombinatorialARTTilingProvider() {
final List<Object[]> tests = new LinkedList<Object[]>();
@ -582,7 +588,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
}
@Test(enabled = true, dataProvider = "CombinatorialARTTilingProvider")
@Test(enabled = true && ! DEBUG, dataProvider = "CombinatorialARTTilingProvider")
public void testARTReadsInActiveRegions(final int id, final GenomeLocSortedSet activeRegions, final EnumSet<ActiveRegionReadState> readStates, final ArtificialBAMBuilder bamBuilder) {
logger.warn("Running testARTReadsInActiveRegions id=" + id + " locs " + activeRegions + " against bam " + bamBuilder);
final List<GenomeLoc> intervals = Arrays.asList(
@ -597,10 +603,10 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
final Set<String> alreadySeenReads = new HashSet<String>(); // for use with the primary / non-primary
for ( final ActiveRegion region : activeRegionsMap.values() ) {
final Set<String> readNamesInRegion = readNamesInRegion(region);
int nReadsExpectedInRegion = 0;
for ( final GATKSAMRecord read : bamBuilder.makeReads() ) {
final GenomeLoc readLoc = genomeLocParser.createGenomeLoc(read);
final Set<String> readNamesInRegion = readNamesInRegion(region);
boolean shouldBeInRegion = readStates.contains(ActiveRegionReadState.EXTENDED)
? region.getExtendedLoc().overlapsP(readLoc)
@ -629,4 +635,53 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
readNames.add(read.getReadName());
return readNames;
}
// ---------------------------------------------------------------------------------------------------------
//
// Make sure all insertion reads are properly included in the active regions
//
// ---------------------------------------------------------------------------------------------------------
@Test
public void ensureAllInsertionReadsAreInActiveRegions() {
final int readLength = 10;
final int start = 20;
final int nReadsPerLocus = 10;
final int nLoci = 3;
final ArtificialBAMBuilder bamBuilder = new ArtificialBAMBuilder(reference, nReadsPerLocus, nLoci);
bamBuilder.setReadLength(readLength);
bamBuilder.setAlignmentStart(start);
// note that the position must be +1 as the read's all I cigar puts the end 1 bp before start, leaving it out of the region
GATKSAMRecord allI = ArtificialSAMUtils.createArtificialRead(bamBuilder.getHeader(),"allI",0,start+1,readLength);
allI.setCigarString(readLength + "I");
allI.setReadGroup(new GATKSAMReadGroupRecord(bamBuilder.getHeader().getReadGroups().get(0)));
bamBuilder.addReads(allI);
final GenomeLocSortedSet activeRegions = new GenomeLocSortedSet(bamBuilder.getGenomeLocParser());
activeRegions.add(bamBuilder.getGenomeLocParser().createGenomeLoc("1", 10, 30));
final List<GenomeLoc> intervals = Arrays.asList(
genomeLocParser.createGenomeLoc("1", bamBuilder.getAlignmentStart(), bamBuilder.getAlignmentEnd())
);
final DummyActiveRegionWalker walker = new DummyActiveRegionWalker(activeRegions);
final TraverseActiveRegions traversal = new TraverseActiveRegions<Integer, Integer>();
final Map<GenomeLoc, ActiveRegion> activeRegionsMap = getActiveRegions(traversal, walker, intervals, bamBuilder.makeTemporarilyBAMFile().toString());
final ActiveRegion region = activeRegionsMap.values().iterator().next();
int nReadsExpectedInRegion = 0;
final Set<String> readNamesInRegion = readNamesInRegion(region);
for ( final GATKSAMRecord read : bamBuilder.makeReads() ) {
Assert.assertTrue(readNamesInRegion.contains(read.getReadName()),
"Region " + region + " should contain read " + read + " with cigar " + read.getCigarString() + " but it wasn't");
nReadsExpectedInRegion++;
}
Assert.assertEquals(region.size(), nReadsExpectedInRegion, "There are more reads in active region " + region + "than expected");
}
}