Merge branch 'master' of github.com:broadinstitute/gsa-unstable
This commit is contained in:
commit
ded659232b
|
|
@ -184,6 +184,16 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
@Argument(fullName="downsampleRegion", shortName="dr", doc="coverage, per-sample, to downsample each active region to", required = false)
|
||||
protected int DOWNSAMPLE_PER_SAMPLE_PER_REGION = 1000;
|
||||
|
||||
/**
|
||||
* If this flag is provided, the haplotype caller will include unmapped reads in the assembly and calling
|
||||
* when these reads occur in the region being analyzed. Typically, for paired end analyses, one pair of the
|
||||
* read can map, but if its pair is too divergent then it may be unmapped and placed next to its mate, taking
|
||||
* the mates contig and alignment start. If this flag is provided the haplotype caller will see such reads,
|
||||
* and may make use of them in assembly and calling, where possible.
|
||||
*/
|
||||
@Argument(fullName="includeUmappedReads", shortName="unmapped", doc="If provided, unmapped reads with chromosomal coordinates (i.e., those placed to their maps) will be included in the assembly and calling", required = false)
|
||||
protected boolean includeUnmappedReads = false;
|
||||
|
||||
@Argument(fullName="useAllelesTrigger", shortName="allelesTrigger", doc = "If specified, use additional trigger on variants found in an external alleles file", required=false)
|
||||
protected boolean USE_ALLELES_TRIGGER = false;
|
||||
|
||||
|
|
@ -354,11 +364,20 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
// enable non primary and extended reads in the active region
|
||||
@Override
|
||||
public EnumSet<ActiveRegionReadState> desiredReadStates() {
|
||||
return EnumSet.of(
|
||||
ActiveRegionReadState.PRIMARY,
|
||||
ActiveRegionReadState.NONPRIMARY,
|
||||
ActiveRegionReadState.EXTENDED
|
||||
);
|
||||
if ( includeUnmappedReads ) {
|
||||
throw new UserException.BadArgumentValue("includeUmappedReads", "is not yet functional");
|
||||
// return EnumSet.of(
|
||||
// ActiveRegionReadState.PRIMARY,
|
||||
// ActiveRegionReadState.NONPRIMARY,
|
||||
// ActiveRegionReadState.EXTENDED,
|
||||
// ActiveRegionReadState.UNMAPPED
|
||||
// );
|
||||
} else
|
||||
return EnumSet.of(
|
||||
ActiveRegionReadState.PRIMARY,
|
||||
ActiveRegionReadState.NONPRIMARY,
|
||||
ActiveRegionReadState.EXTENDED
|
||||
);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
|||
|
|
@ -32,8 +32,7 @@ import org.broadinstitute.sting.utils.NGSPlatform;
|
|||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Easy to use creator of artificial BAM files for testing
|
||||
|
|
@ -64,6 +63,8 @@ public class ArtificialBAMBuilder {
|
|||
int readLength = 10;
|
||||
private final ArrayList<String> samples = new ArrayList<String>();
|
||||
|
||||
private LinkedList<GATKSAMRecord> additionalReads = new LinkedList<GATKSAMRecord>();
|
||||
|
||||
final SAMFileWriterFactory factory = new SAMFileWriterFactory();
|
||||
{
|
||||
factory.setCreateIndex(true);
|
||||
|
|
@ -118,6 +119,14 @@ public class ArtificialBAMBuilder {
|
|||
return this;
|
||||
}
|
||||
|
||||
public void addReads(final GATKSAMRecord readToAdd) {
|
||||
additionalReads.add(readToAdd);
|
||||
}
|
||||
|
||||
public void addReads(final Collection<GATKSAMRecord> readsToAdd) {
|
||||
additionalReads.addAll(readsToAdd);
|
||||
}
|
||||
|
||||
public List<String> getSamples() {
|
||||
return samples;
|
||||
}
|
||||
|
|
@ -145,6 +154,11 @@ public class ArtificialBAMBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
if ( ! additionalReads.isEmpty() ) {
|
||||
reads.addAll(additionalReads);
|
||||
Collections.sort(reads, new SAMRecordCoordinateComparator());
|
||||
}
|
||||
|
||||
return reads;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -501,6 +501,12 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
return providers;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Combinatorial tests to ensure reads are going into the right regions
|
||||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
||||
@DataProvider(name = "CombinatorialARTTilingProvider")
|
||||
public Object[][] makeCombinatorialARTTilingProvider() {
|
||||
final List<Object[]> tests = new LinkedList<Object[]>();
|
||||
|
|
@ -582,7 +588,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
|
||||
@Test(enabled = true, dataProvider = "CombinatorialARTTilingProvider")
|
||||
@Test(enabled = true && ! DEBUG, dataProvider = "CombinatorialARTTilingProvider")
|
||||
public void testARTReadsInActiveRegions(final int id, final GenomeLocSortedSet activeRegions, final EnumSet<ActiveRegionReadState> readStates, final ArtificialBAMBuilder bamBuilder) {
|
||||
logger.warn("Running testARTReadsInActiveRegions id=" + id + " locs " + activeRegions + " against bam " + bamBuilder);
|
||||
final List<GenomeLoc> intervals = Arrays.asList(
|
||||
|
|
@ -597,10 +603,10 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
|
||||
final Set<String> alreadySeenReads = new HashSet<String>(); // for use with the primary / non-primary
|
||||
for ( final ActiveRegion region : activeRegionsMap.values() ) {
|
||||
final Set<String> readNamesInRegion = readNamesInRegion(region);
|
||||
int nReadsExpectedInRegion = 0;
|
||||
for ( final GATKSAMRecord read : bamBuilder.makeReads() ) {
|
||||
final GenomeLoc readLoc = genomeLocParser.createGenomeLoc(read);
|
||||
final Set<String> readNamesInRegion = readNamesInRegion(region);
|
||||
|
||||
boolean shouldBeInRegion = readStates.contains(ActiveRegionReadState.EXTENDED)
|
||||
? region.getExtendedLoc().overlapsP(readLoc)
|
||||
|
|
@ -629,4 +635,53 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
readNames.add(read.getReadName());
|
||||
return readNames;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Make sure all insertion reads are properly included in the active regions
|
||||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
||||
@Test
|
||||
public void ensureAllInsertionReadsAreInActiveRegions() {
|
||||
|
||||
final int readLength = 10;
|
||||
final int start = 20;
|
||||
final int nReadsPerLocus = 10;
|
||||
final int nLoci = 3;
|
||||
|
||||
final ArtificialBAMBuilder bamBuilder = new ArtificialBAMBuilder(reference, nReadsPerLocus, nLoci);
|
||||
bamBuilder.setReadLength(readLength);
|
||||
bamBuilder.setAlignmentStart(start);
|
||||
|
||||
// note that the position must be +1 as the read's all I cigar puts the end 1 bp before start, leaving it out of the region
|
||||
GATKSAMRecord allI = ArtificialSAMUtils.createArtificialRead(bamBuilder.getHeader(),"allI",0,start+1,readLength);
|
||||
allI.setCigarString(readLength + "I");
|
||||
allI.setReadGroup(new GATKSAMReadGroupRecord(bamBuilder.getHeader().getReadGroups().get(0)));
|
||||
|
||||
bamBuilder.addReads(allI);
|
||||
|
||||
final GenomeLocSortedSet activeRegions = new GenomeLocSortedSet(bamBuilder.getGenomeLocParser());
|
||||
activeRegions.add(bamBuilder.getGenomeLocParser().createGenomeLoc("1", 10, 30));
|
||||
final List<GenomeLoc> intervals = Arrays.asList(
|
||||
genomeLocParser.createGenomeLoc("1", bamBuilder.getAlignmentStart(), bamBuilder.getAlignmentEnd())
|
||||
);
|
||||
|
||||
final DummyActiveRegionWalker walker = new DummyActiveRegionWalker(activeRegions);
|
||||
|
||||
final TraverseActiveRegions traversal = new TraverseActiveRegions<Integer, Integer>();
|
||||
final Map<GenomeLoc, ActiveRegion> activeRegionsMap = getActiveRegions(traversal, walker, intervals, bamBuilder.makeTemporarilyBAMFile().toString());
|
||||
|
||||
final ActiveRegion region = activeRegionsMap.values().iterator().next();
|
||||
int nReadsExpectedInRegion = 0;
|
||||
|
||||
final Set<String> readNamesInRegion = readNamesInRegion(region);
|
||||
for ( final GATKSAMRecord read : bamBuilder.makeReads() ) {
|
||||
Assert.assertTrue(readNamesInRegion.contains(read.getReadName()),
|
||||
"Region " + region + " should contain read " + read + " with cigar " + read.getCigarString() + " but it wasn't");
|
||||
nReadsExpectedInRegion++;
|
||||
}
|
||||
|
||||
Assert.assertEquals(region.size(), nReadsExpectedInRegion, "There are more reads in active region " + region + "than expected");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue