Merge branch 'master' of gsa2:/humgen/gsa-scr1/chartl/dev/unstable

This commit is contained in:
Chris Hartl 2012-12-12 15:42:40 -05:00
commit 5780926254
61 changed files with 1923 additions and 1969 deletions

View File

@ -56,11 +56,14 @@ public class AlleleBiasedDownsamplingUtils {
for ( int i = 0; i < 4; i++ )
alleleStratifiedElements[i] = new ArrayList<PileupElement>();
// keep all of the reduced reads
final ArrayList<PileupElement> reducedReadPileups = new ArrayList<PileupElement>();
// start by stratifying the reads by the alleles they represent at this position
for( final PileupElement pe : pileup ) {
// abort if we have a reduced read - we do not want to remove it!
// we do not want to remove a reduced read
if ( pe.getRead().isReducedRead() )
return pileup;
reducedReadPileups.add(pe);
final int baseIndex = BaseUtils.simpleBaseToBaseIndex(pe.getBase());
if ( baseIndex != -1 )
@ -76,6 +79,7 @@ public class AlleleBiasedDownsamplingUtils {
return difference != 0 ? difference : element1.getRead().getReadName().compareTo(element2.getRead().getReadName());
}
});
elementsToKeep.addAll(reducedReadPileups);
// make a listing of allele counts
final int[] alleleCounts = new int[4];

View File

@ -85,7 +85,7 @@ import java.util.*;
*/
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
@PartitionBy(PartitionType.INTERVAL)
@PartitionBy(PartitionType.CONTIG)
@ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, BadCigarFilter.class})
public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceReadsStash> {

View File

@ -240,8 +240,8 @@ public class AFCalcPerformanceTest {
if ( a.isNonReference() ) {
final String warningmeMLE = call.originalCall.getAlleleCountAtMLE(a) != result.getAlleleCountAtMLE(a) ? " DANGER-MLE-DIFFERENT" : "";
logger.info("\t\t MLE " + a + ": " + call.originalCall.getAlleleCountAtMLE(a) + " vs " + result.getAlleleCountAtMLE(a) + warningmeMLE);
final String warningmePost = call.originalCall.getLog10PosteriorOfAFGt0ForAllele(a) == 0 && result.getLog10PosteriorOfAFGt0ForAllele(a) < -10 ? " DANGER-POSTERIORS-DIFFERENT" : "";
logger.info("\t\t Posterior " + a + ": " + call.originalCall.getLog10PosteriorOfAFGt0ForAllele(a) + " vs " + result.getLog10PosteriorOfAFGt0ForAllele(a) + warningmePost);
final String warningmePost = call.originalCall.getLog10PosteriorOfAFEq0ForAllele(a) == 0 && result.getLog10PosteriorOfAFEq0ForAllele(a) < -10 ? " DANGER-POSTERIORS-DIFFERENT" : "";
logger.info("\t\t Posterior " + a + ": " + call.originalCall.getLog10PosteriorOfAFEq0ForAllele(a) + " vs " + result.getLog10PosteriorOfAFEq0ForAllele(a) + warningmePost);
}
}
}

View File

@ -33,7 +33,6 @@ import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -449,10 +448,10 @@ public class GenotypingEngine {
final ArrayList<Haplotype> undeterminedHaplotypes = new ArrayList<Haplotype>(haplotypes.size());
for( final Haplotype h : haplotypes ) {
if( h.getEventMap().get(loc) == null ) { // no event at this location so this is a reference-supporting haplotype
alleleMapper.get(refAllele).add(h);
} else if( h.isArtificialHaplotype() && loc == h.getArtificialAllelePosition() && alleleMapper.containsKey(h.getArtificialAllele()) ) {
if( h.isArtificialHaplotype() && loc == h.getArtificialAllelePosition() && alleleMapper.containsKey(h.getArtificialAllele()) ) {
alleleMapper.get(h.getArtificialAllele()).add(h);
} else if( h.getEventMap().get(loc) == null ) { // no event at this location so let's investigate later
undeterminedHaplotypes.add(h);
} else {
boolean haplotypeIsDetermined = false;
for( final VariantContext vcAtThisLoc : eventsAtThisLoc ) {

View File

@ -282,10 +282,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
for( final VariantContext compVC : activeAllelesToGenotype ) {
for( final Allele compAltAllele : compVC.getAlternateAlleles() ) {
final Haplotype insertedRefHaplotype = refHaplotype.insertAllele(compVC.getReference(), compAltAllele, activeRegionStart + compVC.getStart() - activeRegionWindow.getStart(), compVC.getStart());
if( !addHaplotype( insertedRefHaplotype, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop ) ) {
return returnHaplotypes;
//throw new ReviewedStingException("Unable to add reference+allele haplotype during GGA-enabled assembly: " + insertedRefHaplotype);
}
addHaplotype( insertedRefHaplotype, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop, true );
}
}
@ -293,7 +290,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
for ( final KBestPaths.Path path : KBestPaths.getKBestPaths(graph, NUM_BEST_PATHS_PER_KMER_GRAPH) ) {
final Haplotype h = new Haplotype( path.getBases( graph ), path.getScore() );
if( addHaplotype( h, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop ) ) {
if( addHaplotype( h, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop, false ) ) {
// for GGA mode, add the desired allele into the haplotype if it isn't already present
if( !activeAllelesToGenotype.isEmpty() ) {
@ -308,7 +305,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
// falls into the bin for the 1bp deletion because we keep track of the artificial alleles).
if( vcOnHaplotype == null ) {
for( final Allele compAltAllele : compVC.getAlternateAlleles() ) {
addHaplotype( h.insertAllele(compVC.getReference(), compAltAllele, activeRegionStart + compVC.getStart() - activeRegionWindow.getStart(), compVC.getStart()), fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop );
addHaplotype( h.insertAllele(compVC.getReference(), compAltAllele, activeRegionStart + compVC.getStart() - activeRegionWindow.getStart(), compVC.getStart()), fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop, false );
}
}
}
@ -332,7 +329,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
return returnHaplotypes;
}
private boolean addHaplotype( final Haplotype haplotype, final byte[] ref, final ArrayList<Haplotype> haplotypeList, final int activeRegionStart, final int activeRegionStop ) {
private boolean addHaplotype( final Haplotype haplotype, final byte[] ref, final ArrayList<Haplotype> haplotypeList, final int activeRegionStart, final int activeRegionStop, final boolean FORCE_INCLUSION_FOR_GGA_MODE ) {
if( haplotype == null ) { return false; }
final SWPairwiseAlignment swConsensus = new SWPairwiseAlignment( ref, haplotype.getBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND );
@ -387,7 +384,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
return false;
}
if( !haplotypeList.contains(h) ) {
if( FORCE_INCLUSION_FOR_GGA_MODE || !haplotypeList.contains(h) ) {
haplotypeList.add(h);
return true;
} else {

View File

@ -80,11 +80,11 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest {
@Test(enabled = true)
public void testMT_SNP_DISCOVERY_sp4() {
PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","dd568dc30be90135a3a8957a45a7321c");
PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","3fc6f4d458313616727c60e49c0e852b");
}
@Test(enabled = true)
public void testMT_SNP_GGA_sp10() {
PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "bf793c43b635a931207170be8035b288");
PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "1bebbc0f28bff6fd64736ccca8839df8");
}
}

View File

@ -62,7 +62,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultipleSNPAlleles() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
Arrays.asList("d20c7a143b899f0239bf64b652ad3edb"));
Arrays.asList("97df6c2a8d390d43b9bdf56c979d9b09"));
executeTest("test Multiple SNP alleles", spec);
}
@ -86,7 +86,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMismatchedPLs() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1,
Arrays.asList("fb204e821a24d03bd3a671b6e01c449a"));
Arrays.asList("935ee705ffe8cc6bf1d9efcceea271c8"));
executeTest("test mismatched PLs", spec);
}
@ -96,7 +96,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
//
// --------------------------------------------------------------------------------------------------------------
private final static String COMPRESSED_OUTPUT_MD5 = "5b8f477c287770b5769b05591e35bc2d";
private final static String COMPRESSED_OUTPUT_MD5 = "3eba6c309514d1e9ee06a20a112b68e6";
@Test
public void testCompressedOutput() {
@ -436,8 +436,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testNsInCigar() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + validationDataLocation + "testWithNs.bam -o %s -L 8:141813600-141813700 -out_mode EMIT_ALL_SITES", 1,
Arrays.asList("32f18ba50406cd8c8069ba07f2f89558"));
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "testWithNs.bam -o %s -L 8:141813600-141813700 -out_mode EMIT_ALL_SITES", 1,
Arrays.asList("4d36969d4f8f1094f1fb6e7e085c19f6"));
executeTest("test calling on reads with Ns in CIGAR", spec);
}
@ -451,13 +451,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testReducedBam() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
Arrays.asList("c1077662411164182c5f75478344f83d"));
Arrays.asList("092e42a712afb660ec79ff11c55933e2"));
executeTest("test calling on a ReducedRead BAM", spec);
}
@Test
public void testReducedBamSNPs() {
testReducedCalling("SNP", "dee6590e3b7079890bc3a9cb372c297e");
testReducedCalling("SNP", "c0de74ab8f4f14eb3a2c5d55c200ac5f");
}
@Test

View File

@ -83,8 +83,8 @@ public class AFCalcResultUnitTest extends BaseTest {
List<Object[]> tests = new ArrayList<Object[]>();
final List<Double> pValues = new LinkedList<Double>();
for ( final double p : Arrays.asList(0.01, 0.1, 0.9, 0.99, 0.999) )
for ( final double espilon : Arrays.asList(-1e-5, 0.0, 1e-5) )
for ( final double p : Arrays.asList(0.01, 0.1, 0.9, 0.99, 0.999, 1 - 1e-4, 1 - 1e-5, 1 - 1e-6) )
for ( final double espilon : Arrays.asList(-1e-7, 0.0, 1e-7) )
pValues.add(p + espilon);
for ( final double pNonRef : pValues ) {
@ -106,16 +106,16 @@ public class AFCalcResultUnitTest extends BaseTest {
alleles,
MathUtils.normalizeFromLog10(new double[]{1 - pNonRef, pNonRef}, true, false),
log10Even,
Collections.singletonMap(C, Math.log10(pNonRef)));
Collections.singletonMap(C, Math.log10(1 - pNonRef)));
}
@Test(enabled = true, dataProvider = "TestIsPolymorphic")
private void testIsPolymorphic(final double pNonRef, final double pThreshold, final boolean shouldBePoly) {
final AFCalcResult result = makePolymorphicTestData(pNonRef);
final boolean actualIsPoly = result.isPolymorphic(C, Math.log10(pThreshold));
Assert.assertEquals(actualIsPoly, shouldBePoly,
"isPolymorphic with pNonRef " + pNonRef + " and threshold " + pThreshold + " returned "
+ actualIsPoly + " but the expected result is " + shouldBePoly);
final AFCalcResult result = makePolymorphicTestData(pNonRef);
final boolean actualIsPoly = result.isPolymorphic(C, Math.log10(1 - pThreshold));
Assert.assertEquals(actualIsPoly, shouldBePoly,
"isPolymorphic with pNonRef " + pNonRef + " and threshold " + pThreshold + " returned "
+ actualIsPoly + " but the expected result is " + shouldBePoly);
}
@Test(enabled = true, dataProvider = "TestIsPolymorphic")

View File

@ -681,7 +681,7 @@ public class AFCalcUnitTest extends BaseTest {
// must be getCalledChrCount because we cannot ensure that the VC made has our desired ACs
Assert.assertEquals(result.getAlleleCountAtMLE(alt), vc.getCalledChrCount(alt));
Assert.assertEquals(result.isPolymorphic(alt, -1), (boolean)expectedPoly.get(i), "isPolymorphic for allele " + alt + " " + result.getLog10PosteriorOfAFGt0ForAllele(alt));
Assert.assertEquals(result.isPolymorphic(alt, -1), (boolean)expectedPoly.get(i), "isPolymorphic for allele " + alt + " " + result.getLog10PosteriorOfAFEq0ForAllele(alt));
}
}
}

View File

@ -148,7 +148,7 @@ public class IndependentAllelesDiploidExactAFCalcUnitTest extends BaseTest {
for ( int i = 0; i < log10LAlleles.size(); i++ ) {
final double log10LAllele1 = log10LAlleles.get(i);
final double[] L1 = MathUtils.normalizeFromLog10(new double[]{log10LAllele1, 0.0}, true);
final AFCalcResult result1 = new AFCalcResult(new int[]{1}, 1, Arrays.asList(A, C), L1, rawPriors, Collections.singletonMap(C, 0.0));
final AFCalcResult result1 = new AFCalcResult(new int[]{1}, 1, Arrays.asList(A, C), L1, rawPriors, Collections.singletonMap(C, -10000.0));
originalPriors.add(result1);
pNonRefN.add(log10pNonRef*(i+1));
}

View File

@ -33,7 +33,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSampleGGA() {
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
"e2b3bf420c45c677956a2e4a56d75ea2");
"fe84caa79f59ecbd98fcbcd5b30ab164");
}
private void HCTestComplexVariants(String bam, String args, String md5) {

View File

@ -445,13 +445,17 @@ public class GenomeAnalysisEngine {
protected DownsamplingMethod getDownsamplingMethod() {
GATKArgumentCollection argCollection = this.getArguments();
boolean useExperimentalDownsampling = argCollection.enableExperimentalDownsampling;
// Legacy downsampler can only be selected via the command line, not via walker annotations
boolean useLegacyDownsampler = argCollection.useLegacyDownsampler;
DownsamplingMethod commandLineMethod = argCollection.getDownsamplingMethod();
DownsamplingMethod walkerMethod = WalkerManager.getDownsamplingMethod(walker, useExperimentalDownsampling);
DownsamplingMethod defaultMethod = DownsamplingMethod.getDefaultDownsamplingMethod(walker, useExperimentalDownsampling);
DownsamplingMethod walkerMethod = WalkerManager.getDownsamplingMethod(walker, useLegacyDownsampler);
DownsamplingMethod defaultMethod = DownsamplingMethod.getDefaultDownsamplingMethod(walker, useLegacyDownsampler);
return commandLineMethod != null ? commandLineMethod : (walkerMethod != null ? walkerMethod : defaultMethod);
DownsamplingMethod method = commandLineMethod != null ? commandLineMethod : (walkerMethod != null ? walkerMethod : defaultMethod);
method.checkCompatibilityWithWalker(walker);
return method;
}
protected void setDownsamplingMethod(DownsamplingMethod method) {
@ -580,9 +584,9 @@ public class GenomeAnalysisEngine {
throw new UserException.CommandLineException("Pairs traversal cannot be used in conjunction with intervals.");
}
// Use the experimental ReadShardBalancer if experimental downsampling is enabled
ShardBalancer readShardBalancer = downsamplingMethod != null && downsamplingMethod.useExperimentalDownsampling ?
new ExperimentalReadShardBalancer() :
// Use the legacy ReadShardBalancer if legacy downsampling is enabled
ShardBalancer readShardBalancer = downsamplingMethod != null && downsamplingMethod.useLegacyDownsampler ?
new LegacyReadShardBalancer() :
new ReadShardBalancer();
if(intervals == null)

View File

@ -305,11 +305,23 @@ public class WalkerManager extends PluginManager<Walker> {
* Gets the type of downsampling method requested by the walker. If an alternative
* downsampling method is specified on the command-line, the command-line version will
* be used instead.
* @param walkerClass The class of the walker to interrogate.
* @param useExperimentalDownsampling If true, use the experimental downsampling implementation
* @param walker The walker to interrogate.
* @param useLegacyDownsampler If true, use the legacy downsampling implementation
* @return The downsampling method, as specified by the walker. Null if none exists.
*/
public static DownsamplingMethod getDownsamplingMethod(Class<? extends Walker> walkerClass, boolean useExperimentalDownsampling) {
public static DownsamplingMethod getDownsamplingMethod(Walker walker, boolean useLegacyDownsampler) {
return getDownsamplingMethod(walker.getClass(), useLegacyDownsampler);
}
/**
* Gets the type of downsampling method requested by the walker. If an alternative
* downsampling method is specified on the command-line, the command-line version will
* be used instead.
* @param walkerClass The class of the walker to interrogate.
* @param useLegacyDownsampler If true, use the legacy downsampling implementation
* @return The downsampling method, as specified by the walker. Null if none exists.
*/
public static DownsamplingMethod getDownsamplingMethod(Class<? extends Walker> walkerClass, boolean useLegacyDownsampler) {
DownsamplingMethod downsamplingMethod = null;
if( walkerClass.isAnnotationPresent(Downsample.class) ) {
@ -317,7 +329,7 @@ public class WalkerManager extends PluginManager<Walker> {
DownsampleType type = downsampleParameters.by();
Integer toCoverage = downsampleParameters.toCoverage() >= 0 ? downsampleParameters.toCoverage() : null;
Double toFraction = downsampleParameters.toFraction() >= 0.0d ? downsampleParameters.toFraction() : null;
downsamplingMethod = new DownsamplingMethod(type,toCoverage,toFraction,useExperimentalDownsampling);
downsamplingMethod = new DownsamplingMethod(type,toCoverage,toFraction,useLegacyDownsampler);
}
return downsamplingMethod;
@ -331,18 +343,6 @@ public class WalkerManager extends PluginManager<Walker> {
return walker.getClass().getAnnotation(BAQMode.class).ApplicationTime();
}
/**
* Gets the type of downsampling method requested by the walker. If an alternative
* downsampling method is specified on the command-line, the command-line version will
* be used instead.
* @param walker The walker to interrogate.
* @param useExperimentalDownsampling If true, use the experimental downsampling implementation
* @return The downsampling method, as specified by the walker. Null if none exists.
*/
public static DownsamplingMethod getDownsamplingMethod(Walker walker, boolean useExperimentalDownsampling) {
return getDownsamplingMethod(walker.getClass(), useExperimentalDownsampling);
}
/**
* Create a name for this type of walker.
*

View File

@ -162,12 +162,11 @@ public class GATKArgumentCollection {
@Argument(fullName = "downsample_to_fraction", shortName = "dfrac", doc = "Fraction [0.0-1.0] of reads to downsample to", required = false)
public Double downsampleFraction = null;
@Argument(fullName = "downsample_to_coverage", shortName = "dcov", doc = "Coverage [integer] to downsample to at any given locus; note that downsampled reads are randomly selected from all possible reads at a locus", required = false)
@Argument(fullName = "downsample_to_coverage", shortName = "dcov", doc = "Coverage [integer] to downsample to at any given locus; note that downsampled reads are randomly selected from all possible reads at a locus. For non-locus-based traversals (eg., ReadWalkers), this sets the maximum number of reads at each alignment start position.", required = false)
public Integer downsampleCoverage = null;
@Argument(fullName = "enable_experimental_downsampling", shortName = "enable_experimental_downsampling", doc = "Enable experimental engine-level downsampling", required = false)
@Hidden
public boolean enableExperimentalDownsampling = false;
@Argument(fullName = "use_legacy_downsampler", shortName = "use_legacy_downsampler", doc = "Use the legacy downsampling implementation instead of the newer, less-tested implementation", required = false)
public boolean useLegacyDownsampler = false;
/**
* Gets the downsampling method explicitly specified by the user. If the user didn't specify
@ -178,7 +177,7 @@ public class GATKArgumentCollection {
if ( downsamplingType == null && downsampleFraction == null && downsampleCoverage == null )
return null;
return new DownsamplingMethod(downsamplingType, downsampleCoverage, downsampleFraction, enableExperimentalDownsampling);
return new DownsamplingMethod(downsamplingType, downsampleCoverage, downsampleFraction, useLegacyDownsampler);
}
/**
@ -192,7 +191,7 @@ public class GATKArgumentCollection {
downsamplingType = method.type;
downsampleCoverage = method.toCoverage;
downsampleFraction = method.toFraction;
enableExperimentalDownsampling = method.useExperimentalDownsampling;
useLegacyDownsampler = method.useLegacyDownsampler;
}
// --------------------------------------------------------------------------------------------------------------

View File

@ -136,11 +136,12 @@ public abstract class LocusView extends LocusIterator implements View {
// Cache the current and apply filtering.
AlignmentContext current = nextLocus;
// The old ALL_READS downsampling implementation -- only use if we're not using the new experimental downsampling:
if( ! sourceInfo.getDownsamplingMethod().useExperimentalDownsampling &&
sourceInfo.getDownsamplingMethod().type == DownsampleType.ALL_READS && sourceInfo.getDownsamplingMethod().toCoverage != null ) {
// The old ALL_READS downsampling implementation -- use only if legacy downsampling was requested:
if ( sourceInfo.getDownsamplingMethod().useLegacyDownsampler &&
sourceInfo.getDownsamplingMethod().type == DownsampleType.ALL_READS &&
sourceInfo.getDownsamplingMethod().toCoverage != null ) {
current.downsampleToCoverage( sourceInfo.getDownsamplingMethod().toCoverage );
current.downsampleToCoverage(sourceInfo.getDownsamplingMethod().toCoverage);
}
// Indicate that the next operation will need to advance.

View File

@ -134,12 +134,11 @@ public class BAMScheduler implements Iterator<FilePointer> {
// Only use the deprecated SAMDataSource.getCurrentPosition() if we're not using experimental downsampling
// TODO: clean this up once the experimental downsampling engine fork collapses
if ( dataSource.getReadsInfo().getDownsamplingMethod() != null && dataSource.getReadsInfo().getDownsamplingMethod().useExperimentalDownsampling ) {
currentPosition = dataSource.getInitialReaderPositions();
if ( dataSource.getReadsInfo().getDownsamplingMethod() != null && dataSource.getReadsInfo().getDownsamplingMethod().useLegacyDownsampler ) {
currentPosition = dataSource.getCurrentPosition();
}
else {
currentPosition = dataSource.getCurrentPosition();
currentPosition = dataSource.getInitialReaderPositions();
}
for(SAMReaderID reader: dataSource.getReaderIDs())

View File

@ -1,228 +0,0 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.reads;
import net.sf.picard.util.PeekableIterator;
import net.sf.samtools.SAMRecord;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.*;
/**
* Convert from an unbalanced iterator over FilePointers to a balanced iterator over Shards.
*
* When processing FilePointers, our strategy is to aggregate all FilePointers for each contig
* together into one monolithic FilePointer, create one persistent set of read iterators over
* that monolithic FilePointer, and repeatedly use that persistent set of read iterators to
* fill read shards with reads.
*
* This strategy has several important advantages:
*
* 1. We avoid issues with file span overlap. FilePointers that are more granular than a whole
* contig will have regions that overlap with other FilePointers on the same contig, due
* to the limited granularity of BAM index data. By creating only one FilePointer per contig,
* we avoid having to track how much of each file region we've visited (as we did in the
* former implementation), we avoid expensive non-sequential access patterns in the files,
* and we avoid having to repeatedly re-create our iterator chain for every small region
* of interest.
*
* 2. We avoid boundary issues with the engine-level downsampling. Since we create a single
* persistent set of read iterators (which include the downsampling iterator(s)) per contig,
* the downsampling process is never interrupted by FilePointer or Shard boundaries, and never
* loses crucial state information while downsampling within a contig.
*
* TODO: There is also at least one important disadvantage:
*
* 1. We load more BAM index data into memory at once, and this work is done upfront before processing
* the next contig, creating a delay before traversal of each contig. This delay may be
* compensated for by the gains listed in #1 above, and we may be no worse off overall in
* terms of total runtime, but we need to verify this empirically.
*
* @author David Roazen
*/
public class ExperimentalReadShardBalancer extends ShardBalancer {
private static Logger logger = Logger.getLogger(ExperimentalReadShardBalancer.class);
/**
* Convert iterators of file pointers into balanced iterators of shards.
* @return An iterator over balanced shards.
*/
public Iterator<Shard> iterator() {
return new Iterator<Shard>() {
/**
* The cached shard to be returned next. Prefetched in the peekable iterator style.
*/
private Shard nextShard = null;
/**
* The file pointer currently being processed.
*/
private FilePointer currentContigFilePointer = null;
/**
* Iterator over the reads from the current contig's file pointer. The same iterator will be
* used to fill all shards associated with a given file pointer
*/
private PeekableIterator<SAMRecord> currentContigReadsIterator = null;
/**
* How many FilePointers have we pulled from the filePointers iterator?
*/
private int totalFilePointersConsumed = 0;
/**
* Have we encountered a monolithic FilePointer?
*/
private boolean encounteredMonolithicFilePointer = false;
{
createNextContigFilePointer();
advance();
}
public boolean hasNext() {
return nextShard != null;
}
public Shard next() {
if ( ! hasNext() )
throw new NoSuchElementException("No next read shard available");
Shard currentShard = nextShard;
advance();
return currentShard;
}
private void advance() {
nextShard = null;
// May need multiple iterations to fill the next shard if all reads in current file spans get filtered/downsampled away
while ( nextShard == null && currentContigFilePointer != null ) {
// If we've exhausted the current file pointer of reads, move to the next file pointer (if there is one):
if ( currentContigReadsIterator != null && ! currentContigReadsIterator.hasNext() ) {
// Close the old, exhausted chain of iterators to release resources
currentContigReadsIterator.close();
// Advance to the FilePointer for the next contig
createNextContigFilePointer();
// We'll need to create a fresh iterator for this file pointer when we create the first
// shard for it below.
currentContigReadsIterator = null;
}
// At this point our currentContigReadsIterator may be null or non-null depending on whether or not
// this is our first shard for this file pointer.
if ( currentContigFilePointer != null ) {
Shard shard = new ReadShard(parser,readsDataSource, currentContigFilePointer.fileSpans, currentContigFilePointer.locations, currentContigFilePointer.isRegionUnmapped);
// Create a new reads iterator only when we've just advanced to the file pointer for the next
// contig. It's essential that the iterators persist across all shards that share the same contig
// to allow the downsampling to work properly.
if ( currentContigReadsIterator == null ) {
currentContigReadsIterator = new PeekableIterator<SAMRecord>(readsDataSource.getIterator(shard));
}
if ( currentContigReadsIterator.hasNext() ) {
shard.fill(currentContigReadsIterator);
nextShard = shard;
}
}
}
}
/**
* Aggregate all FilePointers for the next contig together into one monolithic FilePointer
* to avoid boundary issues with visiting the same file regions more than once (since more
* granular FilePointers will have regions that overlap with other nearby FilePointers due
* to the nature of BAM indices).
*
* By creating one persistent set of iterators per contig we also avoid boundary artifacts
* in the engine-level downsampling.
*
* TODO: This FilePointer aggregation should ideally be done at the BAMSchedule level for
* TODO: read traversals, as there's little point in the BAMSchedule emitting extremely
* TODO: granular FilePointers if we're just going to union them. The BAMSchedule should
* TODO: emit one FilePointer per contig for read traversals (but, crucially, NOT for
* TODO: locus traversals).
*/
private void createNextContigFilePointer() {
// Sentinel: stays null if the FilePointer iterator is exhausted, which signals end of traversal
currentContigFilePointer = null;
List<FilePointer> nextContigFilePointers = new ArrayList<FilePointer>();
logger.info("Loading BAM index data for next contig");
while ( filePointers.hasNext() ) {
// Make sure that if we see a monolithic FilePointer (representing all regions in all files) that
// it is the ONLY FilePointer we ever encounter
if ( encounteredMonolithicFilePointer ) {
throw new ReviewedStingException("Bug: encountered additional FilePointers after encountering a monolithic FilePointer");
}
if ( filePointers.peek().isMonolithic() ) {
// A monolithic FilePointer must be the very first (and only) one consumed
if ( totalFilePointersConsumed > 0 ) {
throw new ReviewedStingException("Bug: encountered additional FilePointers before encountering a monolithic FilePointer");
}
encounteredMonolithicFilePointer = true;
logger.debug(String.format("Encountered monolithic FilePointer: %s", filePointers.peek()));
}
// If this is the first FP we've seen, or we're dealing with mapped regions and the next FP is on the
// same contig as previous FPs, or all our FPs are unmapped, add the next FP to the list of FPs to merge
// (note: peek() is used so a non-matching FP is left on the iterator for the next call)
if ( nextContigFilePointers.isEmpty() ||
(! nextContigFilePointers.get(0).isRegionUnmapped && ! filePointers.peek().isRegionUnmapped &&
nextContigFilePointers.get(0).getContigIndex() == filePointers.peek().getContigIndex()) ||
(nextContigFilePointers.get(0).isRegionUnmapped && filePointers.peek().isRegionUnmapped) ) {
nextContigFilePointers.add(filePointers.next());
totalFilePointersConsumed++;
}
else {
break; // next FilePointer is on a different contig or has different mapped/unmapped status,
// save it for next time
}
}
// Merge all FilePointers gathered for this contig into one monolithic-per-contig FilePointer
if ( ! nextContigFilePointers.isEmpty() ) {
currentContigFilePointer = FilePointer.union(nextContigFilePointers, parser);
}
// "Done loading" is only reported when we actually found a next contig to traverse
if ( currentContigFilePointer != null ) {
logger.info("Done loading BAM index data for next contig");
logger.debug(String.format("Next contig FilePointer: %s", currentContigFilePointer));
}
}
/**
 * Removal is meaningless for this iterator: shards are synthesized on the fly
 * from FilePointers rather than drawn from a backing collection.
 *
 * @throws UnsupportedOperationException always
 */
public void remove() {
throw new UnsupportedOperationException("Unable to remove from shard balancing iterator");
}
};
}
}

View File

@ -0,0 +1,129 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.reads;
import net.sf.samtools.GATKBAMFileSpan;
import net.sf.samtools.SAMFileSpan;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.NoSuchElementException;
/**
* Divide up large file pointers containing reads into more manageable subcomponents.
*
* TODO: delete this class once the experimental downsampling engine fork collapses
*/
public class LegacyReadShardBalancer extends ShardBalancer {
/**
* Convert iterators of file pointers into balanced iterators of shards.
*
* Each call produces a fresh, stateful iterator that walks the FilePointers supplied
* by the enclosing ShardBalancer, trimming each pointer's file spans to exclude
* regions already emitted, and packaging the remainder into ReadShards.
*
* @return An iterator over balanced shards.
*/
public Iterator<Shard> iterator() {
return new Iterator<Shard>() {
/**
* The cached shard to be returned next. Prefetched in the peekable iterator style.
*/
private Shard nextShard = null;
/**
* The file pointer currently being processed.
*/
private FilePointer currentFilePointer;
/**
* Per-reader file position immediately after the last shard handed out; used to trim
* already-visited regions from the next FilePointer's spans (granular BAM-index
* FilePointers can overlap their neighbors).
*/
private Map<SAMReaderID,GATKBAMFileSpan> position = readsDataSource.getCurrentPosition();
// Instance initializer: prime the first FilePointer and prefetch the first shard
{
if(filePointers.hasNext())
currentFilePointer = filePointers.next();
advance();
}
public boolean hasNext() {
return nextShard != null;
}
public Shard next() {
if(!hasNext())
throw new NoSuchElementException("No next read shard available");
Shard currentShard = nextShard;
advance();
return currentShard;
}
/**
* Removal is meaningless here; shards are synthesized rather than stored.
* @throws UnsupportedOperationException always
*/
public void remove() {
throw new UnsupportedOperationException("Unable to remove from shard balancing iterator");
}
// Prefetch the next non-empty shard into nextShard, or leave it null at end of input.
// May consume several FilePointers if their remaining spans contain no read data.
private void advance() {
Map<SAMReaderID,SAMFileSpan> shardPosition;
nextShard = null;
Map<SAMReaderID,SAMFileSpan> selectedReaders = new HashMap<SAMReaderID,SAMFileSpan>();
while(selectedReaders.size() == 0 && currentFilePointer != null) {
shardPosition = currentFilePointer.fileSpans;
// Clip each reader's span to exclude everything before the current position,
// so overlapping FilePointers never revisit the same file regions
for(SAMReaderID id: shardPosition.keySet()) {
SAMFileSpan fileSpan = new GATKBAMFileSpan(shardPosition.get(id).removeContentsBefore(position.get(id)));
selectedReaders.put(id,fileSpan);
}
if(!isEmpty(selectedReaders)) {
Shard shard = new ReadShard(parser,readsDataSource,selectedReaders,currentFilePointer.locations,currentFilePointer.isRegionUnmapped);
readsDataSource.fillShard(shard);
// The shard's buffer can still be empty after filling (e.g. all reads filtered);
// in that case fall through and try the next FilePointer
if(!shard.isBufferEmpty()) {
nextShard = shard;
break;
}
}
selectedReaders.clear();
currentFilePointer = filePointers.hasNext() ? filePointers.next() : null;
}
// Record where the data source stopped so the next shard resumes after this one
position = readsDataSource.getCurrentPosition();
}
/**
* Determines whether the selected file spans contain no read data.
* @param selectedSpans Mapping of readers to file spans.
* @return True if file spans are completely empty; false otherwise.
*/
private boolean isEmpty(Map<SAMReaderID,SAMFileSpan> selectedSpans) {
for(SAMFileSpan fileSpan: selectedSpans.values()) {
if(!fileSpan.isEmpty())
return false;
}
return true;
}
};
}
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2011, The Broad Institute
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
@ -24,20 +24,49 @@
package org.broadinstitute.sting.gatk.datasources.reads;
import net.sf.samtools.GATKBAMFileSpan;
import net.sf.samtools.SAMFileSpan;
import net.sf.picard.util.PeekableIterator;
import net.sf.samtools.SAMRecord;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.*;
/**
* Divide up large file pointers containing reads into more manageable subcomponents.
* Convert from an unbalanced iterator over FilePointers to a balanced iterator over Shards.
*
* TODO: delete this class once the experimental downsampling engine fork collapses
* When processing FilePointers, our strategy is to aggregate all FilePointers for each contig
* together into one monolithic FilePointer, create one persistent set of read iterators over
* that monolithic FilePointer, and repeatedly use that persistent set of read iterators to
* fill read shards with reads.
*
* This strategy has several important advantages:
*
* 1. We avoid issues with file span overlap. FilePointers that are more granular than a whole
* contig will have regions that overlap with other FilePointers on the same contig, due
* to the limited granularity of BAM index data. By creating only one FilePointer per contig,
* we avoid having to track how much of each file region we've visited (as we did in the
* former implementation), we avoid expensive non-sequential access patterns in the files,
* and we avoid having to repeatedly re-create our iterator chain for every small region
* of interest.
*
* 2. We avoid boundary issues with the engine-level downsampling. Since we create a single
* persistent set of read iterators (which include the downsampling iterator(s)) per contig,
* the downsampling process is never interrupted by FilePointer or Shard boundaries, and never
* loses crucial state information while downsampling within a contig.
*
* TODO: There is also at least one important disadvantage:
*
* 1. We load more BAM index data into memory at once, and this work is done upfront before processing
* the next contig, creating a delay before traversal of each contig. This delay may be
* compensated for by the gains listed in #1 above, and we may be no worse off overall in
* terms of total runtime, but we need to verify this empirically.
*
* @author David Roazen
*/
public class ReadShardBalancer extends ShardBalancer {
private static Logger logger = Logger.getLogger(ReadShardBalancer.class);
/**
* Convert iterators of file pointers into balanced iterators of shards.
* @return An iterator over balanced shards.
@ -52,16 +81,27 @@ public class ReadShardBalancer extends ShardBalancer {
/**
* The file pointer currently being processed.
*/
private FilePointer currentFilePointer;
private FilePointer currentContigFilePointer = null;
/**
* Ending position of the last shard in the file.
* Iterator over the reads from the current contig's file pointer. The same iterator will be
* used to fill all shards associated with a given file pointer
*/
private Map<SAMReaderID,GATKBAMFileSpan> position = readsDataSource.getCurrentPosition();
private PeekableIterator<SAMRecord> currentContigReadsIterator = null;
/**
* How many FilePointers have we pulled from the filePointers iterator?
*/
private int totalFilePointersConsumed = 0;
/**
* Have we encountered a monolithic FilePointer?
*/
private boolean encounteredMonolithicFilePointer = false;
{
if(filePointers.hasNext())
currentFilePointer = filePointers.next();
createNextContigFilePointer();
advance();
}
@ -70,58 +110,117 @@ public class ReadShardBalancer extends ShardBalancer {
}
public Shard next() {
if(!hasNext())
if ( ! hasNext() )
throw new NoSuchElementException("No next read shard available");
Shard currentShard = nextShard;
advance();
return currentShard;
}
public void remove() {
throw new UnsupportedOperationException("Unable to remove from shard balancing iterator");
}
private void advance() {
Map<SAMReaderID,SAMFileSpan> shardPosition;
nextShard = null;
Map<SAMReaderID,SAMFileSpan> selectedReaders = new HashMap<SAMReaderID,SAMFileSpan>();
while(selectedReaders.size() == 0 && currentFilePointer != null) {
shardPosition = currentFilePointer.fileSpans;
// May need multiple iterations to fill the next shard if all reads in current file spans get filtered/downsampled away
while ( nextShard == null && currentContigFilePointer != null ) {
for(SAMReaderID id: shardPosition.keySet()) {
SAMFileSpan fileSpan = new GATKBAMFileSpan(shardPosition.get(id).removeContentsBefore(position.get(id)));
selectedReaders.put(id,fileSpan);
// If we've exhausted the current file pointer of reads, move to the next file pointer (if there is one):
if ( currentContigReadsIterator != null && ! currentContigReadsIterator.hasNext() ) {
// Close the old, exhausted chain of iterators to release resources
currentContigReadsIterator.close();
// Advance to the FilePointer for the next contig
createNextContigFilePointer();
// We'll need to create a fresh iterator for this file pointer when we create the first
// shard for it below.
currentContigReadsIterator = null;
}
if(!isEmpty(selectedReaders)) {
Shard shard = new ReadShard(parser,readsDataSource,selectedReaders,currentFilePointer.locations,currentFilePointer.isRegionUnmapped);
readsDataSource.fillShard(shard);
// At this point our currentContigReadsIterator may be null or non-null depending on whether or not
// this is our first shard for this file pointer.
if ( currentContigFilePointer != null ) {
Shard shard = new ReadShard(parser,readsDataSource, currentContigFilePointer.fileSpans, currentContigFilePointer.locations, currentContigFilePointer.isRegionUnmapped);
if(!shard.isBufferEmpty()) {
// Create a new reads iterator only when we've just advanced to the file pointer for the next
// contig. It's essential that the iterators persist across all shards that share the same contig
// to allow the downsampling to work properly.
if ( currentContigReadsIterator == null ) {
currentContigReadsIterator = new PeekableIterator<SAMRecord>(readsDataSource.getIterator(shard));
}
if ( currentContigReadsIterator.hasNext() ) {
shard.fill(currentContigReadsIterator);
nextShard = shard;
break;
}
}
selectedReaders.clear();
currentFilePointer = filePointers.hasNext() ? filePointers.next() : null;
}
position = readsDataSource.getCurrentPosition();
}
/**
* Detects whether the list of file spans contain any read data.
* @param selectedSpans Mapping of readers to file spans.
* @return True if file spans are completely empty; false otherwise.
* Aggregate all FilePointers for the next contig together into one monolithic FilePointer
* to avoid boundary issues with visiting the same file regions more than once (since more
* granular FilePointers will have regions that overlap with other nearby FilePointers due
* to the nature of BAM indices).
*
* By creating one persistent set of iterators per contig we also avoid boundary artifacts
* in the engine-level downsampling.
*
* TODO: This FilePointer aggregation should ideally be done at the BAMSchedule level for
* TODO: read traversals, as there's little point in the BAMSchedule emitting extremely
* TODO: granular FilePointers if we're just going to union them. The BAMSchedule should
* TODO: emit one FilePointer per contig for read traversals (but, crucially, NOT for
* TODO: locus traversals).
*/
private boolean isEmpty(Map<SAMReaderID,SAMFileSpan> selectedSpans) {
for(SAMFileSpan fileSpan: selectedSpans.values()) {
if(!fileSpan.isEmpty())
return false;
private void createNextContigFilePointer() {
currentContigFilePointer = null;
List<FilePointer> nextContigFilePointers = new ArrayList<FilePointer>();
logger.info("Loading BAM index data for next contig");
while ( filePointers.hasNext() ) {
// Make sure that if we see a monolithic FilePointer (representing all regions in all files) that
// it is the ONLY FilePointer we ever encounter
if ( encounteredMonolithicFilePointer ) {
throw new ReviewedStingException("Bug: encountered additional FilePointers after encountering a monolithic FilePointer");
}
if ( filePointers.peek().isMonolithic() ) {
if ( totalFilePointersConsumed > 0 ) {
throw new ReviewedStingException("Bug: encountered additional FilePointers before encountering a monolithic FilePointer");
}
encounteredMonolithicFilePointer = true;
logger.debug(String.format("Encountered monolithic FilePointer: %s", filePointers.peek()));
}
// If this is the first FP we've seen, or we're dealing with mapped regions and the next FP is on the
// same contig as previous FPs, or all our FPs are unmapped, add the next FP to the list of FPs to merge
if ( nextContigFilePointers.isEmpty() ||
(! nextContigFilePointers.get(0).isRegionUnmapped && ! filePointers.peek().isRegionUnmapped &&
nextContigFilePointers.get(0).getContigIndex() == filePointers.peek().getContigIndex()) ||
(nextContigFilePointers.get(0).isRegionUnmapped && filePointers.peek().isRegionUnmapped) ) {
nextContigFilePointers.add(filePointers.next());
totalFilePointersConsumed++;
}
else {
break; // next FilePointer is on a different contig or has different mapped/unmapped status,
// save it for next time
}
}
return true;
if ( ! nextContigFilePointers.isEmpty() ) {
currentContigFilePointer = FilePointer.union(nextContigFilePointers, parser);
}
if ( currentContigFilePointer != null ) {
logger.info("Done loading BAM index data for next contig");
logger.debug(String.format("Next contig FilePointer: %s", currentContigFilePointer));
}
}
public void remove() {
throw new UnsupportedOperationException("Unable to remove from shard balancing iterator");
}
};
}

View File

@ -466,7 +466,7 @@ public class SAMDataSource {
/**
* Legacy method to fill the given buffering shard with reads.
*
* Shard.fill() is used instead of this method when experimental downsampling is enabled
* Shard.fill() is used instead of this method unless legacy downsampling is enabled
*
* TODO: delete this method once the experimental downsampling engine fork collapses
*
@ -638,7 +638,8 @@ public class SAMDataSource {
readProperties.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION),
readProperties.getSupplementalFilters(),
readProperties.getReadTransformers(),
readProperties.defaultBaseQualities());
readProperties.defaultBaseQualities(),
shard instanceof LocusShard);
}
private class BAMCodecIterator implements CloseableIterator<SAMRecord> {
@ -695,6 +696,7 @@ public class SAMDataSource {
* @param noValidationOfReadOrder Another trigger for the verifying iterator? TODO: look into this.
* @param supplementalFilters additional filters to apply to the reads.
* @param defaultBaseQualities if the reads have incomplete quality scores, set them all to defaultBaseQuality.
* @param isLocusBasedTraversal true if we're dealing with a read stream from a LocusShard
* @return An iterator wrapped with filters reflecting the passed-in parameters. Will not be null.
*/
protected StingSAMIterator applyDecoratingIterators(ReadMetrics readMetrics,
@ -705,7 +707,8 @@ public class SAMDataSource {
Boolean noValidationOfReadOrder,
Collection<ReadFilter> supplementalFilters,
List<ReadTransformer> readTransformers,
byte defaultBaseQualities) {
byte defaultBaseQualities,
boolean isLocusBasedTraversal ) {
// ************************************************************************************************ //
// * NOTE: ALL FILTERING/DOWNSAMPLING SHOULD BE DONE BEFORE ANY ITERATORS THAT MODIFY THE READS! * //
@ -714,12 +717,26 @@ public class SAMDataSource {
wrappedIterator = StingSAMIteratorAdapter.adapt(new CountingFilteringIterator(readMetrics,wrappedIterator,supplementalFilters));
if ( readProperties.getDownsamplingMethod().useExperimentalDownsampling ) {
wrappedIterator = applyDownsamplingIterator(wrappedIterator);
// If we're using the new downsampling implementation, apply downsampling iterators at this
// point in the read stream for most (but not all) cases
if ( ! readProperties.getDownsamplingMethod().useLegacyDownsampler ) {
// For locus traversals where we're downsampling to coverage by sample, assume that the downsamplers
// will be invoked downstream from us in LocusIteratorByState. This improves performance by avoiding
// splitting/re-assembly of the read stream at this stage, and also allows for partial downsampling
// of individual reads.
boolean assumeDownstreamLIBSDownsampling = isLocusBasedTraversal &&
readProperties.getDownsamplingMethod().type == DownsampleType.BY_SAMPLE &&
readProperties.getDownsamplingMethod().toCoverage != null;
if ( ! assumeDownstreamLIBSDownsampling ) {
wrappedIterator = applyDownsamplingIterator(wrappedIterator);
}
}
// Use the old fractional downsampler only if we're not using experimental downsampling:
if ( ! readProperties.getDownsamplingMethod().useExperimentalDownsampling && downsamplingFraction != null )
// Use the old fractional downsampler only if we're using legacy downsampling:
// TODO: remove this statement (and associated classes) once the downsampling engine fork collapses
if ( readProperties.getDownsamplingMethod().useLegacyDownsampler && downsamplingFraction != null )
wrappedIterator = new LegacyDownsampleIterator(wrappedIterator, downsamplingFraction);
// unless they've said not to validate read ordering (!noValidationOfReadOrder) and we've enabled verification,
@ -741,19 +758,37 @@ public class SAMDataSource {
}
protected StingSAMIterator applyDownsamplingIterator( StingSAMIterator wrappedIterator ) {
if ( readProperties.getDownsamplingMethod().type == DownsampleType.BY_SAMPLE ) {
ReadsDownsamplerFactory<SAMRecord> downsamplerFactory = readProperties.getDownsamplingMethod().toCoverage != null ?
new SimplePositionalDownsamplerFactory<SAMRecord>(readProperties.getDownsamplingMethod().toCoverage) :
new FractionalDownsamplerFactory<SAMRecord>(readProperties.getDownsamplingMethod().toFraction);
return new PerSampleDownsamplingReadsIterator(wrappedIterator, downsamplerFactory);
if ( readProperties.getDownsamplingMethod() == null ||
readProperties.getDownsamplingMethod().type == DownsampleType.NONE ) {
return wrappedIterator;
}
else if ( readProperties.getDownsamplingMethod().type == DownsampleType.ALL_READS ) {
ReadsDownsampler<SAMRecord> downsampler = readProperties.getDownsamplingMethod().toCoverage != null ?
new SimplePositionalDownsampler<SAMRecord>(readProperties.getDownsamplingMethod().toCoverage) :
new FractionalDownsampler<SAMRecord>(readProperties.getDownsamplingMethod().toFraction);
return new DownsamplingReadsIterator(wrappedIterator, downsampler);
if ( readProperties.getDownsamplingMethod().toFraction != null ) {
// If we're downsampling to a fraction of reads, there's no point in paying the cost of
// splitting/re-assembling the read stream by sample to run the FractionalDownsampler on
// reads from each sample separately, since the result would be the same as running the
// FractionalDownsampler on the entire stream. So, ALWAYS use the DownsamplingReadsIterator
// rather than the PerSampleDownsamplingReadsIterator, even if BY_SAMPLE downsampling
// was requested.
return new DownsamplingReadsIterator(wrappedIterator,
new FractionalDownsampler<SAMRecord>(readProperties.getDownsamplingMethod().toFraction));
}
else if ( readProperties.getDownsamplingMethod().toCoverage != null ) {
// If we're downsampling to coverage, we DO need to pay the cost of splitting/re-assembling
// the read stream to run the downsampler on the reads for each individual sample separately if
// BY_SAMPLE downsampling was requested.
if ( readProperties.getDownsamplingMethod().type == DownsampleType.BY_SAMPLE ) {
return new PerSampleDownsamplingReadsIterator(wrappedIterator,
new SimplePositionalDownsamplerFactory<SAMRecord>(readProperties.getDownsamplingMethod().toCoverage));
}
else if ( readProperties.getDownsamplingMethod().type == DownsampleType.ALL_READS ) {
return new DownsamplingReadsIterator(wrappedIterator,
new SimplePositionalDownsampler<SAMRecord>(readProperties.getDownsamplingMethod().toCoverage));
}
}
return wrappedIterator;

View File

@ -50,9 +50,9 @@ public class DownsamplingMethod {
public final Double toFraction;
/**
* Use the new experimental downsampling?
* Use the legacy downsampling implementation instead of the newer implementation?
*/
public final boolean useExperimentalDownsampling;
public final boolean useLegacyDownsampler;
/**
* Expresses no downsampling applied at all.
@ -69,11 +69,11 @@ public class DownsamplingMethod {
*/
public static int DEFAULT_LOCUS_BASED_TRAVERSAL_DOWNSAMPLING_COVERAGE = 1000;
public DownsamplingMethod( DownsampleType type, Integer toCoverage, Double toFraction, boolean useExperimentalDownsampling ) {
public DownsamplingMethod( DownsampleType type, Integer toCoverage, Double toFraction, boolean useLegacyDownsampler ) {
this.type = type != null ? type : DEFAULT_DOWNSAMPLING_TYPE;
this.toCoverage = toCoverage;
this.toFraction = toFraction;
this.useExperimentalDownsampling = useExperimentalDownsampling;
this.useLegacyDownsampler = useLegacyDownsampler;
if ( type == DownsampleType.NONE ) {
toCoverage = null;
@ -101,19 +101,19 @@ public class DownsamplingMethod {
if ( toFraction != null && (toFraction < 0.0 || toFraction > 1.0) ) {
throw new UserException.CommandLineException("toFraction must be >= 0.0 and <= 1.0 when downsampling to a fraction of reads");
}
}
// Some restrictions only exist for the old downsampling implementation:
if ( ! useExperimentalDownsampling ) {
// By sample downsampling does not work with a fraction of reads in the old downsampling implementation
if( type == DownsampleType.BY_SAMPLE && toFraction != null )
throw new UserException.CommandLineException("Cannot downsample to fraction with the BY_SAMPLE method");
public void checkCompatibilityWithWalker( Walker walker ) {
boolean isLocusTraversal = walker instanceof LocusWalker || walker instanceof ActiveRegionWalker;
if ( ! isLocusTraversal && useLegacyDownsampler && toCoverage != null ) {
throw new UserException.CommandLineException("Downsampling to coverage for read-based traversals (eg., ReadWalkers) is not supported in the legacy downsampling implementation. " +
"The newer downsampling implementation does not have this limitation.");
}
// Some restrictions only exist for the new downsampling implementation:
if ( useExperimentalDownsampling ) {
if ( type == DownsampleType.ALL_READS && toCoverage != null ) {
throw new UserException.CommandLineException("Cannot downsample to coverage with the ALL_READS method in the experimental downsampling implementation");
}
if ( isLocusTraversal && ! useLegacyDownsampler && type == DownsampleType.ALL_READS && toCoverage != null ) {
throw new UserException.CommandLineException("Downsampling to coverage with the ALL_READS method for locus-based traversals (eg., LocusWalkers) is not yet supported in the new downsampling implementation (though it is supported for ReadWalkers). " +
"You can run with --use_legacy_downsampler for a broken and poorly-maintained implementation of ALL_READS to-coverage downsampling, but this is not recommended.");
}
}
@ -124,30 +124,34 @@ public class DownsamplingMethod {
builder.append("No downsampling");
}
else {
builder.append(String.format("Method: %s ", type));
builder.append(String.format("Method: %s, ", type));
if ( toCoverage != null ) {
builder.append(String.format("Target Coverage: %d ", toCoverage));
builder.append(String.format("Target Coverage: %d, ", toCoverage));
}
else {
builder.append(String.format("Target Fraction: %.2f ", toFraction));
builder.append(String.format("Target Fraction: %.2f, ", toFraction));
}
if ( useExperimentalDownsampling ) {
builder.append("Using Experimental Downsampling");
if ( useLegacyDownsampler ) {
builder.append("Using the legacy downsampling implementation");
}
else {
builder.append("Using the new downsampling implementation");
}
}
return builder.toString();
}
public static DownsamplingMethod getDefaultDownsamplingMethod( Walker walker, boolean useExperimentalDownsampling ) {
public static DownsamplingMethod getDefaultDownsamplingMethod( Walker walker, boolean useLegacyDownsampler ) {
if ( walker instanceof LocusWalker || walker instanceof ActiveRegionWalker ) {
return new DownsamplingMethod(DEFAULT_DOWNSAMPLING_TYPE, DEFAULT_LOCUS_BASED_TRAVERSAL_DOWNSAMPLING_COVERAGE,
null, useExperimentalDownsampling);
null, useLegacyDownsampler);
}
else {
return new DownsamplingMethod(DownsampleType.NONE, null, null, useExperimentalDownsampling);
// Downsampling is off by default for non-locus-based traversals
return new DownsamplingMethod(DownsampleType.NONE, null, null, useLegacyDownsampler);
}
}
}

View File

@ -0,0 +1,106 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.downsampling;
import net.sf.samtools.SAMRecord;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
/**
* Pass-Through Downsampler: Implementation of the ReadsDownsampler interface that does no
* downsampling whatsoever, and instead simply passes through all the reads it is given.
* Useful for situations where you want to disable downsampling, but still need to use
* the downsampler interface.
*
* @author David Roazen
*/
public class PassThroughDownsampler<T extends SAMRecord> implements ReadsDownsampler<T> {

    // Every submitted read accumulates here until the client consumes or clears them.
    private ArrayList<T> selectedReads;

    public PassThroughDownsampler() {
        clear();
    }

    public void submit( T newRead ) {
        // Nothing is ever downsampled away: retain every read as-is
        selectedReads.add(newRead);
    }

    public void submit( Collection<T> newReads ) {
        // Delegate to the single-read overload for each element
        for ( T nextRead : newReads ) {
            submit(nextRead);
        }
    }

    public boolean hasFinalizedItems() {
        return ! selectedReads.isEmpty();
    }

    public List<T> consumeFinalizedItems() {
        // Hand the caller our internal list directly (no copy, for speed) and start fresh
        final List<T> finalizedReads = selectedReads;
        clear();
        return finalizedReads;
    }

    public boolean hasPendingItems() {
        // Reads are finalized the moment they are submitted, so nothing is ever pending
        return false;
    }

    public T peekFinalized() {
        if ( selectedReads.isEmpty() ) {
            return null;
        }
        return selectedReads.get(0);
    }

    public T peekPending() {
        // No pending state exists in a pass-through downsampler
        return null;
    }

    public int getNumberOfDiscardedItems() {
        // By definition, a pass-through downsampler never discards anything
        return 0;
    }

    public void signalEndOfInput() {
        // NO-OP: no buffered state needs flushing
    }

    public void clear() {
        selectedReads = new ArrayList<T>();
    }

    public void reset() {
        // NO-OP: there is no downsampling state to reset
    }

    public boolean requiresCoordinateSortOrder() {
        // Order-independent, since reads are never compared against one another
        return false;
    }

    public void signalNoMoreReadsBefore( T read ) {
        // NO-OP: positional hints are irrelevant when nothing is downsampled
    }
}

View File

@ -43,7 +43,6 @@ import org.broadinstitute.sting.utils.AutoFormattingTime;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.nanoScheduler.NanoScheduler;
import org.broadinstitute.sting.utils.progressmeter.ProgressMeter;
import org.broadinstitute.sting.utils.threading.ThreadEfficiencyMonitor;
@ -346,9 +345,6 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
for ( final TraversalEngine te : allCreatedTraversalEngines)
te.shutdown();
// horrible hack to print nano scheduling information across all nano schedulers, if any were used
NanoScheduler.printCombinedRuntimeProfile();
allCreatedTraversalEngines.clear();
availableTraversalEngines.clear();
}

View File

@ -4,9 +4,9 @@ import net.sf.picard.util.PeekableIterator;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.iterators.LegacyLocusIteratorByState;
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByStateExperimental;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
@ -83,17 +83,18 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
this.sourceInfo = shard.getReadProperties();
this.readIterator = iterator;
// Temporary: use the experimental version of LocusIteratorByState if experimental downsampling was requested:
this.sourceIterator = sourceInfo.getDownsamplingMethod().useExperimentalDownsampling ?
new PeekableIterator<AlignmentContext>(new LocusIteratorByStateExperimental(iterator,sourceInfo,genomeLocParser, sampleNames))
// Use the legacy version of LocusIteratorByState if legacy downsampling was requested:
this.sourceIterator = sourceInfo.getDownsamplingMethod().useLegacyDownsampler ?
new PeekableIterator<AlignmentContext>(new LegacyLocusIteratorByState(iterator,sourceInfo,genomeLocParser,sampleNames))
:
new PeekableIterator<AlignmentContext>(new LocusIteratorByState(iterator,sourceInfo,genomeLocParser, sampleNames));
new PeekableIterator<AlignmentContext>(new LocusIteratorByState(iterator,sourceInfo,genomeLocParser,sampleNames));
this.intervalIterator = intervals.size()>0 ? new PeekableIterator<GenomeLoc>(intervals.iterator()) : null;
}
public WindowMaker(Shard shard, GenomeLocParser genomeLocParser, StingSAMIterator iterator, List<GenomeLoc> intervals ) {
this(shard, genomeLocParser, iterator, intervals, LocusIteratorByState.sampleListForSAMWithoutReadGroups());
this(shard, genomeLocParser, iterator, intervals, LegacyLocusIteratorByState.sampleListForSAMWithoutReadGroups());
}
public Iterator<WindowMakerIterator> iterator() {

View File

@ -31,14 +31,14 @@ import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
import org.broadinstitute.sting.gatk.downsampling.Downsampler;
import org.broadinstitute.sting.gatk.downsampling.LevelingDownsampler;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.LegacyReservoirDownsampler;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
@ -50,11 +50,11 @@ import java.util.*;
/**
* Iterator that traverses a SAM File, accumulating information on a per-locus basis
*/
public class LocusIteratorByStateExperimental extends LocusIterator {
public class LegacyLocusIteratorByState extends LocusIterator {
/**
* our log, which we want to capture anything from this class
*/
private static Logger logger = Logger.getLogger(LocusIteratorByState.class);
private static Logger logger = Logger.getLogger(LegacyLocusIteratorByState.class);
// -----------------------------------------------------------------------------------------------------------------
//
@ -69,7 +69,7 @@ public class LocusIteratorByStateExperimental extends LocusIterator {
private final ArrayList<String> samples;
private final ReadStateManager readStates;
protected static class SAMRecordState {
static private class SAMRecordState {
SAMRecord read;
int readOffset = -1; // how far are we offset from the start of the read bases?
int genomeOffset = -1; // how far are we offset from the alignment start on the genome?
@ -213,7 +213,6 @@ public class LocusIteratorByStateExperimental extends LocusIterator {
//final boolean DEBUG2 = false && DEBUG;
private ReadProperties readInfo;
private AlignmentContext nextAlignmentContext;
private boolean performLevelingDownsampling;
// -----------------------------------------------------------------------------------------------------------------
//
@ -221,15 +220,11 @@ public class LocusIteratorByStateExperimental extends LocusIterator {
//
// -----------------------------------------------------------------------------------------------------------------
public LocusIteratorByStateExperimental(final Iterator<SAMRecord> samIterator, ReadProperties readInformation, GenomeLocParser genomeLocParser, Collection<String> samples) {
public LegacyLocusIteratorByState(final Iterator<SAMRecord> samIterator, ReadProperties readInformation, GenomeLocParser genomeLocParser, Collection<String> samples) {
this.readInfo = readInformation;
this.genomeLocParser = genomeLocParser;
this.samples = new ArrayList<String>(samples);
this.readStates = new ReadStateManager(samIterator);
this.performLevelingDownsampling = readInfo.getDownsamplingMethod() != null &&
readInfo.getDownsamplingMethod().type == DownsampleType.BY_SAMPLE &&
readInfo.getDownsamplingMethod().toCoverage != null;
this.readStates = new ReadStateManager(samIterator, readInformation.getDownsamplingMethod());
// currently the GATK expects this LocusIteratorByState to accept empty sample lists, when
// there's no read data. So we need to throw this error only when samIterator.hasNext() is true
@ -290,13 +285,11 @@ public class LocusIteratorByStateExperimental extends LocusIterator {
final GenomeLoc location = getLocation();
final Map<String, ReadBackedPileupImpl> fullPileup = new HashMap<String, ReadBackedPileupImpl>();
// TODO: How can you determine here whether the current pileup has been downsampled?
boolean hasBeenSampled = false;
for (final String sample : samples) {
final Iterator<SAMRecordState> iterator = readStates.iterator(sample);
final List<PileupElement> pile = new ArrayList<PileupElement>(readStates.size(sample));
hasBeenSampled |= location.getStart() <= readStates.getDownsamplingExtent(sample);
int size = 0; // number of elements in this sample's pileup
int nDeletions = 0; // number of deletions in this sample's pileup
@ -405,20 +398,34 @@ public class LocusIteratorByStateExperimental extends LocusIterator {
throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
}
protected class ReadStateManager {
private class ReadStateManager {
private final PeekableIterator<SAMRecord> iterator;
private final DownsamplingMethod downsamplingMethod;
private final SamplePartitioner samplePartitioner;
private final Map<String, PerSampleReadStateManager> readStatesBySample = new HashMap<String, PerSampleReadStateManager>();
private final int targetCoverage;
private int totalReadStates = 0;
public ReadStateManager(Iterator<SAMRecord> source) {
public ReadStateManager(Iterator<SAMRecord> source, DownsamplingMethod downsamplingMethod) {
this.iterator = new PeekableIterator<SAMRecord>(source);
for (final String sample : samples) {
readStatesBySample.put(sample, new PerSampleReadStateManager());
this.downsamplingMethod = downsamplingMethod.type != null ? downsamplingMethod : DownsamplingMethod.NONE;
switch (this.downsamplingMethod.type) {
case BY_SAMPLE:
if (downsamplingMethod.toCoverage == null)
throw new UserException.BadArgumentValue("dcov", "Downsampling coverage (-dcov) must be specified when downsampling by sample");
this.targetCoverage = downsamplingMethod.toCoverage;
break;
default:
this.targetCoverage = Integer.MAX_VALUE;
}
samplePartitioner = new SamplePartitioner();
Map<String, ReadSelector> readSelectors = new HashMap<String, ReadSelector>();
for (final String sample : samples) {
readStatesBySample.put(sample, new PerSampleReadStateManager());
readSelectors.put(sample, downsamplingMethod.type == DownsampleType.BY_SAMPLE ? new NRandomReadSelector(null, targetCoverage) : new AllReadsSelector());
}
samplePartitioner = new SamplePartitioner(readSelectors);
}
/**
@ -442,6 +449,7 @@ public class LocusIteratorByStateExperimental extends LocusIterator {
public void remove() {
wrappedIterator.remove();
totalReadStates--;
}
};
}
@ -469,6 +477,17 @@ public class LocusIteratorByStateExperimental extends LocusIterator {
return readStatesBySample.get(sample).size();
}
/**
* The extent of downsampling; basically, the furthest base out which has 'fallen
* victim' to the downsampler.
*
* @param sample Sample, downsampled independently.
* @return Integer stop of the furthest undownsampled region.
*/
public int getDownsamplingExtent(final String sample) {
return readStatesBySample.get(sample).getDownsamplingExtent();
}
public SAMRecordState getFirst() {
for (final String sample : samples) {
PerSampleReadStateManager reads = readStatesBySample.get(sample);
@ -501,13 +520,61 @@ public class LocusIteratorByStateExperimental extends LocusIterator {
samplePartitioner.submitRead(iterator.next());
}
}
samplePartitioner.complete();
for (final String sample : samples) {
Collection<SAMRecord> newReads = samplePartitioner.getReadsForSample(sample);
PerSampleReadStateManager statesBySample = readStatesBySample.get(sample);
addReadsToSample(statesBySample, newReads);
}
ReadSelector aggregator = samplePartitioner.getSelectedReads(sample);
Collection<SAMRecord> newReads = new ArrayList<SAMRecord>(aggregator.getSelectedReads());
PerSampleReadStateManager statesBySample = readStatesBySample.get(sample);
int numReads = statesBySample.size();
int downsamplingExtent = aggregator.getDownsamplingExtent();
if (numReads + newReads.size() <= targetCoverage || downsamplingMethod.type == DownsampleType.NONE) {
long readLimit = aggregator.getNumReadsSeen();
addReadsToSample(statesBySample, newReads, readLimit);
statesBySample.specifyNewDownsamplingExtent(downsamplingExtent);
} else {
int[] counts = statesBySample.getCountsPerAlignmentStart();
int[] updatedCounts = new int[counts.length];
System.arraycopy(counts, 0, updatedCounts, 0, counts.length);
boolean readPruned = true;
while (numReads + newReads.size() > targetCoverage && readPruned) {
readPruned = false;
for (int alignmentStart = updatedCounts.length - 1; numReads + newReads.size() > targetCoverage && alignmentStart >= 0; alignmentStart--) {
if (updatedCounts[alignmentStart] > 1) {
updatedCounts[alignmentStart]--;
numReads--;
readPruned = true;
}
}
}
if (numReads == targetCoverage) {
updatedCounts[0]--;
numReads--;
}
BitSet toPurge = new BitSet(readStates.size());
int readOffset = 0;
for (int i = 0; i < updatedCounts.length; i++) {
int n = counts[i];
int k = updatedCounts[i];
for (Integer purgedElement : MathUtils.sampleIndicesWithoutReplacement(n, n - k))
toPurge.set(readOffset + purgedElement);
readOffset += counts[i];
}
downsamplingExtent = Math.max(downsamplingExtent, statesBySample.purge(toPurge));
addReadsToSample(statesBySample, newReads, targetCoverage - numReads);
statesBySample.specifyNewDownsamplingExtent(downsamplingExtent);
}
}
samplePartitioner.reset();
}
@ -516,134 +583,380 @@ public class LocusIteratorByStateExperimental extends LocusIterator {
*
* @param readStates The list of read states to add this collection of reads.
* @param reads Reads to add. Selected reads will be pulled from this source.
* @param maxReads Maximum number of reads to add.
*/
private void addReadsToSample(final PerSampleReadStateManager readStates, final Collection<SAMRecord> reads) {
private void addReadsToSample(final PerSampleReadStateManager readStates, final Collection<SAMRecord> reads, final long maxReads) {
if (reads.isEmpty())
return;
Collection<SAMRecordState> newReadStates = new LinkedList<SAMRecordState>();
int readCount = 0;
for (SAMRecord read : reads) {
SAMRecordState state = new SAMRecordState(read);
state.stepForwardOnGenome();
newReadStates.add(state);
if (readCount < maxReads) {
SAMRecordState state = new SAMRecordState(read);
state.stepForwardOnGenome();
newReadStates.add(state);
readCount++;
}
}
readStates.addStatesAtNextAlignmentStart(newReadStates);
}
protected class PerSampleReadStateManager implements Iterable<SAMRecordState> {
private List<LinkedList<SAMRecordState>> readStatesByAlignmentStart = new LinkedList<LinkedList<SAMRecordState>>();
private int thisSampleReadStates = 0;
private Downsampler<LinkedList<SAMRecordState>> levelingDownsampler =
performLevelingDownsampling ?
new LevelingDownsampler<LinkedList<SAMRecordState>, SAMRecordState>(readInfo.getDownsamplingMethod().toCoverage) :
null;
private class PerSampleReadStateManager implements Iterable<SAMRecordState> {
private final Queue<SAMRecordState> readStates = new LinkedList<SAMRecordState>();
private final Deque<Counter> readStateCounter = new LinkedList<Counter>();
private int downsamplingExtent = 0;
public void addStatesAtNextAlignmentStart(Collection<SAMRecordState> states) {
if ( states.isEmpty() ) {
return;
}
readStatesByAlignmentStart.add(new LinkedList<SAMRecordState>(states));
thisSampleReadStates += states.size();
readStates.addAll(states);
readStateCounter.add(new Counter(states.size()));
totalReadStates += states.size();
if ( levelingDownsampler != null ) {
levelingDownsampler.submit(readStatesByAlignmentStart);
levelingDownsampler.signalEndOfInput();
thisSampleReadStates -= levelingDownsampler.getNumberOfDiscardedItems();
totalReadStates -= levelingDownsampler.getNumberOfDiscardedItems();
// use returned List directly rather than make a copy, for efficiency's sake
readStatesByAlignmentStart = levelingDownsampler.consumeFinalizedItems();
levelingDownsampler.reset();
}
}
public boolean isEmpty() {
return readStatesByAlignmentStart.isEmpty();
return readStates.isEmpty();
}
public SAMRecordState peek() {
return isEmpty() ? null : readStatesByAlignmentStart.get(0).peek();
return readStates.peek();
}
public int size() {
return thisSampleReadStates;
return readStates.size();
}
public void specifyNewDownsamplingExtent(int downsamplingExtent) {
this.downsamplingExtent = Math.max(this.downsamplingExtent, downsamplingExtent);
}
public int getDownsamplingExtent() {
return downsamplingExtent;
}
public int[] getCountsPerAlignmentStart() {
int[] counts = new int[readStateCounter.size()];
int index = 0;
for (Counter counter : readStateCounter)
counts[index++] = counter.getCount();
return counts;
}
public Iterator<SAMRecordState> iterator() {
return new Iterator<SAMRecordState>() {
private Iterator<LinkedList<SAMRecordState>> alignmentStartIterator = readStatesByAlignmentStart.iterator();
private LinkedList<SAMRecordState> currentPositionReadStates = null;
private Iterator<SAMRecordState> currentPositionReadStatesIterator = null;
private Iterator<SAMRecordState> wrappedIterator = readStates.iterator();
public boolean hasNext() {
return alignmentStartIterator.hasNext() ||
(currentPositionReadStatesIterator != null && currentPositionReadStatesIterator.hasNext());
return wrappedIterator.hasNext();
}
public SAMRecordState next() {
if ( currentPositionReadStatesIterator == null || ! currentPositionReadStatesIterator.hasNext() ) {
currentPositionReadStates = alignmentStartIterator.next();
currentPositionReadStatesIterator = currentPositionReadStates.iterator();
}
return currentPositionReadStatesIterator.next();
return wrappedIterator.next();
}
public void remove() {
currentPositionReadStatesIterator.remove();
thisSampleReadStates--;
totalReadStates--;
if ( currentPositionReadStates.isEmpty() ) {
alignmentStartIterator.remove();
}
wrappedIterator.remove();
Counter counter = readStateCounter.peek();
counter.decrement();
if (counter.getCount() == 0)
readStateCounter.remove();
}
};
}
/**
* Purge the given elements from the bitset. If an element in the bitset is true, purge
* the corresponding read state.
*
* @param elements bits from the set to purge.
* @return the extent of the final downsampled read.
*/
public int purge(final BitSet elements) {
int downsamplingExtent = 0;
if (elements.isEmpty() || readStates.isEmpty()) return downsamplingExtent;
Iterator<SAMRecordState> readStateIterator = readStates.iterator();
Iterator<Counter> counterIterator = readStateCounter.iterator();
Counter currentCounter = counterIterator.next();
int readIndex = 0;
long alignmentStartCounter = currentCounter.getCount();
int toPurge = elements.nextSetBit(0);
int removedCount = 0;
while (readStateIterator.hasNext() && toPurge >= 0) {
SAMRecordState state = readStateIterator.next();
downsamplingExtent = Math.max(downsamplingExtent, state.getRead().getAlignmentEnd());
if (readIndex == toPurge) {
readStateIterator.remove();
currentCounter.decrement();
if (currentCounter.getCount() == 0)
counterIterator.remove();
removedCount++;
toPurge = elements.nextSetBit(toPurge + 1);
}
readIndex++;
alignmentStartCounter--;
if (alignmentStartCounter == 0 && counterIterator.hasNext()) {
currentCounter = counterIterator.next();
alignmentStartCounter = currentCounter.getCount();
}
}
totalReadStates -= removedCount;
return downsamplingExtent;
}
}
}
/**
* Note: stores reads by sample ID string, not by sample object
* Note: assuming that, whenever we downsample, we downsample to an integer capacity.
*/
private class SamplePartitioner {
private Map<String, Collection<SAMRecord>> readsBySample;
private long readsSeen = 0;
static private class Counter {
private int count;
public SamplePartitioner() {
readsBySample = new HashMap<String, Collection<SAMRecord>>();
for ( String sample : samples ) {
readsBySample.put(sample, new ArrayList<SAMRecord>());
}
public Counter(int count) {
this.count = count;
}
public void submitRead(SAMRecord read) {
String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null;
if (readsBySample.containsKey(sampleName))
readsBySample.get(sampleName).add(read);
readsSeen++;
public int getCount() {
return count;
}
public long getNumReadsSeen() {
return readsSeen;
}
public Collection<SAMRecord> getReadsForSample(String sampleName) {
if ( ! readsBySample.containsKey(sampleName) )
throw new NoSuchElementException("Sample name not found");
return readsBySample.get(sampleName);
}
public void reset() {
for ( Collection<SAMRecord> perSampleReads : readsBySample.values() )
perSampleReads.clear();
readsSeen = 0;
public void decrement() {
count--;
}
}
}
}
/**
* Selects reads passed to it based on a criteria decided through inheritance.
* TODO: This is a temporary abstraction until we can get rid of this downsampling implementation and the mrl option. Get rid of this.
*/
interface ReadSelector {
/**
* All previous selectors in the chain have allowed this read. Submit it to this selector for consideration.
*
* @param read the read to evaluate.
*/
public void submitRead(SAMRecord read);
/**
* A previous selector has deemed this read unfit. Notify this selector so that this selector's counts are valid.
*
* @param read the read previously rejected.
*/
public void notifyReadRejected(SAMRecord read);
/**
* Signal the selector that read additions are complete.
*/
public void complete();
/**
* Retrieve the number of reads seen by this selector so far.
*
* @return number of reads seen.
*/
public long getNumReadsSeen();
/**
* Return the number of reads accepted by this selector so far.
*
* @return number of reads selected.
*/
public long getNumReadsSelected();
/**
* Gets the locus at which the last of the downsampled reads selected by this selector ends. The value returned will be the
* last aligned position from this selection to which a downsampled read aligns -- in other words, if a read is thrown out at
* position 3 whose cigar string is 76M, the value of this parameter will be 78.
*
* @return If any read has been downsampled, this will return the last aligned base of the longest alignment. Else, 0.
*/
public int getDownsamplingExtent();
/**
* Get the reads selected by this selector.
*
* @return collection of reads selected by this selector.
*/
public Collection<SAMRecord> getSelectedReads();
/**
* Reset this collection to its pre-gathered state.
*/
public void reset();
}
/**
* Select every read passed in.
*/
class AllReadsSelector implements ReadSelector {
private Collection<SAMRecord> reads = new LinkedList<SAMRecord>();
private long readsSeen = 0;
private int downsamplingExtent = 0;
public void submitRead(SAMRecord read) {
reads.add(read);
readsSeen++;
}
public void notifyReadRejected(SAMRecord read) {
readsSeen++;
downsamplingExtent = Math.max(downsamplingExtent, read.getAlignmentEnd());
}
public void complete() {
// NO-OP.
}
public long getNumReadsSeen() {
return readsSeen;
}
public long getNumReadsSelected() {
return readsSeen;
}
public int getDownsamplingExtent() {
return downsamplingExtent;
}
public Collection<SAMRecord> getSelectedReads() {
return reads;
}
public void reset() {
reads.clear();
readsSeen = 0;
downsamplingExtent = 0;
}
}
/**
* Select N reads randomly from the input stream.
*/
class NRandomReadSelector implements ReadSelector {
private final LegacyReservoirDownsampler<SAMRecord> reservoir;
private final ReadSelector chainedSelector;
private long readsSeen = 0;
private int downsamplingExtent = 0;
public NRandomReadSelector(ReadSelector chainedSelector, long readLimit) {
this.reservoir = new LegacyReservoirDownsampler<SAMRecord>((int) readLimit);
this.chainedSelector = chainedSelector;
}
public void submitRead(SAMRecord read) {
SAMRecord displaced = reservoir.add(read);
if (displaced != null && chainedSelector != null) {
chainedSelector.notifyReadRejected(read);
downsamplingExtent = Math.max(downsamplingExtent, read.getAlignmentEnd());
}
readsSeen++;
}
public void notifyReadRejected(SAMRecord read) {
readsSeen++;
}
public void complete() {
for (SAMRecord read : reservoir.getDownsampledContents())
chainedSelector.submitRead(read);
if (chainedSelector != null)
chainedSelector.complete();
}
public long getNumReadsSeen() {
return readsSeen;
}
public long getNumReadsSelected() {
return reservoir.size();
}
public int getDownsamplingExtent() {
return downsamplingExtent;
}
public Collection<SAMRecord> getSelectedReads() {
return reservoir.getDownsampledContents();
}
public void reset() {
reservoir.clear();
downsamplingExtent = 0;
if (chainedSelector != null)
chainedSelector.reset();
}
}
/**
* Note: stores reads by sample ID string, not by sample object
*/
class SamplePartitioner implements ReadSelector {
private final Map<String, ReadSelector> readsBySample;
private long readsSeen = 0;
public SamplePartitioner(Map<String, ReadSelector> readSelectors) {
readsBySample = readSelectors;
}
public void submitRead(SAMRecord read) {
String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null;
if (readsBySample.containsKey(sampleName))
readsBySample.get(sampleName).submitRead(read);
readsSeen++;
}
public void notifyReadRejected(SAMRecord read) {
String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null;
if (readsBySample.containsKey(sampleName))
readsBySample.get(sampleName).notifyReadRejected(read);
readsSeen++;
}
public void complete() {
// NO-OP.
}
public long getNumReadsSeen() {
return readsSeen;
}
public long getNumReadsSelected() {
return readsSeen;
}
public int getDownsamplingExtent() {
int downsamplingExtent = 0;
for (ReadSelector storage : readsBySample.values())
downsamplingExtent = Math.max(downsamplingExtent, storage.getDownsamplingExtent());
return downsamplingExtent;
}
public Collection<SAMRecord> getSelectedReads() {
throw new UnsupportedOperationException("Cannot directly get selected reads from a read partitioner.");
}
public ReadSelector getSelectedReads(String sampleName) {
if (!readsBySample.containsKey(sampleName))
throw new NoSuchElementException("Sample name not found");
return readsBySample.get(sampleName);
}
public void reset() {
for (ReadSelector storage : readsBySample.values())
storage.reset();
readsSeen = 0;
}
}

View File

@ -31,14 +31,11 @@ import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.downsampling.*;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.ReservoirDownsampler;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
@ -54,7 +51,7 @@ public class LocusIteratorByState extends LocusIterator {
/**
* our log, which we want to capture anything from this class
*/
private static Logger logger = Logger.getLogger(LocusIteratorByState.class);
private static Logger logger = Logger.getLogger(LegacyLocusIteratorByState.class);
// -----------------------------------------------------------------------------------------------------------------
//
@ -69,7 +66,7 @@ public class LocusIteratorByState extends LocusIterator {
private final ArrayList<String> samples;
private final ReadStateManager readStates;
static private class SAMRecordState {
protected static class SAMRecordState {
SAMRecord read;
int readOffset = -1; // how far are we offset from the start of the read bases?
int genomeOffset = -1; // how far are we offset from the alignment start on the genome?
@ -213,6 +210,7 @@ public class LocusIteratorByState extends LocusIterator {
//final boolean DEBUG2 = false && DEBUG;
private ReadProperties readInfo;
private AlignmentContext nextAlignmentContext;
private boolean performDownsampling;
// -----------------------------------------------------------------------------------------------------------------
//
@ -224,7 +222,18 @@ public class LocusIteratorByState extends LocusIterator {
this.readInfo = readInformation;
this.genomeLocParser = genomeLocParser;
this.samples = new ArrayList<String>(samples);
this.readStates = new ReadStateManager(samIterator, readInformation.getDownsamplingMethod());
// LIBS will invoke the Reservoir and Leveling downsamplers on the read stream if we're
// downsampling to coverage by sample. SAMDataSource will have refrained from applying
// any downsamplers to the read stream in this case, in the expectation that LIBS will
// manage the downsampling. The reason for this is twofold: performance (don't have to
// split/re-assemble the read stream in SAMDataSource), and to enable partial downsampling
// of reads (eg., using half of a read, and throwing the rest away).
this.performDownsampling = readInfo.getDownsamplingMethod() != null &&
readInfo.getDownsamplingMethod().type == DownsampleType.BY_SAMPLE &&
readInfo.getDownsamplingMethod().toCoverage != null;
this.readStates = new ReadStateManager(samIterator);
// currently the GATK expects this LocusIteratorByState to accept empty sample lists, when
// there's no read data. So we need to throw this error only when samIterator.hasNext() is true
@ -285,11 +294,13 @@ public class LocusIteratorByState extends LocusIterator {
final GenomeLoc location = getLocation();
final Map<String, ReadBackedPileupImpl> fullPileup = new HashMap<String, ReadBackedPileupImpl>();
// TODO: How can you determine here whether the current pileup has been downsampled?
boolean hasBeenSampled = false;
for (final String sample : samples) {
final Iterator<SAMRecordState> iterator = readStates.iterator(sample);
final List<PileupElement> pile = new ArrayList<PileupElement>(readStates.size(sample));
hasBeenSampled |= location.getStart() <= readStates.getDownsamplingExtent(sample);
int size = 0; // number of elements in this sample's pileup
int nDeletions = 0; // number of deletions in this sample's pileup
@ -398,34 +409,20 @@ public class LocusIteratorByState extends LocusIterator {
throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
}
private class ReadStateManager {
protected class ReadStateManager {
private final PeekableIterator<SAMRecord> iterator;
private final DownsamplingMethod downsamplingMethod;
private final SamplePartitioner samplePartitioner;
private final Map<String, PerSampleReadStateManager> readStatesBySample = new HashMap<String, PerSampleReadStateManager>();
private final int targetCoverage;
private int totalReadStates = 0;
public ReadStateManager(Iterator<SAMRecord> source, DownsamplingMethod downsamplingMethod) {
public ReadStateManager(Iterator<SAMRecord> source) {
this.iterator = new PeekableIterator<SAMRecord>(source);
this.downsamplingMethod = downsamplingMethod.type != null ? downsamplingMethod : DownsamplingMethod.NONE;
switch (this.downsamplingMethod.type) {
case BY_SAMPLE:
if (downsamplingMethod.toCoverage == null)
throw new UserException.BadArgumentValue("dcov", "Downsampling coverage (-dcov) must be specified when downsampling by sample");
this.targetCoverage = downsamplingMethod.toCoverage;
break;
default:
this.targetCoverage = Integer.MAX_VALUE;
}
Map<String, ReadSelector> readSelectors = new HashMap<String, ReadSelector>();
for (final String sample : samples) {
readStatesBySample.put(sample, new PerSampleReadStateManager());
readSelectors.put(sample, downsamplingMethod.type == DownsampleType.BY_SAMPLE ? new NRandomReadSelector(null, targetCoverage) : new AllReadsSelector());
}
samplePartitioner = new SamplePartitioner(readSelectors);
samplePartitioner = new SamplePartitioner(performDownsampling);
}
/**
@ -449,7 +446,6 @@ public class LocusIteratorByState extends LocusIterator {
public void remove() {
wrappedIterator.remove();
totalReadStates--;
}
};
}
@ -477,17 +473,6 @@ public class LocusIteratorByState extends LocusIterator {
return readStatesBySample.get(sample).size();
}
/**
* The extent of downsampling; basically, the furthest base out which has 'fallen
* victim' to the downsampler.
*
* @param sample Sample, downsampled independently.
* @return Integer stop of the furthest undownsampled region.
*/
public int getDownsamplingExtent(final String sample) {
return readStatesBySample.get(sample).getDownsamplingExtent();
}
public SAMRecordState getFirst() {
for (final String sample : samples) {
PerSampleReadStateManager reads = readStatesBySample.get(sample);
@ -520,61 +505,15 @@ public class LocusIteratorByState extends LocusIterator {
samplePartitioner.submitRead(iterator.next());
}
}
samplePartitioner.complete();
samplePartitioner.doneSubmittingReads();
for (final String sample : samples) {
ReadSelector aggregator = samplePartitioner.getSelectedReads(sample);
Collection<SAMRecord> newReads = new ArrayList<SAMRecord>(aggregator.getSelectedReads());
Collection<SAMRecord> newReads = samplePartitioner.getReadsForSample(sample);
PerSampleReadStateManager statesBySample = readStatesBySample.get(sample);
int numReads = statesBySample.size();
int downsamplingExtent = aggregator.getDownsamplingExtent();
if (numReads + newReads.size() <= targetCoverage || downsamplingMethod.type == DownsampleType.NONE) {
long readLimit = aggregator.getNumReadsSeen();
addReadsToSample(statesBySample, newReads, readLimit);
statesBySample.specifyNewDownsamplingExtent(downsamplingExtent);
} else {
int[] counts = statesBySample.getCountsPerAlignmentStart();
int[] updatedCounts = new int[counts.length];
System.arraycopy(counts, 0, updatedCounts, 0, counts.length);
boolean readPruned = true;
while (numReads + newReads.size() > targetCoverage && readPruned) {
readPruned = false;
for (int alignmentStart = updatedCounts.length - 1; numReads + newReads.size() > targetCoverage && alignmentStart >= 0; alignmentStart--) {
if (updatedCounts[alignmentStart] > 1) {
updatedCounts[alignmentStart]--;
numReads--;
readPruned = true;
}
}
}
if (numReads == targetCoverage) {
updatedCounts[0]--;
numReads--;
}
BitSet toPurge = new BitSet(readStates.size());
int readOffset = 0;
for (int i = 0; i < updatedCounts.length; i++) {
int n = counts[i];
int k = updatedCounts[i];
for (Integer purgedElement : MathUtils.sampleIndicesWithoutReplacement(n, n - k))
toPurge.set(readOffset + purgedElement);
readOffset += counts[i];
}
downsamplingExtent = Math.max(downsamplingExtent, statesBySample.purge(toPurge));
addReadsToSample(statesBySample, newReads, targetCoverage - numReads);
statesBySample.specifyNewDownsamplingExtent(downsamplingExtent);
}
addReadsToSample(statesBySample, newReads);
}
samplePartitioner.reset();
}
@ -583,380 +522,140 @@ public class LocusIteratorByState extends LocusIterator {
*
* @param readStates The list of read states to add this collection of reads.
* @param reads Reads to add. Selected reads will be pulled from this source.
* @param maxReads Maximum number of reads to add.
*/
private void addReadsToSample(final PerSampleReadStateManager readStates, final Collection<SAMRecord> reads, final long maxReads) {
private void addReadsToSample(final PerSampleReadStateManager readStates, final Collection<SAMRecord> reads) {
if (reads.isEmpty())
return;
Collection<SAMRecordState> newReadStates = new LinkedList<SAMRecordState>();
int readCount = 0;
for (SAMRecord read : reads) {
if (readCount < maxReads) {
SAMRecordState state = new SAMRecordState(read);
state.stepForwardOnGenome();
newReadStates.add(state);
readCount++;
}
SAMRecordState state = new SAMRecordState(read);
state.stepForwardOnGenome();
newReadStates.add(state);
}
readStates.addStatesAtNextAlignmentStart(newReadStates);
}
private class PerSampleReadStateManager implements Iterable<SAMRecordState> {
private final Queue<SAMRecordState> readStates = new LinkedList<SAMRecordState>();
private final Deque<Counter> readStateCounter = new LinkedList<Counter>();
private int downsamplingExtent = 0;
protected class PerSampleReadStateManager implements Iterable<SAMRecordState> {
private List<LinkedList<SAMRecordState>> readStatesByAlignmentStart = new LinkedList<LinkedList<SAMRecordState>>();
private int thisSampleReadStates = 0;
private Downsampler<LinkedList<SAMRecordState>> levelingDownsampler =
performDownsampling ?
new LevelingDownsampler<LinkedList<SAMRecordState>, SAMRecordState>(readInfo.getDownsamplingMethod().toCoverage) :
null;
public void addStatesAtNextAlignmentStart(Collection<SAMRecordState> states) {
readStates.addAll(states);
readStateCounter.add(new Counter(states.size()));
if ( states.isEmpty() ) {
return;
}
readStatesByAlignmentStart.add(new LinkedList<SAMRecordState>(states));
thisSampleReadStates += states.size();
totalReadStates += states.size();
if ( levelingDownsampler != null ) {
levelingDownsampler.submit(readStatesByAlignmentStart);
levelingDownsampler.signalEndOfInput();
thisSampleReadStates -= levelingDownsampler.getNumberOfDiscardedItems();
totalReadStates -= levelingDownsampler.getNumberOfDiscardedItems();
// use returned List directly rather than make a copy, for efficiency's sake
readStatesByAlignmentStart = levelingDownsampler.consumeFinalizedItems();
levelingDownsampler.reset();
}
}
public boolean isEmpty() {
return readStates.isEmpty();
return readStatesByAlignmentStart.isEmpty();
}
public SAMRecordState peek() {
return readStates.peek();
return isEmpty() ? null : readStatesByAlignmentStart.get(0).peek();
}
public int size() {
return readStates.size();
}
public void specifyNewDownsamplingExtent(int downsamplingExtent) {
this.downsamplingExtent = Math.max(this.downsamplingExtent, downsamplingExtent);
}
public int getDownsamplingExtent() {
return downsamplingExtent;
}
public int[] getCountsPerAlignmentStart() {
int[] counts = new int[readStateCounter.size()];
int index = 0;
for (Counter counter : readStateCounter)
counts[index++] = counter.getCount();
return counts;
return thisSampleReadStates;
}
public Iterator<SAMRecordState> iterator() {
return new Iterator<SAMRecordState>() {
private Iterator<SAMRecordState> wrappedIterator = readStates.iterator();
private Iterator<LinkedList<SAMRecordState>> alignmentStartIterator = readStatesByAlignmentStart.iterator();
private LinkedList<SAMRecordState> currentPositionReadStates = null;
private Iterator<SAMRecordState> currentPositionReadStatesIterator = null;
public boolean hasNext() {
return wrappedIterator.hasNext();
return alignmentStartIterator.hasNext() ||
(currentPositionReadStatesIterator != null && currentPositionReadStatesIterator.hasNext());
}
public SAMRecordState next() {
return wrappedIterator.next();
if ( currentPositionReadStatesIterator == null || ! currentPositionReadStatesIterator.hasNext() ) {
currentPositionReadStates = alignmentStartIterator.next();
currentPositionReadStatesIterator = currentPositionReadStates.iterator();
}
return currentPositionReadStatesIterator.next();
}
public void remove() {
wrappedIterator.remove();
Counter counter = readStateCounter.peek();
counter.decrement();
if (counter.getCount() == 0)
readStateCounter.remove();
currentPositionReadStatesIterator.remove();
thisSampleReadStates--;
totalReadStates--;
if ( currentPositionReadStates.isEmpty() ) {
alignmentStartIterator.remove();
}
}
};
}
}
}
/**
* Purge the given elements from the bitset. If an element in the bitset is true, purge
* the corresponding read state.
*
* @param elements bits from the set to purge.
* @return the extent of the final downsampled read.
*/
public int purge(final BitSet elements) {
int downsamplingExtent = 0;
/**
* Divides reads by sample and (if requested) does a preliminary downsampling pass with a ReservoirDownsampler.
*
* Note: stores reads by sample ID string, not by sample object
*/
private class SamplePartitioner {
private Map<String, Downsampler<SAMRecord>> readsBySample;
if (elements.isEmpty() || readStates.isEmpty()) return downsamplingExtent;
public SamplePartitioner( boolean downsampleReads ) {
readsBySample = new HashMap<String, Downsampler<SAMRecord>>();
Iterator<SAMRecordState> readStateIterator = readStates.iterator();
for ( String sample : samples ) {
readsBySample.put(sample,
downsampleReads ? new ReservoirDownsampler<SAMRecord>(readInfo.getDownsamplingMethod().toCoverage) :
new PassThroughDownsampler<SAMRecord>());
}
}
Iterator<Counter> counterIterator = readStateCounter.iterator();
Counter currentCounter = counterIterator.next();
public void submitRead(SAMRecord read) {
String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null;
if (readsBySample.containsKey(sampleName))
readsBySample.get(sampleName).submit(read);
}
int readIndex = 0;
long alignmentStartCounter = currentCounter.getCount();
public void doneSubmittingReads() {
for ( Map.Entry<String, Downsampler<SAMRecord>> perSampleReads : readsBySample.entrySet() ) {
perSampleReads.getValue().signalEndOfInput();
}
}
int toPurge = elements.nextSetBit(0);
int removedCount = 0;
public Collection<SAMRecord> getReadsForSample(String sampleName) {
if ( ! readsBySample.containsKey(sampleName) )
throw new NoSuchElementException("Sample name not found");
while (readStateIterator.hasNext() && toPurge >= 0) {
SAMRecordState state = readStateIterator.next();
downsamplingExtent = Math.max(downsamplingExtent, state.getRead().getAlignmentEnd());
return readsBySample.get(sampleName).consumeFinalizedItems();
}
if (readIndex == toPurge) {
readStateIterator.remove();
currentCounter.decrement();
if (currentCounter.getCount() == 0)
counterIterator.remove();
removedCount++;
toPurge = elements.nextSetBit(toPurge + 1);
}
readIndex++;
alignmentStartCounter--;
if (alignmentStartCounter == 0 && counterIterator.hasNext()) {
currentCounter = counterIterator.next();
alignmentStartCounter = currentCounter.getCount();
}
}
totalReadStates -= removedCount;
return downsamplingExtent;
public void reset() {
for ( Map.Entry<String, Downsampler<SAMRecord>> perSampleReads : readsBySample.entrySet() ) {
perSampleReads.getValue().clear();
perSampleReads.getValue().reset();
}
}
}
/**
* Note: assuming that, whenever we downsample, we downsample to an integer capacity.
*/
static private class Counter {
private int count;
public Counter(int count) {
this.count = count;
}
public int getCount() {
return count;
}
public void decrement() {
count--;
}
}
}
/**
* Selects reads passed to it based on a criteria decided through inheritance.
* TODO: This is a temporary abstraction until we can get rid of this downsampling implementation and the mrl option. Get rid of this.
*/
interface ReadSelector {
/**
* All previous selectors in the chain have allowed this read. Submit it to this selector for consideration.
*
* @param read the read to evaluate.
*/
public void submitRead(SAMRecord read);
/**
* A previous selector has deemed this read unfit. Notify this selector so that this selector's counts are valid.
*
* @param read the read previously rejected.
*/
public void notifyReadRejected(SAMRecord read);
/**
* Signal the selector that read additions are complete.
*/
public void complete();
/**
* Retrieve the number of reads seen by this selector so far.
*
* @return number of reads seen.
*/
public long getNumReadsSeen();
/**
* Return the number of reads accepted by this selector so far.
*
* @return number of reads selected.
*/
public long getNumReadsSelected();
/**
* Gets the locus at which the last of the downsampled reads selected by this selector ends. The value returned will be the
* last aligned position from this selection to which a downsampled read aligns -- in other words, if a read is thrown out at
* position 3 whose cigar string is 76M, the value of this parameter will be 78.
*
* @return If any read has been downsampled, this will return the last aligned base of the longest alignment. Else, 0.
*/
public int getDownsamplingExtent();
/**
* Get the reads selected by this selector.
*
* @return collection of reads selected by this selector.
*/
public Collection<SAMRecord> getSelectedReads();
/**
* Reset this collection to its pre-gathered state.
*/
public void reset();
}
/**
* Select every read passed in.
*/
class AllReadsSelector implements ReadSelector {
private Collection<SAMRecord> reads = new LinkedList<SAMRecord>();
private long readsSeen = 0;
private int downsamplingExtent = 0;
public void submitRead(SAMRecord read) {
reads.add(read);
readsSeen++;
}
public void notifyReadRejected(SAMRecord read) {
readsSeen++;
downsamplingExtent = Math.max(downsamplingExtent, read.getAlignmentEnd());
}
public void complete() {
// NO-OP.
}
public long getNumReadsSeen() {
return readsSeen;
}
public long getNumReadsSelected() {
return readsSeen;
}
public int getDownsamplingExtent() {
return downsamplingExtent;
}
public Collection<SAMRecord> getSelectedReads() {
return reads;
}
public void reset() {
reads.clear();
readsSeen = 0;
downsamplingExtent = 0;
}
}
/**
* Select N reads randomly from the input stream.
*/
class NRandomReadSelector implements ReadSelector {
private final ReservoirDownsampler<SAMRecord> reservoir;
private final ReadSelector chainedSelector;
private long readsSeen = 0;
private int downsamplingExtent = 0;
public NRandomReadSelector(ReadSelector chainedSelector, long readLimit) {
this.reservoir = new ReservoirDownsampler<SAMRecord>((int) readLimit);
this.chainedSelector = chainedSelector;
}
public void submitRead(SAMRecord read) {
SAMRecord displaced = reservoir.add(read);
if (displaced != null && chainedSelector != null) {
chainedSelector.notifyReadRejected(read);
downsamplingExtent = Math.max(downsamplingExtent, read.getAlignmentEnd());
}
readsSeen++;
}
public void notifyReadRejected(SAMRecord read) {
readsSeen++;
}
public void complete() {
for (SAMRecord read : reservoir.getDownsampledContents())
chainedSelector.submitRead(read);
if (chainedSelector != null)
chainedSelector.complete();
}
public long getNumReadsSeen() {
return readsSeen;
}
public long getNumReadsSelected() {
return reservoir.size();
}
public int getDownsamplingExtent() {
return downsamplingExtent;
}
public Collection<SAMRecord> getSelectedReads() {
return reservoir.getDownsampledContents();
}
public void reset() {
reservoir.clear();
downsamplingExtent = 0;
if (chainedSelector != null)
chainedSelector.reset();
}
}
/**
* Note: stores reads by sample ID string, not by sample object
*/
class SamplePartitioner implements ReadSelector {
private final Map<String, ReadSelector> readsBySample;
private long readsSeen = 0;
public SamplePartitioner(Map<String, ReadSelector> readSelectors) {
readsBySample = readSelectors;
}
public void submitRead(SAMRecord read) {
String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null;
if (readsBySample.containsKey(sampleName))
readsBySample.get(sampleName).submitRead(read);
readsSeen++;
}
public void notifyReadRejected(SAMRecord read) {
String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null;
if (readsBySample.containsKey(sampleName))
readsBySample.get(sampleName).notifyReadRejected(read);
readsSeen++;
}
public void complete() {
// NO-OP.
}
public long getNumReadsSeen() {
return readsSeen;
}
public long getNumReadsSelected() {
return readsSeen;
}
public int getDownsamplingExtent() {
int downsamplingExtent = 0;
for (ReadSelector storage : readsBySample.values())
downsamplingExtent = Math.max(downsamplingExtent, storage.getDownsamplingExtent());
return downsamplingExtent;
}
public Collection<SAMRecord> getSelectedReads() {
throw new UnsupportedOperationException("Cannot directly get selected reads from a read partitioner.");
}
public ReadSelector getSelectedReads(String sampleName) {
if (!readsBySample.containsKey(sampleName))
throw new NoSuchElementException("Sample name not found");
return readsBySample.get(sampleName);
}
public void reset() {
for (ReadSelector storage : readsBySample.values())
storage.reset();
readsSeen = 0;
}
}
}

View File

@ -343,7 +343,7 @@ public class GATKReport {
GATKReportTable table = tables.firstEntry().getValue();
if ( table.getNumColumns() != values.length )
throw new ReviewedStingException("The number of arguments in writeRow() " + values.length + " must match the number of columns in the table" + table.getNumColumns());
throw new ReviewedStingException("The number of arguments in writeRow (" + values.length + ") must match the number of columns in the table (" + table.getNumColumns() + ")" );
final int rowIndex = table.getNumRows();
for ( int i = 0; i < values.length; i++ )

View File

@ -59,8 +59,8 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
public enum Model {
SNP,
INDEL,
GeneralPloidySNP,
GeneralPloidyINDEL,
GENERALPLOIDYSNP,
GENERALPLOIDYINDEL,
BOTH
}

View File

@ -52,7 +52,7 @@ import java.util.*;
public class UnifiedGenotyperEngine {
public static final String LOW_QUAL_FILTER_NAME = "LowQual";
private static final String GPSTRING = "GeneralPloidy";
private static final String GPSTRING = "GENERALPLOIDY";
public static final String NUMBER_OF_DISCOVERED_ALLELES_KEY = "NDA";
@ -79,6 +79,7 @@ public class UnifiedGenotyperEngine {
// the model used for calculating genotypes
private ThreadLocal<Map<String, GenotypeLikelihoodsCalculationModel>> glcm = new ThreadLocal<Map<String, GenotypeLikelihoodsCalculationModel>>();
private final List<GenotypeLikelihoodsCalculationModel.Model> modelsToUse = new ArrayList<GenotypeLikelihoodsCalculationModel.Model>(2);
// the model used for calculating p(non-ref)
private ThreadLocal<AFCalc> afcm = new ThreadLocal<AFCalc>();
@ -134,6 +135,8 @@ public class UnifiedGenotyperEngine {
computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsIndels, UAC.INDEL_HETEROZYGOSITY);
filter.add(LOW_QUAL_FILTER_NAME);
determineGLModelsToUse();
}
/**
@ -286,7 +289,7 @@ public class UnifiedGenotyperEngine {
glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC));
}
return glcm.get().get(model.name().toUpperCase()).getLikelihoods(tracker, refContext, stratifiedContexts, type, alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser, perReadAlleleLikelihoodMap);
return glcm.get().get(model.name()).getLikelihoods(tracker, refContext, stratifiedContexts, type, alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser, perReadAlleleLikelihoodMap);
}
private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, AlignmentContext rawContext) {
@ -634,48 +637,51 @@ public class UnifiedGenotyperEngine {
(UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES && QualityUtils.phredScaleErrorRate(PofF) >= UAC.STANDARD_CONFIDENCE_FOR_CALLING);
}
private void determineGLModelsToUse() {
String modelPrefix = "";
if ( !UAC.GLmodel.name().contains(GPSTRING) && UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY )
modelPrefix = GPSTRING;
if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") ) {
modelPrefix += UAC.GLmodel.name().toUpperCase().replaceAll("BOTH","");
modelsToUse.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"SNP"));
modelsToUse.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"INDEL"));
}
else {
modelsToUse.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+UAC.GLmodel.name().toUpperCase()));
}
}
// decide whether we are currently processing SNPs, indels, neither, or both
private List<GenotypeLikelihoodsCalculationModel.Model> getGLModelsToUse(final RefMetaDataTracker tracker,
final ReferenceContext refContext,
final AlignmentContext rawContext) {
final List<GenotypeLikelihoodsCalculationModel.Model> models = new ArrayList<GenotypeLikelihoodsCalculationModel.Model>(2);
String modelPrefix = "";
if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") )
modelPrefix = UAC.GLmodel.name().toUpperCase().replaceAll("BOTH","");
if ( UAC.GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES )
return modelsToUse;
if (!UAC.GLmodel.name().contains(GPSTRING) && UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY)
modelPrefix = GPSTRING + modelPrefix;
// if we're genotyping given alleles then we need to choose the model corresponding to the variant type requested
final List<GenotypeLikelihoodsCalculationModel.Model> GGAmodel = new ArrayList<GenotypeLikelihoodsCalculationModel.Model>(1);
final VariantContext vcInput = getVCFromAllelesRod(tracker, refContext, rawContext.getLocation(), false, logger, UAC.alleles);
if ( vcInput == null )
return GGAmodel; // no work to be done
// if we're genotyping given alleles and we have a requested SNP at this position, do SNP
if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
final VariantContext vcInput = getVCFromAllelesRod(tracker, refContext, rawContext.getLocation(), false, logger, UAC.alleles);
if ( vcInput == null )
return models;
if ( vcInput.isSNP() ) {
// ignore SNPs if the user chose INDEL mode only
if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") || UAC.GLmodel.name().toUpperCase().contains("SNP") )
models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"SNP"));
}
else if ( vcInput.isIndel() || vcInput.isMixed() ) {
// ignore INDELs if the user chose SNP mode only
if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") || UAC.GLmodel.name().toUpperCase().contains("INDEL") )
models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"INDEL"));
}
// No support for other types yet
if ( vcInput.isSNP() ) {
// use the SNP model unless the user chose INDEL mode only
if ( modelsToUse.size() == 2 || modelsToUse.get(0).name().endsWith("SNP") )
GGAmodel.add(modelsToUse.get(0));
}
else {
if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") ) {
models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"SNP"));
models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"INDEL"));
}
else {
models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+UAC.GLmodel.name().toUpperCase()));
}
else if ( vcInput.isIndel() || vcInput.isMixed() ) {
// use the INDEL model unless the user chose SNP mode only
if ( modelsToUse.size() == 2 )
GGAmodel.add(modelsToUse.get(1));
else if ( modelsToUse.get(0).name().endsWith("INDEL") )
GGAmodel.add(modelsToUse.get(0));
}
// No support for other types yet
return models;
return GGAmodel;
}
public static void computeAlleleFrequencyPriors(final int N, final double[] priors, final double theta) {

View File

@ -28,7 +28,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
@ -52,7 +51,7 @@ public class AFCalcResult {
private final double[] log10PriorsOfAC;
private final double[] log10PosteriorsOfAC;
private final Map<Allele, Double> log10pNonRefByAllele;
private final Map<Allele, Double> log10pRefByAllele;
/**
* The AC values for all ALT alleles at the MLE
@ -74,16 +73,16 @@ public class AFCalcResult {
final List<Allele> allelesUsedInGenotyping,
final double[] log10LikelihoodsOfAC,
final double[] log10PriorsOfAC,
final Map<Allele, Double> log10pNonRefByAllele) {
final Map<Allele, Double> log10pRefByAllele) {
if ( allelesUsedInGenotyping == null || allelesUsedInGenotyping.size() < 1 ) throw new IllegalArgumentException("allelesUsedInGenotyping must be non-null list of at least 1 value " + allelesUsedInGenotyping);
if ( alleleCountsOfMLE == null ) throw new IllegalArgumentException("alleleCountsOfMLE cannot be null");
if ( alleleCountsOfMLE.length != allelesUsedInGenotyping.size() - 1) throw new IllegalArgumentException("alleleCountsOfMLE.length " + alleleCountsOfMLE.length + " != allelesUsedInGenotyping.size() " + allelesUsedInGenotyping.size());
if ( nEvaluations < 0 ) throw new IllegalArgumentException("nEvaluations must be >= 0 but saw " + nEvaluations);
if ( log10LikelihoodsOfAC.length != 2 ) throw new IllegalArgumentException("log10LikelihoodsOfAC must have length equal 2");
if ( log10PriorsOfAC.length != 2 ) throw new IllegalArgumentException("log10PriorsOfAC must have length equal 2");
if ( log10pNonRefByAllele == null ) throw new IllegalArgumentException("log10pNonRefByAllele cannot be null");
if ( log10pNonRefByAllele.size() != allelesUsedInGenotyping.size() - 1 ) throw new IllegalArgumentException("log10pNonRefByAllele has the wrong number of elements: log10pNonRefByAllele " + log10pNonRefByAllele + " but allelesUsedInGenotyping " + allelesUsedInGenotyping);
if ( ! allelesUsedInGenotyping.containsAll(log10pNonRefByAllele.keySet()) ) throw new IllegalArgumentException("log10pNonRefByAllele doesn't contain all of the alleles used in genotyping: log10pNonRefByAllele " + log10pNonRefByAllele + " but allelesUsedInGenotyping " + allelesUsedInGenotyping);
if ( log10pRefByAllele == null ) throw new IllegalArgumentException("log10pRefByAllele cannot be null");
if ( log10pRefByAllele.size() != allelesUsedInGenotyping.size() - 1 ) throw new IllegalArgumentException("log10pRefByAllele has the wrong number of elements: log10pRefByAllele " + log10pRefByAllele + " but allelesUsedInGenotyping " + allelesUsedInGenotyping);
if ( ! allelesUsedInGenotyping.containsAll(log10pRefByAllele.keySet()) ) throw new IllegalArgumentException("log10pRefByAllele doesn't contain all of the alleles used in genotyping: log10pRefByAllele " + log10pRefByAllele + " but allelesUsedInGenotyping " + allelesUsedInGenotyping);
if ( ! MathUtils.goodLog10ProbVector(log10LikelihoodsOfAC, LOG_10_ARRAY_SIZES, false) ) throw new IllegalArgumentException("log10LikelihoodsOfAC are bad " + Utils.join(",", log10LikelihoodsOfAC));
if ( ! MathUtils.goodLog10ProbVector(log10PriorsOfAC, LOG_10_ARRAY_SIZES, true) ) throw new IllegalArgumentException("log10priors are bad " + Utils.join(",", log10PriorsOfAC));
@ -94,7 +93,7 @@ public class AFCalcResult {
this.log10LikelihoodsOfAC = Arrays.copyOf(log10LikelihoodsOfAC, LOG_10_ARRAY_SIZES);
this.log10PriorsOfAC = Arrays.copyOf(log10PriorsOfAC, LOG_10_ARRAY_SIZES);
this.log10PosteriorsOfAC = computePosteriors(log10LikelihoodsOfAC, log10PriorsOfAC);
this.log10pNonRefByAllele = new HashMap<Allele, Double>(log10pNonRefByAllele);
this.log10pRefByAllele = new HashMap<Allele, Double>(log10pRefByAllele);
}
/**
@ -104,7 +103,7 @@ public class AFCalcResult {
* @return
*/
public AFCalcResult withNewPriors(final double[] log10PriorsOfAC) {
return new AFCalcResult(alleleCountsOfMLE, nEvaluations, allelesUsedInGenotyping, log10LikelihoodsOfAC, log10PriorsOfAC, log10pNonRefByAllele);
return new AFCalcResult(alleleCountsOfMLE, nEvaluations, allelesUsedInGenotyping, log10LikelihoodsOfAC, log10PriorsOfAC, log10pRefByAllele);
}
/**
@ -219,7 +218,7 @@ public class AFCalcResult {
public String toString() {
final List<String> byAllele = new LinkedList<String>();
for ( final Allele a : getAllelesUsedInGenotyping() )
if ( a.isNonReference() ) byAllele.add(String.format("%s => MLE %d / posterior %.2f", a, getAlleleCountAtMLE(a), getLog10PosteriorOfAFGt0ForAllele(a)));
if ( a.isNonReference() ) byAllele.add(String.format("%s => MLE %d / posterior %.2f", a, getAlleleCountAtMLE(a), getLog10PosteriorOfAFEq0ForAllele(a)));
return String.format("AFCalc%n\t\tlog10PosteriorOfAFGT0=%.2f%n\t\t%s", getLog10LikelihoodOfAFGT0(), Utils.join("\n\t\t", byAllele));
}
@ -231,13 +230,16 @@ public class AFCalcResult {
* And that log10minPNonRef is -3.
* We are considered polymorphic since 10^-5 < 10^-3 => -5 < -3
*
* Note that log10minPNonRef is really the minimum confidence, scaled as an error rate, so
* if you want to be 99% confidence, then log10PNonRef should be log10(0.01) = -2.
*
* @param log10minPNonRef the log10 scaled min pr of being non-ref to be considered polymorphic
*
* @return true if there's enough confidence (relative to log10minPNonRef) to reject AF == 0
*/
@Requires("MathUtils.goodLog10Probability(log10minPNonRef)")
public boolean isPolymorphic(final Allele allele, final double log10minPNonRef) {
return getLog10PosteriorOfAFGt0ForAllele(allele) >= log10minPNonRef;
return getLog10PosteriorOfAFEq0ForAllele(allele) < log10minPNonRef;
}
/**
@ -245,7 +247,7 @@ public class AFCalcResult {
*/
public boolean isPolymorphicPhredScaledQual(final Allele allele, final double minPNonRefPhredScaledQual) {
if ( minPNonRefPhredScaledQual < 0 ) throw new IllegalArgumentException("phredScaledQual " + minPNonRefPhredScaledQual + " < 0 ");
final double log10Threshold = Math.log10(QualityUtils.qualToProb(minPNonRefPhredScaledQual));
final double log10Threshold = minPNonRefPhredScaledQual / -10;
return isPolymorphic(allele, log10Threshold);
}
@ -263,7 +265,16 @@ public class AFCalcResult {
}
/**
* Returns the log10 probability that allele is segregating
* Returns the log10 probability that allele is not segregating
*
* Note that this function is p not segregating so that we can store
* internally the log10 value of AF == 0, which grows very quickly
* negative and yet has sufficient resolution for high confidence tests.
* For example, if log10pRef == -100, not an unreasonably high number,
* if we tried to store log10pNonRef we'd be looking at 1 - 10^-100, which
* quickly underflows to 1. So the logic here is backward from what
* you really want (the p of segregating) but we do that for numerical
* reasons
*
* Unlike the sites-level annotation, this calculation is specific to allele, and can be
* used to separately determine how much evidence there is that allele is independently
@ -272,11 +283,11 @@ public class AFCalcResult {
* evidence for one allele but not so much for any other allele
*
* @param allele the allele we're interested in, must be in getAllelesUsedInGenotyping
* @return the log10 probability that allele is segregating at this site
* @return the log10 probability that allele is not segregating at this site
*/
@Ensures("MathUtils.goodLog10Probability(result)")
public double getLog10PosteriorOfAFGt0ForAllele(final Allele allele) {
final Double log10pNonRef = log10pNonRefByAllele.get(allele);
public double getLog10PosteriorOfAFEq0ForAllele(final Allele allele) {
final Double log10pNonRef = log10pRefByAllele.get(allele);
if ( log10pNonRef == null ) throw new IllegalArgumentException("Unknown allele " + allele);
return log10pNonRef;
}

View File

@ -77,7 +77,7 @@ public class ExactCallLogger implements Cloneable {
for ( final Allele allele : result.getAllelesUsedInGenotyping() ) {
if ( allele.isNonReference() ) {
printCallElement(vc, "MLE", allele, result.getAlleleCountAtMLE(allele));
printCallElement(vc, "pNonRefByAllele", allele, result.getLog10PosteriorOfAFGt0ForAllele(allele));
printCallElement(vc, "pRefByAllele", allele, result.getLog10PosteriorOfAFEq0ForAllele(allele));
}
}
@ -123,7 +123,7 @@ public class ExactCallLogger implements Cloneable {
final double[] posteriors = new double[2];
final double[] priors = MathUtils.normalizeFromLog10(new double[]{0.5, 0.5}, true);
final List<Integer> mle = new ArrayList<Integer>();
final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>();
final Map<Allele, Double> log10pRefByAllele = new HashMap<Allele, Double>();
long runtimeNano = -1;
GenomeLoc currentLoc = null;
@ -148,7 +148,7 @@ public class ExactCallLogger implements Cloneable {
builder.chr(currentLoc.getContig()).start(currentLoc.getStart()).stop(stop);
builder.genotypes(genotypes);
final int[] mleInts = ArrayUtils.toPrimitive(mle.toArray(new Integer[]{}));
final AFCalcResult result = new AFCalcResult(mleInts, 1, alleles, posteriors, priors, log10pNonRefByAllele);
final AFCalcResult result = new AFCalcResult(mleInts, 1, alleles, posteriors, priors, log10pRefByAllele);
calls.add(new ExactCall(builder.make(), runtimeNano, result));
}
break;
@ -165,9 +165,9 @@ public class ExactCallLogger implements Cloneable {
posteriors[1] = Double.valueOf(value);
} else if (variable.equals("MLE")) {
mle.add(Integer.valueOf(value));
} else if (variable.equals("pNonRefByAllele")) {
} else if (variable.equals("pRefByAllele")) {
final Allele a = Allele.create(key);
log10pNonRefByAllele.put(a, Double.valueOf(value));
log10pRefByAllele.put(a, Double.valueOf(value));
} else if (variable.equals("runtime.nano")) {
runtimeNano = Long.valueOf(value);
} else {

View File

@ -125,8 +125,8 @@ import java.util.*;
*/
final List<AFCalcResult> supporting;
private MyAFCalcResult(int[] alleleCountsOfMLE, int nEvaluations, List<Allele> allelesUsedInGenotyping, double[] log10LikelihoodsOfAC, double[] log10PriorsOfAC, Map<Allele, Double> log10pNonRefByAllele, List<AFCalcResult> supporting) {
super(alleleCountsOfMLE, nEvaluations, allelesUsedInGenotyping, log10LikelihoodsOfAC, log10PriorsOfAC, log10pNonRefByAllele);
private MyAFCalcResult(int[] alleleCountsOfMLE, int nEvaluations, List<Allele> allelesUsedInGenotyping, double[] log10LikelihoodsOfAC, double[] log10PriorsOfAC, Map<Allele, Double> log10pRefByAllele, List<AFCalcResult> supporting) {
super(alleleCountsOfMLE, nEvaluations, allelesUsedInGenotyping, log10LikelihoodsOfAC, log10PriorsOfAC, log10pRefByAllele);
this.supporting = supporting;
}
}
@ -323,7 +323,7 @@ import java.util.*;
final int nAltAlleles = sortedResultsWithThetaNPriors.size();
final int[] alleleCountsOfMLE = new int[nAltAlleles];
final double[] log10PriorsOfAC = new double[2];
final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>(nAltAlleles);
final Map<Allele, Double> log10pRefByAllele = new HashMap<Allele, Double>(nAltAlleles);
// the sum of the log10 posteriors for AF == 0 and AF > 0 to determine joint probs
double log10PosteriorOfACEq0Sum = 0.0;
@ -348,7 +348,7 @@ import java.util.*;
log10PosteriorOfACGt0Sum += sortedResultWithThetaNPriors.getLog10PosteriorOfAFGT0();
// bind pNonRef for allele to the posterior value of the AF > 0 with the new adjusted prior
log10pNonRefByAllele.put(altAllele, sortedResultWithThetaNPriors.getLog10PosteriorOfAFGT0());
log10pRefByAllele.put(altAllele, sortedResultWithThetaNPriors.getLog10PosteriorOfAFEq0());
// trivial -- update the number of evaluations
nEvaluations += sortedResultWithThetaNPriors.nEvaluations;
@ -384,6 +384,6 @@ import java.util.*;
MathUtils.normalizeFromLog10(log10LikelihoodsOfAC, true),
// priors incorporate multiple alt alleles, must be normalized
MathUtils.normalizeFromLog10(log10PriorsOfAC, true),
log10pNonRefByAllele, sortedResultsWithThetaNPriors);
log10pRefByAllele, sortedResultsWithThetaNPriors);
}
}

View File

@ -30,13 +30,13 @@ class OriginalDiploidExactAFCalc extends DiploidExactAFCalc {
final double[] log10Priors = new double[]{log10AlleleFrequencyPriors[0], MathUtils.log10sumLog10(log10AlleleFrequencyPriors, 1)};
final double[] log10Posteriors = MathUtils.vectorSum(log10Likelihoods, log10Priors);
final double log10PNonRef = log10Posteriors[1] > log10Posteriors[0] ? 0.0 : MathUtils.LOG10_P_OF_ZERO;
final Map<Allele, Double> log10pNonRefByAllele = Collections.singletonMap(vc.getAlternateAllele(0), log10PNonRef);
final double log10PRef = log10Posteriors[1] > log10Posteriors[0] ? MathUtils.LOG10_P_OF_ZERO : 0.0;
final Map<Allele, Double> log10pRefByAllele = Collections.singletonMap(vc.getAlternateAllele(0), log10PRef);
return new AFCalcResult(new int[]{mleK}, 0, vc.getAlleles(),
MathUtils.normalizeFromLog10(log10Likelihoods, true),
MathUtils.normalizeFromLog10(log10Priors, true),
log10pNonRefByAllele);
log10pRefByAllele);
}
/**

View File

@ -165,14 +165,14 @@ final class StateTracker {
final double[] log10Likelihoods = MathUtils.normalizeFromLog10(new double[]{getLog10LikelihoodOfAFzero(), getLog10LikelihoodOfAFNotZero()}, true);
final double[] log10Priors = MathUtils.normalizeFromLog10(new double[]{log10PriorsByAC[0], MathUtils.log10sumLog10(log10PriorsByAC, 1)}, true);
final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>(allelesUsedInGenotyping.size());
final Map<Allele, Double> log10pRefByAllele = new HashMap<Allele, Double>(allelesUsedInGenotyping.size());
for ( int i = 0; i < subACOfMLE.length; i++ ) {
final Allele allele = allelesUsedInGenotyping.get(i+1);
final double log10PNonRef = alleleCountsOfMAP[i] > 0 ? 0 : -10000; // TODO -- a total hack but in effect what the old behavior was
log10pNonRefByAllele.put(allele, log10PNonRef);
final double log10PRef = alleleCountsOfMAP[i] > 0 ? -10000 : 0; // TODO -- a total hack but in effect what the old behavior was
log10pRefByAllele.put(allele, log10PRef);
}
return new AFCalcResult(subACOfMLE, nEvaluations, allelesUsedInGenotyping, log10Likelihoods, log10Priors, log10pNonRefByAllele);
return new AFCalcResult(subACOfMLE, nEvaluations, allelesUsedInGenotyping, log10Likelihoods, log10Priors, log10pRefByAllele);
}
// --------------------------------------------------------------------------------

View File

@ -287,6 +287,9 @@ public class PairHMMIndelErrorModel {
if (startLocationInRefForHaplotypes < ref.getWindow().getStart()) {
startLocationInRefForHaplotypes = ref.getWindow().getStart(); // read starts before haplotype: read will have to be cut numStartSoftClippedBases += ref.getWindow().getStart() - startLocationInRefForHaplotypes;
}
else if (startLocationInRefForHaplotypes > ref.getWindow().getStop()) {
startLocationInRefForHaplotypes = ref.getWindow().getStop(); // read starts after haplotype: read will have to be clipped completely;
}
if (stopLocationInRefForHaplotypes > ref.getWindow().getStop()) {
stopLocationInRefForHaplotypes = ref.getWindow().getStop(); // check also if end of read will go beyond reference context
@ -338,6 +341,8 @@ public class PairHMMIndelErrorModel {
if (startLocationInRefForHaplotypes < haplotype.getStartPosition())
startLocationInRefForHaplotypes = haplotype.getStartPosition();
else if (startLocationInRefForHaplotypes > haplotype.getStopPosition())
startLocationInRefForHaplotypes = haplotype.getStopPosition();
final long indStart = startLocationInRefForHaplotypes - haplotype.getStartPosition();
final long indStop = stopLocationInRefForHaplotypes - haplotype.getStartPosition();
@ -347,8 +352,6 @@ public class PairHMMIndelErrorModel {
System.out.format("indStart: %d indStop: %d WinStart:%d WinStop:%d start: %d stop: %d readLength: %d C:%s\n",
indStart, indStop, ref.getWindow().getStart(), ref.getWindow().getStop(), startLocationInRefForHaplotypes, stopLocationInRefForHaplotypes, read.getReadLength(), read.getCigar().toString());
final byte[] haplotypeBases = Arrays.copyOfRange(haplotype.getBases(),
(int)indStart, (int)indStop);

View File

@ -135,6 +135,9 @@ public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingS
@Argument(fullName = "permitNoSampleOverlap", shortName = "permitNoSampleOverlap", doc = "Don't exit (just WARN) when the VCF and BAMs do not overlap in samples", required = false)
private boolean permitNoSampleOverlap = false;
/**
* Important note: do not use this argument if your input data set is not already phased or it will cause the tool to skip over all heterozygous sites.
*/
@Argument(fullName = "respectPhaseInInput", shortName = "respectPhaseInInput", doc = "Will only phase genotypes in cases where the resulting output will necessarily be consistent with any existing phase (for example, from trios)", required = false)
private boolean respectPhaseInInput = false;

View File

@ -1,149 +0,0 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.qc;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.filters.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import java.io.PrintStream;
import java.util.*;
/**
* Emits intervals present in either the original or reduced bam but not the other.
*
* <h2>Input</h2>
* <p>
* The original and reduced BAM files.
* </p>
*
* <h2>Output</h2>
* <p>
* A list of intervals present in one bam but not the other.
* </p>
*
* <h2>Examples</h2>
* <pre>
* java -Xmx2g -jar GenomeAnalysisTK.jar \
* -I:original original.bam \
* -I:reduced reduced.bam \
* -R ref.fasta \
* -T AssessReducedCoverage \
* -o output.intervals
* </pre>
*
* @author ebanks
*/
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
@ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, BadCigarFilter.class})
@Hidden
public class AssessReducedCoverage extends LocusWalker<GenomeLoc, GenomeLoc> implements TreeReducible<GenomeLoc> {

    // Positional tags identifying the two input bams (-I:original / -I:reduced).
    private static final String original = "original";
    private static final String reduced = "reduced";

    @Output
    protected PrintStream out;

    @Override
    public boolean includeReadsWithDeletionAtLoci() { return true; }

    @Argument(fullName = "output_reduced_only_coverage", shortName = "output_reduced_only_coverage", doc = "Output an interval if the reduced bam has coverage where the original does not", required = false)
    public boolean OUTPUT_REDUCED_ONLY_INTERVALS = false;

    public void initialize() {}

    /**
     * Emit the current locus when exactly one of the two bams has usable coverage here.
     *
     * @return the locus if only the original bam (or, when enabled, only the reduced
     *         bam) covers this position; otherwise null
     */
    public GenomeLoc map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        if ( tracker == null )
            return null;

        final Set<String> observedTags = getAllTags(context.getBasePileup());
        final boolean originalOnly = observedTags.contains(original) && !observedTags.contains(reduced);
        final boolean reducedOnly = OUTPUT_REDUCED_ONLY_INTERVALS && observedTags.contains(reduced) && !observedTags.contains(original);
        return originalOnly || reducedOnly ? ref.getLocus() : null;
    }

    /**
     * Collect the positional tags (bam labels) of every read contributing a
     * well-supported, non-deletion base to this pileup.
     */
    private Set<String> getAllTags(final ReadBackedPileup pileup) {
        final Set<String> result = new HashSet<String>(10);
        for ( final PileupElement element : pileup ) {
            // require minimal base and mapping quality, and skip deletion bases
            final boolean usable = (int)element.getQual() > 2 && element.getMappingQual() > 0 && !element.isDeletion();
            if ( usable )
                result.addAll(getToolkit().getReaderIDForRead(element.getRead()).getTags().getPositionalTags());
        }
        return result;
    }

    /** Flush any interval still being accumulated at the end of the traversal. */
    public void onTraversalDone(GenomeLoc sum) {
        if ( sum == null )
            return;
        out.println(sum);
    }

    public GenomeLoc reduceInit() {
        return null;
    }

    /**
     * Shared accumulator logic for reduce/treeReduce: merge the two intervals when
     * contiguous, otherwise print the completed earlier interval and continue with
     * the later one.
     */
    private GenomeLoc mergeOrFlush(final GenomeLoc earlier, final GenomeLoc later) {
        if ( earlier == null )
            return later;
        if ( later == null )
            return earlier;
        // contiguous intervals collapse into a single spanning interval
        if ( earlier.contiguousP(later) )
            return getToolkit().getGenomeLocParser().createGenomeLoc(earlier.getContig(), earlier.getStart(), later.getStop());
        // otherwise emit the finished earlier interval and restart from the later one
        out.println(earlier);
        return later;
    }

    public GenomeLoc treeReduce(GenomeLoc lhs, GenomeLoc rhs) {
        return mergeOrFlush(lhs, rhs);
    }

    public GenomeLoc reduce(GenomeLoc value, GenomeLoc sum) {
        return mergeOrFlush(sum, value);
    }
}

View File

@ -1,173 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.qc;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import java.io.PrintStream;
import java.util.List;
/**
 * Emits intervals in which the differences between the original and reduced bam quals are bigger than epsilon (unless the quals of
 * the reduced bam are above a sufficient threshold)
*
* <h2>Input</h2>
* <p>
* The original and reduced BAM files.
* </p>
*
* <h2>Output</h2>
* <p>
 * A list of intervals in which the differences between the original and reduced bam quals are bigger than epsilon.
* </p>
*
* <h2>Examples</h2>
* <pre>
* java -Xmx2g -jar GenomeAnalysisTK.jar \
* -I:original original.bam \
* -I:reduced reduced.bam \
* -R ref.fasta \
* -T AssessReducedQuals \
* -o output.intervals
* </pre>
*
* @author ami
*/
public class AssessReducedQuals extends LocusWalker<GenomeLoc, GenomeLoc> implements TreeReducible<GenomeLoc> {

    // Positional tags identifying the two input bams (-I:original / -I:reduced).
    private static final String reduced = "reduced";
    private static final String original = "original";

    // Indices into the 2-element qual-sum array built by getPileupQuals().
    private static final int originalQualsIndex = 0;
    private static final int reducedQualsIndex = 1;

    @Argument(fullName = "sufficientQualSum", shortName = "sufficientQualSum", doc = "When a reduced bam qual sum is above this threshold, it passes even without comparing to the non-reduced bam ", required = false)
    public int sufficientQualSum = 600;

    @Argument(fullName = "qual_epsilon", shortName = "epsilon", doc = "when |Quals_reduced_bam - Quals_original_bam| > epsilon*Quals_original_bam we output this interval", required = false)
    public int qual_epsilon = 0;

    @Argument(fullName = "debugLevel", shortName = "debug", doc = "debug mode on") // TODO -- best to make this optional
    public int debugLevel = 0; // TODO -- best to make this an enum or boolean

    @Output
    protected PrintStream out;

    // Print the run parameters once when any debug level is enabled.
    public void initialize() {
        if (debugLevel != 0)
            out.println(" Debug mode" +
                    "Debug:\tsufficientQualSum: "+sufficientQualSum+ "\n " +
                    "Debug:\tqual_epsilon: "+qual_epsilon);
    }

    @Override
    public boolean includeReadsWithDeletionAtLoci() { return true; }

    /**
     * Compare the (capped) qual sums of the two bams at this locus.
     *
     * @return the locus when the capped original/reduced qual sums differ by more
     *         than epsilon in either direction; otherwise null
     */
    @Override
    public GenomeLoc map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        if ( tracker == null )
            return null;
        boolean reportLocus;
        final int[] quals = getPileupQuals(context.getBasePileup());
        if (debugLevel != 0)
            out.println("Debug:\tLocus Quals\t"+ref.getLocus()+"\toriginal\t"+quals[originalQualsIndex]+"\treduced\t"+quals[reducedQualsIndex]);
        // tolerance scales with the original bam's qual sum (qual_epsilon acts as a factor)
        final int epsilon = MathUtils.fastRound(quals[originalQualsIndex]*qual_epsilon);
        // cap both sums so an already-high reduced qual sum always passes
        final int calcOriginalQuals = Math.min(quals[originalQualsIndex],sufficientQualSum);
        final int calcReducedQuals = Math.min(quals[reducedQualsIndex],sufficientQualSum);
        final int OriginalReducedQualDiff = calcOriginalQuals - calcReducedQuals;
        // report when the absolute difference exceeds epsilon
        reportLocus = OriginalReducedQualDiff > epsilon || OriginalReducedQualDiff < -1*epsilon;
        if(debugLevel != 0 && reportLocus)
            out.println("Debug:\tEmited Locus\t"+ref.getLocus()+"\toriginal\t"+quals[originalQualsIndex]+"\treduced\t"+quals[reducedQualsIndex]+"\tepsilon\t"+epsilon+"\tdiff\t"+OriginalReducedQualDiff);
        return reportLocus ? ref.getLocus() : null;
    }

    /**
     * Sum base qualities (weighted by representative count) separately per input bam.
     *
     * @return a 2-element array indexed by originalQualsIndex / reducedQualsIndex
     */
    private final int[] getPileupQuals(final ReadBackedPileup readPileup) {
        final int[] quals = new int[2];
        // per-bam debug transcripts, only filled in at debugLevel == 2
        String[] printPileup = {"Debug 2:\toriginal pileup:\t"+readPileup.getLocation()+"\nDebug 2:----------------------------------\n",
                "Debug 2:\t reduced pileup:\t"+readPileup.getLocation()+"\nDebug 2:----------------------------------\n"};
        for( PileupElement p : readPileup ){
            final List<String> tags = getToolkit().getReaderIDForRead(p.getRead()).getTags().getPositionalTags();
            if ( isGoodRead(p,tags) ){
                // weight each base's qual by how many original reads it represents
                final int tempQual = (int)(p.getQual()) * p.getRepresentativeCount();
                final int tagIndex = getTagIndex(tags);
                quals[tagIndex] += tempQual;
                if(debugLevel == 2)
                    printPileup[tagIndex] += "\tDebug 2: ("+p+")\tMQ="+p.getMappingQual()+":QU="+p.getQual()+":RC="+p.getRepresentativeCount()+":OS="+p.getOffset()+"\n";
            }
        }
        if(debugLevel == 2){
            out.println(printPileup[originalQualsIndex]);
            out.println(printPileup[reducedQualsIndex]);
        }
        return quals;
    }

    // A pileup element counts when it is not a deletion and, for the original bam,
    // passes minimum base (20) and mapping (20) quality cutoffs; reduced-bam
    // elements skip the quality cutoffs entirely.
    // TODO -- arguments/variables should be final, not method declaration
    private final boolean isGoodRead(PileupElement p, List<String> tags){
        // TODO -- this isn't quite right. You don't need the tags here. Instead, you want to check whether the read itself (which
        // TODO -- you can get from the PileupElement) is a reduced read (not all reads from the reduced bam are reduced) and only
        // TODO -- for them do you want to ignore that min mapping quality cutoff (but you *do* still want the min base cutoff).
        return !p.isDeletion() && (tags.contains(reduced) || (tags.contains(original) && (int)p.getQual() >= 20 && p.getMappingQual() >= 20));
    }

    // Map a read's tag set to the qual-sum array index (reduced -> 1, otherwise 0).
    private final int getTagIndex(List<String> tags){
        return tags.contains(reduced) ? 1 : 0;
    }

    // Flush any interval still being accumulated at the end of the traversal.
    @Override
    public void onTraversalDone(GenomeLoc sum) {
        if ( sum != null )
            out.println(sum);
    }

    /**
     * Merge contiguous intervals from parallel shards; when not contiguous, the
     * left-hand interval is printed and accumulation restarts from the right.
     */
    @Override
    public GenomeLoc treeReduce(GenomeLoc lhs, GenomeLoc rhs) {
        if ( lhs == null )
            return rhs;
        if ( rhs == null )
            return lhs;
        // if contiguous, just merge them
        if ( lhs.contiguousP(rhs) )
            return getToolkit().getGenomeLocParser().createGenomeLoc(lhs.getContig(), lhs.getStart(), rhs.getStop());
        // otherwise, print the lhs and start over with the rhs
        out.println(lhs);
        return rhs;
    }

    @Override
    public GenomeLoc reduceInit() {
        return null;
    }

    /**
     * Accumulate emitted loci into contiguous intervals; print a completed
     * interval whenever the new locus does not extend it.
     */
    @Override
    public GenomeLoc reduce(GenomeLoc value, GenomeLoc sum) {
        if ( value == null )
            return sum;
        if ( sum == null )
            return value;
        // if contiguous, just merge them
        if ( sum.contiguousP(value) )
            return getToolkit().getGenomeLocParser().createGenomeLoc(sum.getContig(), sum.getStart(), value.getStop());
        // otherwise, print the sum and start over with the value
        out.println(sum);
        return value;
    }
}

View File

@ -134,7 +134,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
protected VariantContextWriter vcfWriter = null;
@Argument(shortName="genotypeMergeOptions", doc="Determines how we should merge genotype records for samples shared across the ROD files", required=false)
public VariantContextUtils.GenotypeMergeType genotypeMergeOption = VariantContextUtils.GenotypeMergeType.PRIORITIZE;
public VariantContextUtils.GenotypeMergeType genotypeMergeOption = null;
@Argument(shortName="filteredRecordsMergeType", doc="Determines how we should handle records seen at the same site in the VCF, but with different FILTER fields", required=false)
public VariantContextUtils.FilteredRecordMergeType filteredRecordsMergeType = VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED;
@ -200,13 +200,13 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
} else
logger.warn("VCF output file not an instance of VCFWriterStub; cannot enable sites only output option");
if ( PRIORITY_STRING == null ) {
validateAnnotateUnionArguments();
if ( PRIORITY_STRING == null && genotypeMergeOption == null) {
genotypeMergeOption = VariantContextUtils.GenotypeMergeType.UNSORTED;
//PRIORITY_STRING = Utils.join(",", vcfRods.keySet()); Deleted by Ami (7/10/12)
logger.info("Priority string not provided, using arbitrary genotyping order: " + PRIORITY_STRING);
logger.info("Priority string not provided, using arbitrary genotyping order: "+priority);
}
validateAnnotateUnionArguments();
samples = sitesOnlyVCF ? Collections.<String>emptySet() : SampleUtils.getSampleList(vcfRods, genotypeMergeOption);
if ( SET_KEY.toLowerCase().equals("null") )
@ -228,22 +228,22 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
if ( genotypeMergeOption == VariantContextUtils.GenotypeMergeType.PRIORITIZE && PRIORITY_STRING == null )
throw new UserException.MissingArgument("rod_priority_list", "Priority string must be provided if you want to prioritize genotypes");
if ( genotypeMergeOption == VariantContextUtils.GenotypeMergeType.PRIORITIZE )
if ( PRIORITY_STRING != null){
priority = new ArrayList<String>(Arrays.asList(PRIORITY_STRING.split(",")));
else
priority = new ArrayList<String>(rodNames);
if ( rodNames.size() != priority.size() )
throw new UserException.BadArgumentValue("rod_priority_list", "The priority list must contain exactly one rod binding per ROD provided to the GATK: rodNames=" + rodNames + " priority=" + priority);
if ( rodNames.size() != priority.size() )
throw new UserException.BadArgumentValue("rod_priority_list", "The priority list must contain exactly one rod binding per ROD provided to the GATK: rodNames=" + rodNames + " priority=" + priority);
if ( ! rodNames.containsAll(priority) )
throw new UserException.BadArgumentValue("rod_priority_list", "Not all priority elements provided as input RODs: " + PRIORITY_STRING);
}
if ( ! rodNames.containsAll(priority) )
throw new UserException.BadArgumentValue("rod_priority_list", "Not all priority elements provided as input RODs: " + PRIORITY_STRING);
}
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( tracker == null ) // RodWalkers can make funky map calls
return 0;
Set<String> rodNames = SampleUtils.getRodNamesWithVCFHeader(getToolkit(), null);
// get all of the vcf rods at this locus
// Need to provide reference bases to simpleMerge starting at current locus
Collection<VariantContext> vcs = tracker.getValues(variants, context.getLocation());
@ -290,13 +290,13 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
for (VariantContext.Type type : VariantContext.Type.values()) {
if (VCsByType.containsKey(type))
mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type),
priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
priority, rodNames.size() , filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC));
}
}
else if (multipleAllelesMergeType == VariantContextUtils.MultipleAllelesMergeType.MIX_TYPES) {
mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), vcs,
priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
priority, rodNames.size(), filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC));
}
else {

View File

@ -495,4 +495,14 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Serializable, HasGenome
/**
 * Number of positions shared by this location and {@code that}; 0 when they do
 * not overlap.
 */
public long sizeOfOverlap( final GenomeLoc that ) {
    if ( ! this.overlapsP(that) )
        return 0L;
    // intersection is [max(starts), min(stops)], inclusive on both ends
    final long overlapStart = Math.max( getStart(), that.getStart() );
    final long overlapStop = Math.min( getStop(), that.getStop() );
    return overlapStop - overlapStart + 1L;
}
/**
 * Returns the maximum GenomeLoc of this and other
 *
 * @param other another non-null genome loc
 * @return other when it compares greater than this; otherwise this
 */
public GenomeLoc max(final GenomeLoc other) {
    // BUGFIX: the Comparable contract only guarantees a *negative* value when
    // this < other, not exactly -1, so test the sign instead of == -1.
    return this.compareTo(other) < 0 ? other : this;
}
}

View File

@ -8,6 +8,8 @@ import java.util.Collection;
import java.util.Iterator;
/**
* THIS IMPLEMENTATION IS BROKEN AND WILL BE REMOVED ONCE THE DOWNSAMPLING ENGINE FORK COLLAPSES
*
* Randomly downsample from a stream of elements. This algorithm is a direct,
* naive implementation of reservoir downsampling as described in "Random Downsampling
* with a Reservoir" (Vitter 1985). At time of writing, this paper is located here:
@ -16,7 +18,7 @@ import java.util.Iterator;
* @author mhanna
* @version 0.1
*/
public class ReservoirDownsampler<T> {
public class LegacyReservoirDownsampler<T> {
/**
* The reservoir of elements tracked by this downsampler.
*/
@ -31,7 +33,7 @@ public class ReservoirDownsampler<T> {
* Create a new downsampler with the given source iterator and given comparator.
* @param maxElements What is the maximum number of reads that can be returned in any call of this
*/
public ReservoirDownsampler(final int maxElements) {
public LegacyReservoirDownsampler(final int maxElements) {
if(maxElements < 0)
throw new ReviewedStingException("Unable to work with an negative size collection of elements");
this.reservoir = new ArrayList<T>(maxElements);

View File

@ -2,7 +2,6 @@ package org.broadinstitute.sting.utils.nanoScheduler;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.MultiThreadedErrorTracker;
import org.broadinstitute.sting.utils.SimpleTimer;
import java.util.Iterator;
import java.util.concurrent.BlockingQueue;
@ -19,11 +18,6 @@ class InputProducer<InputType> implements Runnable {
*/
final Iterator<InputType> inputReader;
/**
* Our timer (may be null) that we use to track our input costs
*/
final SimpleTimer inputTimer;
/**
* Where we put our input values for consumption
*/
@ -51,16 +45,13 @@ class InputProducer<InputType> implements Runnable {
public InputProducer(final Iterator<InputType> inputReader,
final MultiThreadedErrorTracker errorTracker,
final SimpleTimer inputTimer,
final BlockingQueue<InputValue> outputQueue) {
if ( inputReader == null ) throw new IllegalArgumentException("inputReader cannot be null");
if ( errorTracker == null ) throw new IllegalArgumentException("errorTracker cannot be null");
if ( inputTimer == null ) throw new IllegalArgumentException("inputTimer cannot be null");
if ( outputQueue == null ) throw new IllegalArgumentException("OutputQueue cannot be null");
this.inputReader = inputReader;
this.errorTracker = errorTracker;
this.inputTimer = inputTimer;
this.outputQueue = outputQueue;
}
@ -94,16 +85,15 @@ class InputProducer<InputType> implements Runnable {
* @throws InterruptedException
*/
private synchronized InputType readNextItem() throws InterruptedException {
inputTimer.restart();
if ( ! inputReader.hasNext() ) {
// we are done, mark ourselves as such and return null
readLastValue = true;
inputTimer.stop();
return null;
} else {
// get the next value, and return it
final InputType input = inputReader.next();
inputTimer.stop();
if ( input == null )
throw new IllegalStateException("inputReader.next() returned a null value, breaking our contract");
nRead++;
return input;
}
@ -121,6 +111,9 @@ class InputProducer<InputType> implements Runnable {
final InputType value = readNextItem();
if ( value == null ) {
if ( ! readLastValue )
throw new IllegalStateException("value == null but readLastValue is false!");
// add the EOF object so our consumer knows we are done in all inputs
// note that we do not increase inputID here, so that variable indicates the ID
// of the last real value read from the queue
@ -133,8 +126,10 @@ class InputProducer<InputType> implements Runnable {
}
latch.countDown();
} catch (Exception ex) {
} catch (Throwable ex) {
errorTracker.notifyOfError(ex);
} finally {
// logger.info("Exiting input thread readLastValue = " + readLastValue);
}
}

View File

@ -1,67 +0,0 @@
package org.broadinstitute.sting.utils.nanoScheduler;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.AutoFormattingTime;
import org.broadinstitute.sting.utils.SimpleTimer;
/**
* Holds runtime profile (input, read, map) times as tracked by NanoScheduler
*
* User: depristo
* Date: 9/10/12
* Time: 8:31 PM
*/
public class NSRuntimeProfile {
    // One timer per phase of the nano-scheduler's work; the names are labels only.
    final SimpleTimer outsideSchedulerTimer = new SimpleTimer("outside");
    final SimpleTimer inputTimer = new SimpleTimer("input");
    final SimpleTimer mapTimer = new SimpleTimer("map");
    final SimpleTimer reduceTimer = new SimpleTimer("reduce");

    /**
     * Fold the elapsed-time information tracked by another profile into this one.
     *
     * @param other a non-null profile whose elapsed times are added to ours
     */
    public void combine(final NSRuntimeProfile other) {
        outsideSchedulerTimer.addElapsed(other.outsideSchedulerTimer);
        inputTimer.addElapsed(other.inputTimer);
        mapTimer.addElapsed(other.mapTimer);
        reduceTimer.addElapsed(other.reduceTimer);
    }

    /**
     * Write one line per phase to the logger, showing elapsed time and the
     * percentage of the total runtime it represents.
     *
     * @param logger destination for the profiling lines
     */
    public void log(final Logger logger) {
        log1(logger, "Input time", inputTimer);
        log1(logger, "Map time", mapTimer);
        log1(logger, "Reduce time", reduceTimer);
        log1(logger, "Outside time", outsideSchedulerTimer);
    }

    /**
     * @return the total runtime for all functions of this nano scheduler
     */
    //@Ensures("result >= 0.0")
    public double totalRuntimeInSeconds() {
        double total = inputTimer.getElapsedTime();
        total += mapTimer.getElapsedTime();
        total += reduceTimer.getElapsedTime();
        total += outsideSchedulerTimer.getElapsedTime();
        return total;
    }

    /**
     * Log a single timer's elapsed time and its share of the total runtime.
     *
     * @param label the name of the timer to display. Should be human readable
     * @param timer the timer whose elapsed time we will display
     */
    //@Requires({"label != null", "timer != null"})
    private void log1(final Logger logger, final String label, final SimpleTimer timer) {
        final double elapsedSec = timer.getElapsedTime();
        final double percentOfTotal = elapsedSec / totalRuntimeInSeconds() * 100;
        logger.info(String.format("%s: %s (%5.2f%%)", label, new AutoFormattingTime(elapsedSec), percentOfTotal));
    }
}

View File

@ -57,16 +57,6 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
boolean debug = false;
private NSProgressFunction<InputType> progressFunction = null;
/**
* Tracks the combined runtime profiles across all created nano schedulers
*/
final static private NSRuntimeProfile combinedNSRuntimeProfiler = new NSRuntimeProfile();
/**
* The profile specific to this nano scheduler
*/
final private NSRuntimeProfile myNSRuntimeProfile = new NSRuntimeProfile();
/**
* Create a new nanoscheduler with the desire characteristics requested by the argument
*
@ -94,9 +84,6 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
this.inputExecutor = Executors.newSingleThreadExecutor(new NamedThreadFactory("NS-input-thread-%d"));
this.masterExecutor = Executors.newSingleThreadExecutor(new NamedThreadFactory("NS-master-thread-%d"));
}
// start timing the time spent outside of the nanoScheduler
myNSRuntimeProfile.outsideSchedulerTimer.start();
}
/**
@ -123,11 +110,6 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
* After this call, execute cannot be invoked without throwing an error
*/
public void shutdown() {
myNSRuntimeProfile.outsideSchedulerTimer.stop();
// add my timing information to the combined NS runtime profile
combinedNSRuntimeProfiler.combine(myNSRuntimeProfile);
if ( nThreads > 1 ) {
shutdownExecutor("inputExecutor", inputExecutor);
shutdownExecutor("mapExecutor", mapExecutor);
@ -137,19 +119,6 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
shutdown = true;
}
public void printRuntimeProfile() {
myNSRuntimeProfile.log(logger);
}
public static void printCombinedRuntimeProfile() {
if ( combinedNSRuntimeProfiler.totalRuntimeInSeconds() > 0.1 )
combinedNSRuntimeProfiler.log(logger);
}
protected double getTotalRuntime() {
return myNSRuntimeProfile.totalRuntimeInSeconds();
}
/**
* Helper function to cleanly shutdown an execution service, checking that the execution
* state is clean when it's done.
@ -245,8 +214,6 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
if ( map == null ) throw new IllegalArgumentException("map function cannot be null");
if ( reduce == null ) throw new IllegalArgumentException("reduce function cannot be null");
myNSRuntimeProfile.outsideSchedulerTimer.stop();
ReduceType result;
if ( ALLOW_SINGLE_THREAD_FASTPATH && getnThreads() == 1 ) {
result = executeSingleThreaded(inputReader, map, initialValue, reduce);
@ -254,7 +221,6 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
result = executeMultiThreaded(inputReader, map, initialValue, reduce);
}
myNSRuntimeProfile.outsideSchedulerTimer.restart();
return result;
}
@ -273,28 +239,19 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
while ( true ) {
// start timer to ensure that both hasNext and next are caught by the timer
myNSRuntimeProfile.inputTimer.restart();
if ( ! inputReader.hasNext() ) {
myNSRuntimeProfile.inputTimer.stop();
break;
} else {
final InputType input = inputReader.next();
myNSRuntimeProfile.inputTimer.stop();
// map
myNSRuntimeProfile.mapTimer.restart();
final long preMapTime = LOG_MAP_TIMES ? 0 : myNSRuntimeProfile.mapTimer.currentTimeNano();
final MapType mapValue = map.apply(input);
if ( LOG_MAP_TIMES ) logger.info("MAP TIME " + (myNSRuntimeProfile.mapTimer.currentTimeNano() - preMapTime));
myNSRuntimeProfile.mapTimer.stop();
if ( i++ % this.bufferSize == 0 && progressFunction != null )
if ( progressFunction != null )
progressFunction.progress(input);
// reduce
myNSRuntimeProfile.reduceTimer.restart();
sum = reduce.apply(mapValue, sum);
myNSRuntimeProfile.reduceTimer.stop();
}
}
@ -320,6 +277,7 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
while ( true ) {
// check that no errors occurred while we were waiting
handleErrors();
// checkForDeadlocks();
try {
final ReduceType result = reduceResult.get(100, TimeUnit.MILLISECONDS);
@ -341,6 +299,26 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
}
}
// private void checkForDeadlocks() {
// if ( deadLockCheckCounter++ % 100 == 0 ) {
// logger.info("Checking for deadlocks...");
// final ThreadMXBean bean = ManagementFactory.getThreadMXBean();
// final long[] threadIds = bean.findDeadlockedThreads(); // Returns null if no threads are deadlocked.
//
// if (threadIds != null) {
// final ThreadInfo[] infos = bean.getThreadInfo(threadIds);
//
// logger.error("!!! Deadlock detected !!!!");
// for (final ThreadInfo info : infos) {
// logger.error("Thread " + info);
// for ( final StackTraceElement elt : info.getStackTrace() ) {
// logger.error("\t" + elt.toString());
// }
// }
// }
// }
// }
private void handleErrors() {
if ( errorTracker.hasAnErrorOccurred() ) {
masterExecutor.shutdownNow();
@ -380,7 +358,7 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
// Create the input producer and start it running
final InputProducer<InputType> inputProducer =
new InputProducer<InputType>(inputReader, errorTracker, myNSRuntimeProfile.inputTimer, inputQueue);
new InputProducer<InputType>(inputReader, errorTracker, inputQueue);
inputExecutor.submit(inputProducer);
// a priority queue that stores up to bufferSize elements
@ -389,7 +367,7 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
new PriorityBlockingQueue<MapResult<MapType>>();
final Reducer<MapType, ReduceType> reducer
= new Reducer<MapType, ReduceType>(reduce, errorTracker, myNSRuntimeProfile.reduceTimer, initialValue);
= new Reducer<MapType, ReduceType>(reduce, errorTracker, initialValue);
try {
int nSubmittedJobs = 0;
@ -408,7 +386,8 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
// wait for all of the input and map threads to finish
return waitForCompletion(inputProducer, reducer);
} catch (Exception ex) {
} catch (Throwable ex) {
// logger.warn("Reduce job got exception " + ex);
errorTracker.notifyOfError(ex);
return initialValue;
}
@ -486,16 +465,12 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
final InputType input = inputWrapper.getValue();
// map
myNSRuntimeProfile.mapTimer.restart();
final long preMapTime = LOG_MAP_TIMES ? 0 : myNSRuntimeProfile.mapTimer.currentTimeNano();
final MapType mapValue = map.apply(input);
if ( LOG_MAP_TIMES ) logger.info("MAP TIME " + (myNSRuntimeProfile.mapTimer.currentTimeNano() - preMapTime));
myNSRuntimeProfile.mapTimer.stop();
// enqueue the result into the mapResultQueue
result = new MapResult<MapType>(mapValue, jobID);
if ( jobID % bufferSize == 0 && progressFunction != null )
if ( progressFunction != null )
progressFunction.progress(input);
} else {
// push back the EOF marker so other waiting threads can read it
@ -508,7 +483,8 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
mapResultQueue.put(result);
final int nReduced = reducer.reduceAsMuchAsPossible(mapResultQueue);
} catch (Exception ex) {
} catch (Throwable ex) {
// logger.warn("Map job got exception " + ex);
errorTracker.notifyOfError(ex);
} finally {
// we finished a map job, release the job queue semaphore

View File

@ -4,7 +4,6 @@ import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.MultiThreadedErrorTracker;
import org.broadinstitute.sting.utils.SimpleTimer;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.PriorityBlockingQueue;
@ -34,7 +33,6 @@ class Reducer<MapType, ReduceType> {
final CountDownLatch countDownLatch = new CountDownLatch(1);
final NSReduceFunction<MapType, ReduceType> reduce;
final SimpleTimer reduceTimer;
final MultiThreadedErrorTracker errorTracker;
/**
@ -61,20 +59,16 @@ class Reducer<MapType, ReduceType> {
* reduceTimer
*
* @param reduce the reduce function to apply
* @param reduceTimer the timer to time the reduce function call
* @param initialSum the initial reduce sum
*/
public Reducer(final NSReduceFunction<MapType, ReduceType> reduce,
final MultiThreadedErrorTracker errorTracker,
final SimpleTimer reduceTimer,
final ReduceType initialSum) {
if ( errorTracker == null ) throw new IllegalArgumentException("Error tracker cannot be null");
if ( reduce == null ) throw new IllegalArgumentException("Reduce function cannot be null");
if ( reduceTimer == null ) throw new IllegalArgumentException("reduceTimer cannot be null");
this.errorTracker = errorTracker;
this.reduce = reduce;
this.reduceTimer = reduceTimer;
this.sum = initialSum;
}
@ -125,10 +119,7 @@ class Reducer<MapType, ReduceType> {
nReducesNow++;
// apply reduce, keeping track of sum
reduceTimer.restart();
sum = reduce.apply(result.getValue(), sum);
reduceTimer.stop();
}
numJobsReduced++;

View File

@ -26,6 +26,7 @@ package org.broadinstitute.sting.utils.progressmeter;
import com.google.java.contract.Ensures;
import com.google.java.contract.Invariant;
import com.google.java.contract.Requires;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -143,6 +144,12 @@ public class ProgressMeter {
/** We use the SimpleTimer to time our run */
private final SimpleTimer timer = new SimpleTimer();
private GenomeLoc maxGenomeLoc = null;
private String positionMessage = "starting";
private long nTotalRecordsProcessed = 0;
final ProgressMeterDaemon progressMeterDaemon;
/**
* Create a new ProgressMeter
*
@ -177,21 +184,15 @@ public class ProgressMeter {
targetSizeInBP = processingIntervals.coveredSize();
// start up the timer
progressMeterDaemon = new ProgressMeterDaemon(this);
start();
}
/**
* Forward request to notifyOfProgress
*
* Assumes that one cycle has been completed
*
* @param loc our current location. Null means "in unmapped reads"
* @param nTotalRecordsProcessed the total number of records we've processed
* Start up the progress meter, printing initialization message and starting up the
* daemon thread for periodic printing.
*/
public void notifyOfProgress(final GenomeLoc loc, final long nTotalRecordsProcessed) {
notifyOfProgress(loc, false, nTotalRecordsProcessed);
}
@Requires("progressMeterDaemon != null")
private synchronized void start() {
timer.start();
lastProgressPrintTime = timer.currentTime();
@ -199,6 +200,8 @@ public class ProgressMeter {
logger.info("[INITIALIZATION COMPLETE; STARTING PROCESSING]");
logger.info(String.format("%15s processed.%s runtime per.1M.%s completed total.runtime remaining",
"Location", processingUnitName, processingUnitName));
progressMeterDaemon.start();
}
/**
@ -216,19 +219,41 @@ public class ProgressMeter {
* Synchronized to ensure that even with multiple threads calling notifyOfProgress we still
* get one clean stream of meter logs.
*
* Note this method doesn't actually print progress; it just registers
* the progress itself. A separate printing daemon periodically polls the meter to print out
* progress
*
* @param loc Current location, can be null if you are at the end of the processing unit
* @param mustPrint If true, will print out info, regardless of time interval
* @param nTotalRecordsProcessed the total number of records we've processed
*/
private synchronized void notifyOfProgress(final GenomeLoc loc, boolean mustPrint, final long nTotalRecordsProcessed) {
public synchronized void notifyOfProgress(final GenomeLoc loc, final long nTotalRecordsProcessed) {
if ( nTotalRecordsProcessed < 0 ) throw new IllegalArgumentException("nTotalRecordsProcessed must be >= 0");
// weird comparison to ensure that loc == null (in unmapped reads) is kept before maxGenomeLoc == null (on startup)
this.maxGenomeLoc = loc == null ? loc : (maxGenomeLoc == null ? loc : loc.max(maxGenomeLoc));
this.nTotalRecordsProcessed = Math.max(this.nTotalRecordsProcessed, nTotalRecordsProcessed);
// a pretty name for our position
this.positionMessage = maxGenomeLoc == null
? "unmapped reads"
: String.format("%s:%d", maxGenomeLoc.getContig(), maxGenomeLoc.getStart());
}
/**
* Actually try to print out progress
*
* This function may print out if the progress print is due, but if not enough time has elapsed
* since the last print we will not print out information.
*
* @param mustPrint if true, progress will be printed regardless of the last time we printed progress
*/
protected synchronized void printProgress(final boolean mustPrint) {
final long curTime = timer.currentTime();
final boolean printProgress = mustPrint || maxElapsedIntervalForPrinting(curTime, lastProgressPrintTime, progressPrintFrequency);
final boolean printLog = performanceLog != null && maxElapsedIntervalForPrinting(curTime, lastPerformanceLogPrintTime, PERFORMANCE_LOG_PRINT_FREQUENCY);
if ( printProgress || printLog ) {
final ProgressMeterData progressData = takeProgressSnapshot(loc, nTotalRecordsProcessed);
final ProgressMeterData progressData = takeProgressSnapshot(maxGenomeLoc, nTotalRecordsProcessed);
final AutoFormattingTime elapsed = new AutoFormattingTime(progressData.getElapsedSeconds());
final AutoFormattingTime bpRate = new AutoFormattingTime(progressData.secondsPerMillionBP());
@ -241,13 +266,8 @@ public class ProgressMeter {
lastProgressPrintTime = curTime;
updateLoggerPrintFrequency(estTotalRuntime.getTimeInSeconds());
// a pretty name for our position
final String posName = loc == null
? (mustPrint ? "done" : "unmapped reads")
: String.format("%s:%d", loc.getContig(), loc.getStart());
logger.info(String.format("%15s %5.2e %s %s %5.1f%% %s %s",
posName, progressData.getUnitsProcessed()*1.0, elapsed, unitRate,
positionMessage, progressData.getUnitsProcessed()*1.0, elapsed, unitRate,
100*fractionGenomeTargetCompleted, estTotalRuntime, timeToCompletion));
}
@ -296,13 +316,18 @@ public class ProgressMeter {
*/
public void notifyDone(final long nTotalRecordsProcessed) {
// print out the progress meter
notifyOfProgress(null, true, nTotalRecordsProcessed);
this.nTotalRecordsProcessed = nTotalRecordsProcessed;
this.positionMessage = "done";
printProgress(true);
logger.info(String.format("Total runtime %.2f secs, %.2f min, %.2f hours",
timer.getElapsedTime(), timer.getElapsedTime() / 60, timer.getElapsedTime() / 3600));
if ( performanceLog != null )
performanceLog.close();
// shutdown our daemon thread
progressMeterDaemon.done();
}
/**

View File

@ -0,0 +1,60 @@
package org.broadinstitute.sting.utils.progressmeter;
/**
* Daemon thread that periodically prints the progress of the progress meter
*
* User: depristo
* Date: 12/4/12
* Time: 9:16 PM
*/
public final class ProgressMeterDaemon extends Thread {
    /**
     * How frequently (in milliseconds) should we poll the meter and print progress?
     */
    private final static long POLL_FREQUENCY_MILLISECONDS = 10 * 1000;

    /**
     * Are we to continue periodically printing status, or should we shut down?
     *
     * Declared volatile because it is written by the thread calling done() and
     * read by this daemon thread's polling loop; without volatile the write may
     * never become visible and the daemon could poll forever.
     */
    volatile boolean done = false;

    /**
     * The meter we will call printProgress on
     */
    final ProgressMeter meter;

    /**
     * Create a new ProgressMeterDaemon printing progress for meter
     *
     * @param meter the progress meter to print progress of; must not be null
     */
    public ProgressMeterDaemon(final ProgressMeter meter) {
        if ( meter == null ) throw new IllegalArgumentException("meter cannot be null");
        this.meter = meter;
        // daemon thread: never prevents JVM shutdown on its own
        setDaemon(true);
        setName("ProgressMeterDaemon");
    }

    /**
     * Tells this daemon thread to shutdown at the next opportunity, as the progress
     * metering is complete.
     */
    public final void done() {
        this.done = true;
    }

    /**
     * Start up the ProgressMeterDaemon, polling every ten seconds to print, if
     * necessary, the provided progress meter.  Runs until done() is called, the
     * thread is interrupted, or the JVM exits (the thread is a daemon thread).
     */
    @Override
    public void run() {
        while (! done) {
            meter.printProgress(false);
            try {
                Thread.sleep(POLL_FREQUENCY_MILLISECONDS);
            } catch (InterruptedException e) {
                // Restore the interrupt status and stop polling: throwing from a
                // daemon thread's run() would only kill this thread silently anyway.
                Thread.currentThread().interrupt();
                done = true;
            }
        }
    }
}

View File

@ -448,11 +448,47 @@ public class VariantContextUtils {
final String setKey,
final boolean filteredAreUncalled,
final boolean mergeInfoWithMaxAC ) {
int originalNumOfVCs = priorityListOfVCs == null ? 0 : priorityListOfVCs.size();
return simpleMerge(genomeLocParser,unsortedVCs,priorityListOfVCs,originalNumOfVCs,filteredRecordMergeType,genotypeMergeOptions,annotateOrigin,printMessages,setKey,filteredAreUncalled,mergeInfoWithMaxAC);
}
/**
* Merges VariantContexts into a single hybrid. Takes genotypes for common samples in priority order, if provided.
* If uniquifySamples is true, the priority order is ignored and names are created by concatenating the VC name with
* the sample name
*
* @param genomeLocParser loc parser
* @param unsortedVCs collection of unsorted VCs
* @param priorityListOfVCs priority list detailing the order in which we should grab the VCs
* @param filteredRecordMergeType merge type for filtered records
* @param genotypeMergeOptions merge option for genotypes
* @param annotateOrigin should we annotate the set it came from?
* @param printMessages should we print messages?
* @param setKey the key name of the set
* @param filteredAreUncalled are filtered records uncalled?
* @param mergeInfoWithMaxAC should we merge in info from the VC with maximum allele count?
* @return new VariantContext representing the merge of unsortedVCs
*/
public static VariantContext simpleMerge(final GenomeLocParser genomeLocParser,
final Collection<VariantContext> unsortedVCs,
final List<String> priorityListOfVCs,
final int originalNumOfVCs,
final FilteredRecordMergeType filteredRecordMergeType,
final GenotypeMergeType genotypeMergeOptions,
final boolean annotateOrigin,
final boolean printMessages,
final String setKey,
final boolean filteredAreUncalled,
final boolean mergeInfoWithMaxAC ) {
if ( unsortedVCs == null || unsortedVCs.size() == 0 )
return null;
if ( annotateOrigin && priorityListOfVCs == null )
throw new IllegalArgumentException("Cannot merge calls and annotate their origins without a complete priority list of VariantContexts");
if (priorityListOfVCs != null && originalNumOfVCs != priorityListOfVCs.size())
throw new IllegalArgumentException("the number of the original VariantContexts must be the same as the number of VariantContexts in the priority list");
if ( annotateOrigin && priorityListOfVCs == null && originalNumOfVCs == 0)
throw new IllegalArgumentException("Cannot merge calls and annotate their origins without a complete priority list of VariantContexts or the number of original VariantContexts");
if ( genotypeMergeOptions == GenotypeMergeType.REQUIRE_UNIQUE )
verifyUniqueSampleNames(unsortedVCs);
@ -597,7 +633,7 @@ public class VariantContextUtils {
if ( annotateOrigin ) { // we care about where the call came from
String setValue;
if ( nFiltered == 0 && variantSources.size() == priorityListOfVCs.size() ) // nothing was unfiltered
if ( nFiltered == 0 && variantSources.size() == originalNumOfVCs ) // nothing was unfiltered
setValue = MERGE_INTERSECTION;
else if ( nFiltered == VCs.size() ) // everything was filtered out
setValue = MERGE_FILTER_IN_ALL;

View File

@ -248,7 +248,8 @@ class VCFWriter extends IndexingVariantContextWriter {
}
mWriter.write("\n");
mWriter.flush(); // necessary so that writing to an output stream will work
// note that we cannot call flush here if we want block gzipping to work properly
// calling flush would result in a separate gzipped block for each variant
} catch (IOException e) {
throw new RuntimeException("Unable to write the VCF object to " + getStreamName(), e);
}

View File

@ -32,11 +32,10 @@ import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.commandline.Tags;
import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.filters.ReadFilter;
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
import org.broadinstitute.sting.gatk.iterators.LegacyLocusIteratorByState;
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
import org.broadinstitute.sting.gatk.walkers.qc.CountLoci;
import org.broadinstitute.sting.utils.GenomeLocParser;
@ -85,7 +84,7 @@ public class DownsamplerBenchmark extends ReadProcessingBenchmark {
GenomeLocParser genomeLocParser = new GenomeLocParser(reader.getFileHeader().getSequenceDictionary());
// Filter unmapped reads. TODO: is this always strictly necessary? Who in the GATK normally filters these out?
Iterator<SAMRecord> readIterator = new FilteringIterator(reader.iterator(),new UnmappedReadFilter());
LocusIteratorByState locusIteratorByState = new LocusIteratorByState(readIterator,readProperties,genomeLocParser, LocusIteratorByState.sampleListForSAMWithoutReadGroups());
LegacyLocusIteratorByState locusIteratorByState = new LegacyLocusIteratorByState(readIterator,readProperties,genomeLocParser, LegacyLocusIteratorByState.sampleListForSAMWithoutReadGroups());
while(locusIteratorByState.hasNext()) {
locusIteratorByState.next().getLocation();
}

View File

@ -45,10 +45,10 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
public class ExperimentalReadShardBalancerUnitTest extends BaseTest {
public class ReadShardBalancerUnitTest extends BaseTest {
/**
* Tests to ensure that ExperimentalReadShardBalancer works as expected and does not place shard boundaries
* Tests to ensure that ReadShardBalancer works as expected and does not place shard boundaries
* at inappropriate places, such as within an alignment start position
*/
private static class ExperimentalReadShardBalancerTest extends TestDataProvider {
@ -74,7 +74,7 @@ public class ExperimentalReadShardBalancerUnitTest extends BaseTest {
this.stackSize = stackSize;
this.numUnmappedReads = numUnmappedReads;
this.downsamplingMethod = new DownsamplingMethod(DownsampleType.BY_SAMPLE, downsamplingTargetCoverage, null, true);
this.downsamplingMethod = new DownsamplingMethod(DownsampleType.BY_SAMPLE, downsamplingTargetCoverage, null, false);
this.expectedReadCount = Math.min(stackSize, downsamplingTargetCoverage) * numStacksPerContig * numContigs + numUnmappedReads;
setName(String.format("%s: numContigs=%d numStacksPerContig=%d stackSize=%d numUnmappedReads=%d downsamplingTargetCoverage=%d",
@ -96,7 +96,7 @@ public class ExperimentalReadShardBalancerUnitTest extends BaseTest {
new ArrayList<ReadFilter>(),
false);
Iterable<Shard> shardIterator = dataSource.createShardIteratorOverAllReads(new ExperimentalReadShardBalancer());
Iterable<Shard> shardIterator = dataSource.createShardIteratorOverAllReads(new ReadShardBalancer());
SAMRecord readAtEndOfLastShard = null;
int totalReadsSeen = 0;

View File

@ -7,10 +7,8 @@ import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
import org.broadinstitute.sting.gatk.filters.ReadFilter;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
@ -21,14 +19,17 @@ import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.*;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
/**
* testing of the experimental version of LocusIteratorByState
* testing of the LEGACY version of LocusIteratorByState
*/
public class LocusIteratorByStateExperimentalUnitTest extends BaseTest {
public class LegacyLocusIteratorByStateUnitTest extends BaseTest {
private static SAMFileHeader header;
private LocusIteratorByStateExperimental li;
private LegacyLocusIteratorByState li;
private GenomeLocParser genomeLocParser;
@BeforeClass
@ -37,8 +38,8 @@ public class LocusIteratorByStateExperimentalUnitTest extends BaseTest {
genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
}
private LocusIteratorByStateExperimental makeLTBS(List<SAMRecord> reads, ReadProperties readAttributes) {
return new LocusIteratorByStateExperimental(new FakeCloseableIterator<SAMRecord>(reads.iterator()), readAttributes, genomeLocParser, LocusIteratorByStateExperimental.sampleListForSAMWithoutReadGroups());
private LegacyLocusIteratorByState makeLTBS(List<SAMRecord> reads, ReadProperties readAttributes) {
return new LegacyLocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()), readAttributes, genomeLocParser, LegacyLocusIteratorByState.sampleListForSAMWithoutReadGroups());
}
@Test
@ -328,184 +329,14 @@ public class LocusIteratorByStateExperimentalUnitTest extends BaseTest {
// End comprehensive LIBS/PileupElement tests //
////////////////////////////////////////////////
///////////////////////////////////////
// Read State Manager Tests //
///////////////////////////////////////
private class PerSampleReadStateManagerTest extends TestDataProvider {
private List<Integer> readCountsPerAlignmentStart;
private List<SAMRecord> reads;
private List<ArrayList<LocusIteratorByStateExperimental.SAMRecordState>> recordStatesByAlignmentStart;
private int removalInterval;
public PerSampleReadStateManagerTest( List<Integer> readCountsPerAlignmentStart, int removalInterval ) {
super(PerSampleReadStateManagerTest.class);
this.readCountsPerAlignmentStart = readCountsPerAlignmentStart;
this.removalInterval = removalInterval;
reads = new ArrayList<SAMRecord>();
recordStatesByAlignmentStart = new ArrayList<ArrayList<LocusIteratorByStateExperimental.SAMRecordState>>();
setName(String.format("%s: readCountsPerAlignmentStart: %s removalInterval: %d",
getClass().getSimpleName(), readCountsPerAlignmentStart, removalInterval));
}
public void run() {
LocusIteratorByStateExperimental libs = makeLTBS(new ArrayList<SAMRecord>(), createTestReadProperties());
LocusIteratorByStateExperimental.ReadStateManager readStateManager =
libs.new ReadStateManager(new ArrayList<SAMRecord>().iterator());
LocusIteratorByStateExperimental.ReadStateManager.PerSampleReadStateManager perSampleReadStateManager =
readStateManager.new PerSampleReadStateManager();
makeReads();
for ( ArrayList<LocusIteratorByStateExperimental.SAMRecordState> stackRecordStates : recordStatesByAlignmentStart ) {
perSampleReadStateManager.addStatesAtNextAlignmentStart(stackRecordStates);
}
// read state manager should have the right number of reads
Assert.assertEquals(reads.size(), perSampleReadStateManager.size());
Iterator<SAMRecord> originalReadsIterator = reads.iterator();
Iterator<LocusIteratorByStateExperimental.SAMRecordState> recordStateIterator = perSampleReadStateManager.iterator();
int recordStateCount = 0;
int numReadStatesRemoved = 0;
// Do a first-pass validation of the record state iteration by making sure we get back everything we
// put in, in the same order, doing any requested removals of read states along the way
while ( recordStateIterator.hasNext() ) {
LocusIteratorByStateExperimental.SAMRecordState readState = recordStateIterator.next();
recordStateCount++;
SAMRecord readFromPerSampleReadStateManager = readState.getRead();
Assert.assertTrue(originalReadsIterator.hasNext());
SAMRecord originalRead = originalReadsIterator.next();
// The read we get back should be literally the same read in memory as we put in
Assert.assertTrue(originalRead == readFromPerSampleReadStateManager);
// If requested, remove a read state every removalInterval states
if ( removalInterval > 0 && recordStateCount % removalInterval == 0 ) {
recordStateIterator.remove();
numReadStatesRemoved++;
}
}
Assert.assertFalse(originalReadsIterator.hasNext());
// If we removed any read states, do a second pass through the read states to make sure the right
// states were removed
if ( numReadStatesRemoved > 0 ) {
Assert.assertEquals(perSampleReadStateManager.size(), reads.size() - numReadStatesRemoved);
originalReadsIterator = reads.iterator();
recordStateIterator = perSampleReadStateManager.iterator();
int readCount = 0;
int readStateCount = 0;
// Match record states with the reads that should remain after removal
while ( recordStateIterator.hasNext() ) {
LocusIteratorByStateExperimental.SAMRecordState readState = recordStateIterator.next();
readStateCount++;
SAMRecord readFromPerSampleReadStateManager = readState.getRead();
Assert.assertTrue(originalReadsIterator.hasNext());
SAMRecord originalRead = originalReadsIterator.next();
readCount++;
if ( readCount % removalInterval == 0 ) {
originalRead = originalReadsIterator.next(); // advance to next read, since the previous one should have been discarded
readCount++;
}
// The read we get back should be literally the same read in memory as we put in (after accounting for removals)
Assert.assertTrue(originalRead == readFromPerSampleReadStateManager);
}
Assert.assertEquals(readStateCount, reads.size() - numReadStatesRemoved);
}
// Allow memory used by this test to be reclaimed
readCountsPerAlignmentStart = null;
reads = null;
recordStatesByAlignmentStart = null;
}
private void makeReads() {
int alignmentStart = 1;
for ( int readsThisStack : readCountsPerAlignmentStart ) {
ArrayList<SAMRecord> stackReads = new ArrayList<SAMRecord>(ArtificialSAMUtils.createStackOfIdenticalArtificialReads(readsThisStack, header, "foo", 0, alignmentStart, MathUtils.randomIntegerInRange(50, 100)));
ArrayList<LocusIteratorByStateExperimental.SAMRecordState> stackRecordStates = new ArrayList<LocusIteratorByStateExperimental.SAMRecordState>();
for ( SAMRecord read : stackReads ) {
stackRecordStates.add(new LocusIteratorByStateExperimental.SAMRecordState(read));
}
reads.addAll(stackReads);
recordStatesByAlignmentStart.add(stackRecordStates);
}
}
}
@DataProvider(name = "PerSampleReadStateManagerTestDataProvider")
public Object[][] createPerSampleReadStateManagerTests() {
for ( List<Integer> thisTestReadStateCounts : Arrays.asList( Arrays.asList(1),
Arrays.asList(2),
Arrays.asList(10),
Arrays.asList(1, 1),
Arrays.asList(2, 2),
Arrays.asList(10, 10),
Arrays.asList(1, 10),
Arrays.asList(10, 1),
Arrays.asList(1, 1, 1),
Arrays.asList(2, 2, 2),
Arrays.asList(10, 10, 10),
Arrays.asList(1, 1, 1, 1, 1, 1),
Arrays.asList(10, 10, 10, 10, 10, 10),
Arrays.asList(1, 2, 10, 1, 2, 10)
) ) {
for ( int removalInterval : Arrays.asList(0, 2, 3) ) {
new PerSampleReadStateManagerTest(thisTestReadStateCounts, removalInterval);
}
}
return PerSampleReadStateManagerTest.getTests(PerSampleReadStateManagerTest.class);
}
@Test(dataProvider = "PerSampleReadStateManagerTestDataProvider")
public void runPerSampleReadStateManagerTest( PerSampleReadStateManagerTest test ) {
logger.warn("Running test: " + test);
test.run();
}
///////////////////////////////////////
// End Read State Manager Tests //
///////////////////////////////////////
///////////////////////////////////////
// Helper methods / classes //
///////////////////////////////////////
private static ReadProperties createTestReadProperties() {
return createTestReadProperties(null);
}
private static ReadProperties createTestReadProperties( DownsamplingMethod downsamplingMethod ) {
return new ReadProperties(
Collections.<SAMReaderID>emptyList(),
new SAMFileHeader(),
SAMFileHeader.SortOrder.coordinate,
false,
SAMFileReader.ValidationStringency.STRICT,
downsamplingMethod,
null,
new ValidationExclusion(),
Collections.<ReadFilter>emptyList(),
Collections.<ReadTransformer>emptyList(),
@ -513,136 +344,137 @@ public class LocusIteratorByStateExperimentalUnitTest extends BaseTest {
(byte) -1
);
}
}
private static class FakeCloseableIterator<T> implements CloseableIterator<T> {
Iterator<T> iterator;
class FakeCloseableIterator<T> implements CloseableIterator<T> {
Iterator<T> iterator;
public FakeCloseableIterator(Iterator<T> it) {
iterator = it;
}
@Override
public void close() {}
@Override
public boolean hasNext() {
return iterator.hasNext();
}
@Override
public T next() {
return iterator.next();
}
@Override
public void remove() {
throw new UnsupportedOperationException("Don't remove!");
}
public FakeCloseableIterator(Iterator<T> it) {
iterator = it;
}
private static final class LIBS_position {
@Override
public void close() {}
SAMRecord read;
@Override
public boolean hasNext() {
return iterator.hasNext();
}
final int numOperators;
int currentOperatorIndex = 0;
int currentPositionOnOperator = 0;
int currentReadOffset = 0;
@Override
public T next() {
return iterator.next();
}
boolean isBeforeDeletionStart = false;
boolean isBeforeDeletedBase = false;
boolean isAfterDeletionEnd = false;
boolean isAfterDeletedBase = false;
boolean isBeforeInsertion = false;
boolean isAfterInsertion = false;
boolean isNextToSoftClip = false;
boolean sawMop = false;
public LIBS_position(final SAMRecord read) {
this.read = read;
numOperators = read.getCigar().numCigarElements();
}
public int getCurrentReadOffset() {
return Math.max(0, currentReadOffset - 1);
}
/**
* Steps forward on the genome. Returns false when done reading the read, true otherwise.
*/
public boolean stepForwardOnGenome() {
if ( currentOperatorIndex == numOperators )
return false;
CigarElement curElement = read.getCigar().getCigarElement(currentOperatorIndex);
if ( currentPositionOnOperator >= curElement.getLength() ) {
if ( ++currentOperatorIndex == numOperators )
return false;
curElement = read.getCigar().getCigarElement(currentOperatorIndex);
currentPositionOnOperator = 0;
}
switch ( curElement.getOperator() ) {
case I: // insertion w.r.t. the reference
if ( !sawMop )
break;
case S: // soft clip
currentReadOffset += curElement.getLength();
case H: // hard clip
case P: // padding
currentOperatorIndex++;
return stepForwardOnGenome();
case D: // deletion w.r.t. the reference
case N: // reference skip (looks and gets processed just like a "deletion", just different logical meaning)
currentPositionOnOperator++;
break;
case M:
case EQ:
case X:
sawMop = true;
currentReadOffset++;
currentPositionOnOperator++;
break;
default:
throw new IllegalStateException("No support for cigar op: " + curElement.getOperator());
}
final boolean isFirstOp = currentOperatorIndex == 0;
final boolean isLastOp = currentOperatorIndex == numOperators - 1;
final boolean isFirstBaseOfOp = currentPositionOnOperator == 1;
final boolean isLastBaseOfOp = currentPositionOnOperator == curElement.getLength();
isBeforeDeletionStart = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.D, isLastOp, isLastBaseOfOp);
isBeforeDeletedBase = isBeforeDeletionStart || (!isLastBaseOfOp && curElement.getOperator() == CigarOperator.D);
isAfterDeletionEnd = isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.D, isFirstOp, isFirstBaseOfOp);
isAfterDeletedBase = isAfterDeletionEnd || (!isFirstBaseOfOp && curElement.getOperator() == CigarOperator.D);
isBeforeInsertion = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.I, isLastOp, isLastBaseOfOp)
|| (!sawMop && curElement.getOperator() == CigarOperator.I);
isAfterInsertion = isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.I, isFirstOp, isFirstBaseOfOp);
isNextToSoftClip = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.S, isLastOp, isLastBaseOfOp)
|| isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.S, isFirstOp, isFirstBaseOfOp);
return true;
}
private static boolean isBeforeOp(final Cigar cigar,
final int currentOperatorIndex,
final CigarOperator op,
final boolean isLastOp,
final boolean isLastBaseOfOp) {
return !isLastOp && isLastBaseOfOp && cigar.getCigarElement(currentOperatorIndex+1).getOperator() == op;
}
private static boolean isAfterOp(final Cigar cigar,
final int currentOperatorIndex,
final CigarOperator op,
final boolean isFirstOp,
final boolean isFirstBaseOfOp) {
return !isFirstOp && isFirstBaseOfOp && cigar.getCigarElement(currentOperatorIndex-1).getOperator() == op;
}
@Override
public void remove() {
throw new UnsupportedOperationException("Don't remove!");
}
}
final class LIBS_position {
    // Test helper that walks a read's CIGAR one genomic position at a time,
    // tracking where we are in the read and which indel/clip events surround
    // the current base (used to cross-check LocusIteratorByState pileup flags).

    SAMRecord read;

    // total number of CIGAR elements in the read, fixed at construction
    final int numOperators;
    // index of the CIGAR element we are currently inside
    int currentOperatorIndex = 0;
    // 1-based position within the current CIGAR element
    int currentPositionOnOperator = 0;
    // number of read bases consumed so far (see getCurrentReadOffset)
    int currentReadOffset = 0;

    // event flags describing the current base relative to neighboring CIGAR
    // events; all are recomputed on every successful stepForwardOnGenome() call
    boolean isBeforeDeletionStart = false;
    boolean isBeforeDeletedBase = false;
    boolean isAfterDeletionEnd = false;
    boolean isAfterDeletedBase = false;
    boolean isBeforeInsertion = false;
    boolean isAfterInsertion = false;
    boolean isNextToSoftClip = false;
    // have we seen a match-like (M/EQ/X) operator yet?  Insertions occurring
    // before any match-like operator are handled specially in the switch below.
    boolean sawMop = false;

    public LIBS_position(final SAMRecord read) {
        this.read = read;
        numOperators = read.getCigar().numCigarElements();
    }

    // Read offset of the current base; currentReadOffset is incremented before
    // it is observed, hence the -1, clamped at 0 for the pre-first-base state.
    public int getCurrentReadOffset() {
        return Math.max(0, currentReadOffset - 1);
    }

    /**
     * Steps forward on the genome. Returns false when done reading the read, true otherwise.
     */
    public boolean stepForwardOnGenome() {
        if ( currentOperatorIndex == numOperators )
            return false;

        CigarElement curElement = read.getCigar().getCigarElement(currentOperatorIndex);
        if ( currentPositionOnOperator >= curElement.getLength() ) {
            // exhausted the current element: advance to the next one (or finish)
            if ( ++currentOperatorIndex == numOperators )
                return false;

            curElement = read.getCigar().getCigarElement(currentOperatorIndex);
            currentPositionOnOperator = 0;
        }

        switch ( curElement.getOperator() ) {
            case I: // insertion w.r.t. the reference
                if ( !sawMop )
                    break;
                // deliberate fall-through: an insertion after the first
                // match-like operator consumes read bases like a soft clip
            case S: // soft clip
                currentReadOffset += curElement.getLength();
                // deliberate fall-through: S/H/P all skip to the next operator
            case H: // hard clip
            case P: // padding
                currentOperatorIndex++;
                return stepForwardOnGenome();

            case D: // deletion w.r.t. the reference
            case N: // reference skip (looks and gets processed just like a "deletion", just different logical meaning)
                currentPositionOnOperator++;
                break;

            case M:
            case EQ:
            case X:
                sawMop = true;
                currentReadOffset++;
                currentPositionOnOperator++;
                break;
            default:
                throw new IllegalStateException("No support for cigar op: " + curElement.getOperator());
        }

        // recompute the neighborhood flags for the (possibly new) current base
        final boolean isFirstOp = currentOperatorIndex == 0;
        final boolean isLastOp = currentOperatorIndex == numOperators - 1;
        final boolean isFirstBaseOfOp = currentPositionOnOperator == 1;
        final boolean isLastBaseOfOp = currentPositionOnOperator == curElement.getLength();

        isBeforeDeletionStart = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.D, isLastOp, isLastBaseOfOp);
        isBeforeDeletedBase = isBeforeDeletionStart || (!isLastBaseOfOp && curElement.getOperator() == CigarOperator.D);
        isAfterDeletionEnd = isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.D, isFirstOp, isFirstBaseOfOp);
        isAfterDeletedBase = isAfterDeletionEnd || (!isFirstBaseOfOp && curElement.getOperator() == CigarOperator.D);
        // a leading insertion (before any match-like operator) also counts as
        // "before insertion"
        isBeforeInsertion = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.I, isLastOp, isLastBaseOfOp)
                || (!sawMop && curElement.getOperator() == CigarOperator.I);
        isAfterInsertion = isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.I, isFirstOp, isFirstBaseOfOp);
        isNextToSoftClip = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.S, isLastOp, isLastBaseOfOp)
                || isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.S, isFirstOp, isFirstBaseOfOp);

        return true;
    }

    // True when the current base is the last base of its operator and the
    // immediately following operator is op (we are just before an op event).
    private static boolean isBeforeOp(final Cigar cigar,
                                      final int currentOperatorIndex,
                                      final CigarOperator op,
                                      final boolean isLastOp,
                                      final boolean isLastBaseOfOp) {
        return !isLastOp && isLastBaseOfOp && cigar.getCigarElement(currentOperatorIndex+1).getOperator() == op;
    }

    // True when the current base is the first base of its operator and the
    // immediately preceding operator is op (we are just after an op event).
    private static boolean isAfterOp(final Cigar cigar,
                                     final int currentOperatorIndex,
                                     final CigarOperator op,
                                     final boolean isFirstOp,
                                     final boolean isFirstBaseOfOp) {
        return !isFirstOp && isFirstBaseOfOp && cigar.getCigarElement(currentOperatorIndex-1).getOperator() == op;
    }
}

View File

@ -7,8 +7,10 @@ import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
import org.broadinstitute.sting.gatk.filters.ReadFilter;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
@ -19,13 +21,10 @@ import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.*;
/**
* testing of the LocusIteratorByState
* testing of the new (non-legacy) version of LocusIteratorByState
*/
public class LocusIteratorByStateUnitTest extends BaseTest {
private static SAMFileHeader header;
@ -329,14 +328,184 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
// End comprehensive LIBS/PileupElement tests //
////////////////////////////////////////////////
///////////////////////////////////////
// Read State Manager Tests //
///////////////////////////////////////
private class PerSampleReadStateManagerTest extends TestDataProvider {
private List<Integer> readCountsPerAlignmentStart;
private List<SAMRecord> reads;
private List<ArrayList<LocusIteratorByState.SAMRecordState>> recordStatesByAlignmentStart;
private int removalInterval;
public PerSampleReadStateManagerTest( List<Integer> readCountsPerAlignmentStart, int removalInterval ) {
super(PerSampleReadStateManagerTest.class);
this.readCountsPerAlignmentStart = readCountsPerAlignmentStart;
this.removalInterval = removalInterval;
reads = new ArrayList<SAMRecord>();
recordStatesByAlignmentStart = new ArrayList<ArrayList<LocusIteratorByState.SAMRecordState>>();
setName(String.format("%s: readCountsPerAlignmentStart: %s removalInterval: %d",
getClass().getSimpleName(), readCountsPerAlignmentStart, removalInterval));
}
public void run() {
LocusIteratorByState libs = makeLTBS(new ArrayList<SAMRecord>(), createTestReadProperties());
LocusIteratorByState.ReadStateManager readStateManager =
libs.new ReadStateManager(new ArrayList<SAMRecord>().iterator());
LocusIteratorByState.ReadStateManager.PerSampleReadStateManager perSampleReadStateManager =
readStateManager.new PerSampleReadStateManager();
makeReads();
for ( ArrayList<LocusIteratorByState.SAMRecordState> stackRecordStates : recordStatesByAlignmentStart ) {
perSampleReadStateManager.addStatesAtNextAlignmentStart(stackRecordStates);
}
// read state manager should have the right number of reads
Assert.assertEquals(reads.size(), perSampleReadStateManager.size());
Iterator<SAMRecord> originalReadsIterator = reads.iterator();
Iterator<LocusIteratorByState.SAMRecordState> recordStateIterator = perSampleReadStateManager.iterator();
int recordStateCount = 0;
int numReadStatesRemoved = 0;
// Do a first-pass validation of the record state iteration by making sure we get back everything we
// put in, in the same order, doing any requested removals of read states along the way
while ( recordStateIterator.hasNext() ) {
LocusIteratorByState.SAMRecordState readState = recordStateIterator.next();
recordStateCount++;
SAMRecord readFromPerSampleReadStateManager = readState.getRead();
Assert.assertTrue(originalReadsIterator.hasNext());
SAMRecord originalRead = originalReadsIterator.next();
// The read we get back should be literally the same read in memory as we put in
Assert.assertTrue(originalRead == readFromPerSampleReadStateManager);
// If requested, remove a read state every removalInterval states
if ( removalInterval > 0 && recordStateCount % removalInterval == 0 ) {
recordStateIterator.remove();
numReadStatesRemoved++;
}
}
Assert.assertFalse(originalReadsIterator.hasNext());
// If we removed any read states, do a second pass through the read states to make sure the right
// states were removed
if ( numReadStatesRemoved > 0 ) {
Assert.assertEquals(perSampleReadStateManager.size(), reads.size() - numReadStatesRemoved);
originalReadsIterator = reads.iterator();
recordStateIterator = perSampleReadStateManager.iterator();
int readCount = 0;
int readStateCount = 0;
// Match record states with the reads that should remain after removal
while ( recordStateIterator.hasNext() ) {
LocusIteratorByState.SAMRecordState readState = recordStateIterator.next();
readStateCount++;
SAMRecord readFromPerSampleReadStateManager = readState.getRead();
Assert.assertTrue(originalReadsIterator.hasNext());
SAMRecord originalRead = originalReadsIterator.next();
readCount++;
if ( readCount % removalInterval == 0 ) {
originalRead = originalReadsIterator.next(); // advance to next read, since the previous one should have been discarded
readCount++;
}
// The read we get back should be literally the same read in memory as we put in (after accounting for removals)
Assert.assertTrue(originalRead == readFromPerSampleReadStateManager);
}
Assert.assertEquals(readStateCount, reads.size() - numReadStatesRemoved);
}
// Allow memory used by this test to be reclaimed
readCountsPerAlignmentStart = null;
reads = null;
recordStatesByAlignmentStart = null;
}
private void makeReads() {
int alignmentStart = 1;
for ( int readsThisStack : readCountsPerAlignmentStart ) {
ArrayList<SAMRecord> stackReads = new ArrayList<SAMRecord>(ArtificialSAMUtils.createStackOfIdenticalArtificialReads(readsThisStack, header, "foo", 0, alignmentStart, MathUtils.randomIntegerInRange(50, 100)));
ArrayList<LocusIteratorByState.SAMRecordState> stackRecordStates = new ArrayList<LocusIteratorByState.SAMRecordState>();
for ( SAMRecord read : stackReads ) {
stackRecordStates.add(new LocusIteratorByState.SAMRecordState(read));
}
reads.addAll(stackReads);
recordStatesByAlignmentStart.add(stackRecordStates);
}
}
}
@DataProvider(name = "PerSampleReadStateManagerTestDataProvider")
public Object[][] createPerSampleReadStateManagerTests() {
for ( List<Integer> thisTestReadStateCounts : Arrays.asList( Arrays.asList(1),
Arrays.asList(2),
Arrays.asList(10),
Arrays.asList(1, 1),
Arrays.asList(2, 2),
Arrays.asList(10, 10),
Arrays.asList(1, 10),
Arrays.asList(10, 1),
Arrays.asList(1, 1, 1),
Arrays.asList(2, 2, 2),
Arrays.asList(10, 10, 10),
Arrays.asList(1, 1, 1, 1, 1, 1),
Arrays.asList(10, 10, 10, 10, 10, 10),
Arrays.asList(1, 2, 10, 1, 2, 10)
) ) {
for ( int removalInterval : Arrays.asList(0, 2, 3) ) {
new PerSampleReadStateManagerTest(thisTestReadStateCounts, removalInterval);
}
}
return PerSampleReadStateManagerTest.getTests(PerSampleReadStateManagerTest.class);
}
@Test(dataProvider = "PerSampleReadStateManagerTestDataProvider")
public void runPerSampleReadStateManagerTest( PerSampleReadStateManagerTest test ) {
logger.warn("Running test: " + test);
test.run();
}
///////////////////////////////////////
// End Read State Manager Tests //
///////////////////////////////////////
///////////////////////////////////////
// Helper methods / classes //
///////////////////////////////////////
private static ReadProperties createTestReadProperties() {
return createTestReadProperties(null);
}
private static ReadProperties createTestReadProperties( DownsamplingMethod downsamplingMethod ) {
return new ReadProperties(
Collections.<SAMReaderID>emptyList(),
new SAMFileHeader(),
SAMFileHeader.SortOrder.coordinate,
false,
SAMFileReader.ValidationStringency.STRICT,
null,
downsamplingMethod,
new ValidationExclusion(),
Collections.<ReadFilter>emptyList(),
Collections.<ReadTransformer>emptyList(),
@ -344,137 +513,136 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
(byte) -1
);
}
}
class FakeCloseableIterator<T> implements CloseableIterator<T> {
Iterator<T> iterator;
private static class FakeCloseableIterator<T> implements CloseableIterator<T> {
Iterator<T> iterator;
public FakeCloseableIterator(Iterator<T> it) {
iterator = it;
public FakeCloseableIterator(Iterator<T> it) {
iterator = it;
}
@Override
public void close() {}
@Override
public boolean hasNext() {
return iterator.hasNext();
}
@Override
public T next() {
return iterator.next();
}
@Override
public void remove() {
throw new UnsupportedOperationException("Don't remove!");
}
}
@Override
public void close() {}
private static final class LIBS_position {
@Override
public boolean hasNext() {
return iterator.hasNext();
}
SAMRecord read;
@Override
public T next() {
return iterator.next();
}
final int numOperators;
int currentOperatorIndex = 0;
int currentPositionOnOperator = 0;
int currentReadOffset = 0;
@Override
public void remove() {
throw new UnsupportedOperationException("Don't remove!");
}
}
boolean isBeforeDeletionStart = false;
boolean isBeforeDeletedBase = false;
boolean isAfterDeletionEnd = false;
boolean isAfterDeletedBase = false;
boolean isBeforeInsertion = false;
boolean isAfterInsertion = false;
boolean isNextToSoftClip = false;
boolean sawMop = false;
final class LIBS_position {
public LIBS_position(final SAMRecord read) {
this.read = read;
numOperators = read.getCigar().numCigarElements();
}
SAMRecord read;
public int getCurrentReadOffset() {
return Math.max(0, currentReadOffset - 1);
}
final int numOperators;
int currentOperatorIndex = 0;
int currentPositionOnOperator = 0;
int currentReadOffset = 0;
boolean isBeforeDeletionStart = false;
boolean isBeforeDeletedBase = false;
boolean isAfterDeletionEnd = false;
boolean isAfterDeletedBase = false;
boolean isBeforeInsertion = false;
boolean isAfterInsertion = false;
boolean isNextToSoftClip = false;
boolean sawMop = false;
public LIBS_position(final SAMRecord read) {
this.read = read;
numOperators = read.getCigar().numCigarElements();
}
public int getCurrentReadOffset() {
return Math.max(0, currentReadOffset - 1);
}
/**
* Steps forward on the genome. Returns false when done reading the read, true otherwise.
*/
public boolean stepForwardOnGenome() {
if ( currentOperatorIndex == numOperators )
return false;
CigarElement curElement = read.getCigar().getCigarElement(currentOperatorIndex);
if ( currentPositionOnOperator >= curElement.getLength() ) {
if ( ++currentOperatorIndex == numOperators )
/**
* Steps forward on the genome. Returns false when done reading the read, true otherwise.
*/
public boolean stepForwardOnGenome() {
if ( currentOperatorIndex == numOperators )
return false;
curElement = read.getCigar().getCigarElement(currentOperatorIndex);
currentPositionOnOperator = 0;
}
CigarElement curElement = read.getCigar().getCigarElement(currentOperatorIndex);
if ( currentPositionOnOperator >= curElement.getLength() ) {
if ( ++currentOperatorIndex == numOperators )
return false;
switch ( curElement.getOperator() ) {
case I: // insertion w.r.t. the reference
if ( !sawMop )
curElement = read.getCigar().getCigarElement(currentOperatorIndex);
currentPositionOnOperator = 0;
}
switch ( curElement.getOperator() ) {
case I: // insertion w.r.t. the reference
if ( !sawMop )
break;
case S: // soft clip
currentReadOffset += curElement.getLength();
case H: // hard clip
case P: // padding
currentOperatorIndex++;
return stepForwardOnGenome();
case D: // deletion w.r.t. the reference
case N: // reference skip (looks and gets processed just like a "deletion", just different logical meaning)
currentPositionOnOperator++;
break;
case S: // soft clip
currentReadOffset += curElement.getLength();
case H: // hard clip
case P: // padding
currentOperatorIndex++;
return stepForwardOnGenome();
case D: // deletion w.r.t. the reference
case N: // reference skip (looks and gets processed just like a "deletion", just different logical meaning)
currentPositionOnOperator++;
break;
case M:
case EQ:
case X:
sawMop = true;
currentReadOffset++;
currentPositionOnOperator++;
break;
default:
throw new IllegalStateException("No support for cigar op: " + curElement.getOperator());
}
case M:
case EQ:
case X:
sawMop = true;
currentReadOffset++;
currentPositionOnOperator++;
break;
default:
throw new IllegalStateException("No support for cigar op: " + curElement.getOperator());
final boolean isFirstOp = currentOperatorIndex == 0;
final boolean isLastOp = currentOperatorIndex == numOperators - 1;
final boolean isFirstBaseOfOp = currentPositionOnOperator == 1;
final boolean isLastBaseOfOp = currentPositionOnOperator == curElement.getLength();
isBeforeDeletionStart = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.D, isLastOp, isLastBaseOfOp);
isBeforeDeletedBase = isBeforeDeletionStart || (!isLastBaseOfOp && curElement.getOperator() == CigarOperator.D);
isAfterDeletionEnd = isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.D, isFirstOp, isFirstBaseOfOp);
isAfterDeletedBase = isAfterDeletionEnd || (!isFirstBaseOfOp && curElement.getOperator() == CigarOperator.D);
isBeforeInsertion = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.I, isLastOp, isLastBaseOfOp)
|| (!sawMop && curElement.getOperator() == CigarOperator.I);
isAfterInsertion = isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.I, isFirstOp, isFirstBaseOfOp);
isNextToSoftClip = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.S, isLastOp, isLastBaseOfOp)
|| isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.S, isFirstOp, isFirstBaseOfOp);
return true;
}
final boolean isFirstOp = currentOperatorIndex == 0;
final boolean isLastOp = currentOperatorIndex == numOperators - 1;
final boolean isFirstBaseOfOp = currentPositionOnOperator == 1;
final boolean isLastBaseOfOp = currentPositionOnOperator == curElement.getLength();
private static boolean isBeforeOp(final Cigar cigar,
final int currentOperatorIndex,
final CigarOperator op,
final boolean isLastOp,
final boolean isLastBaseOfOp) {
return !isLastOp && isLastBaseOfOp && cigar.getCigarElement(currentOperatorIndex+1).getOperator() == op;
}
isBeforeDeletionStart = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.D, isLastOp, isLastBaseOfOp);
isBeforeDeletedBase = isBeforeDeletionStart || (!isLastBaseOfOp && curElement.getOperator() == CigarOperator.D);
isAfterDeletionEnd = isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.D, isFirstOp, isFirstBaseOfOp);
isAfterDeletedBase = isAfterDeletionEnd || (!isFirstBaseOfOp && curElement.getOperator() == CigarOperator.D);
isBeforeInsertion = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.I, isLastOp, isLastBaseOfOp)
|| (!sawMop && curElement.getOperator() == CigarOperator.I);
isAfterInsertion = isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.I, isFirstOp, isFirstBaseOfOp);
isNextToSoftClip = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.S, isLastOp, isLastBaseOfOp)
|| isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.S, isFirstOp, isFirstBaseOfOp);
return true;
}
private static boolean isBeforeOp(final Cigar cigar,
final int currentOperatorIndex,
final CigarOperator op,
final boolean isLastOp,
final boolean isLastBaseOfOp) {
return !isLastOp && isLastBaseOfOp && cigar.getCigarElement(currentOperatorIndex+1).getOperator() == op;
}
private static boolean isAfterOp(final Cigar cigar,
final int currentOperatorIndex,
final CigarOperator op,
final boolean isFirstOp,
final boolean isFirstBaseOfOp) {
return !isFirstOp && isFirstBaseOfOp && cigar.getCigarElement(currentOperatorIndex-1).getOperator() == op;
private static boolean isAfterOp(final Cigar cigar,
final int currentOperatorIndex,
final CigarOperator op,
final boolean isFirstOp,
final boolean isFirstBaseOfOp) {
return !isFirstOp && isFirstBaseOfOp && cigar.getCigarElement(currentOperatorIndex-1).getOperator() == op;
}
}
}

View File

@ -109,12 +109,16 @@ public class TraverseActiveRegionsTest extends BaseTest {
dictionary = reference.getSequenceDictionary();
genomeLocParser = new GenomeLocParser(dictionary);
// TODO: test shard boundaries
intervals = new ArrayList<GenomeLoc>();
intervals.add(genomeLocParser.createGenomeLoc("1", 10, 20));
intervals.add(genomeLocParser.createGenomeLoc("1", 1, 999));
intervals.add(genomeLocParser.createGenomeLoc("1", 1000, 1999));
intervals.add(genomeLocParser.createGenomeLoc("1", 2000, 2999));
intervals.add(genomeLocParser.createGenomeLoc("1", 10000, 20000));
intervals.add(genomeLocParser.createGenomeLoc("1", 249250600, 249250621));
intervals.add(genomeLocParser.createGenomeLoc("2", 1, 100));
intervals.add(genomeLocParser.createGenomeLoc("20", 10000, 10100));
intervals = IntervalUtils.sortAndMergeIntervals(genomeLocParser, intervals, IntervalMergingRule.OVERLAPPING_ONLY).toList();
@ -126,6 +130,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
reads.add(buildSAMRecord("boundary_unequal", "1", 1990, 2008));
reads.add(buildSAMRecord("extended_and_np", "1", 990, 1990));
reads.add(buildSAMRecord("outside_intervals", "1", 5000, 6000));
reads.add(buildSAMRecord("end_of_chr1", "1", 249250600, 249250700));
reads.add(buildSAMRecord("simple20", "20", 10025, 10075));
// required by LocusIteratorByState, and I prefer to list them in test case order above
@ -139,8 +144,6 @@ public class TraverseActiveRegionsTest extends BaseTest {
List<GenomeLoc> activeIntervals = getIsActiveIntervals(walker, intervals);
// Contract: Every genome position in the analysis interval(s) is processed by the walker's isActive() call
verifyEqualIntervals(intervals, activeIntervals);
// TODO: more tests and edge cases
}
private List<GenomeLoc> getIsActiveIntervals(DummyActiveRegionWalker walker, List<GenomeLoc> intervals) {
@ -171,8 +174,6 @@ public class TraverseActiveRegionsTest extends BaseTest {
Collection<ActiveRegion> activeRegions = getActiveRegions(walker, intervals).values();
verifyActiveRegionCoverage(intervals, activeRegions);
// TODO: more tests and edge cases
}
private void verifyActiveRegionCoverage(List<GenomeLoc> intervals, Collection<ActiveRegion> activeRegions) {
@ -231,6 +232,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
// boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
// outside_intervals: none
// end_of_chr1: Primary in 1:249250600-249250621
// simple20: Primary in 20:10000-10100
Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
@ -245,6 +247,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
verifyReadNotPlaced(region, "boundary_unequal");
verifyReadNotPlaced(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
verifyReadNotPlaced(region, "end_of_chr1");
verifyReadNotPlaced(region, "simple20");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999));
@ -256,6 +259,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
getRead(region, "boundary_unequal");
getRead(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
verifyReadNotPlaced(region, "end_of_chr1");
verifyReadNotPlaced(region, "simple20");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
@ -267,6 +271,19 @@ public class TraverseActiveRegionsTest extends BaseTest {
verifyReadNotPlaced(region, "boundary_unequal");
verifyReadNotPlaced(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
verifyReadNotPlaced(region, "end_of_chr1");
verifyReadNotPlaced(region, "simple20");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 249250600, 249250621));
verifyReadNotPlaced(region, "simple");
verifyReadNotPlaced(region, "overlap_equal");
verifyReadNotPlaced(region, "overlap_unequal");
verifyReadNotPlaced(region, "boundary_equal");
verifyReadNotPlaced(region, "boundary_unequal");
verifyReadNotPlaced(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
getRead(region, "end_of_chr1");
verifyReadNotPlaced(region, "simple20");
region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
@ -278,9 +295,8 @@ public class TraverseActiveRegionsTest extends BaseTest {
verifyReadNotPlaced(region, "boundary_unequal");
verifyReadNotPlaced(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
verifyReadNotPlaced(region, "end_of_chr1");
getRead(region, "simple20");
// TODO: more tests and edge cases
}
@Test
@ -300,6 +316,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
// boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
// outside_intervals: none
// end_of_chr1: Primary in 1:249250600-249250621
// simple20: Primary in 20:10000-10100
Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
@ -314,6 +331,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
verifyReadNotPlaced(region, "boundary_unequal");
getRead(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
verifyReadNotPlaced(region, "end_of_chr1");
verifyReadNotPlaced(region, "simple20");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999));
@ -325,6 +343,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
getRead(region, "boundary_unequal");
getRead(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
verifyReadNotPlaced(region, "end_of_chr1");
verifyReadNotPlaced(region, "simple20");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
@ -336,6 +355,19 @@ public class TraverseActiveRegionsTest extends BaseTest {
getRead(region, "boundary_unequal");
verifyReadNotPlaced(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
verifyReadNotPlaced(region, "end_of_chr1");
verifyReadNotPlaced(region, "simple20");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 249250600, 249250621));
verifyReadNotPlaced(region, "simple");
verifyReadNotPlaced(region, "overlap_equal");
verifyReadNotPlaced(region, "overlap_unequal");
verifyReadNotPlaced(region, "boundary_equal");
verifyReadNotPlaced(region, "boundary_unequal");
verifyReadNotPlaced(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
getRead(region, "end_of_chr1");
verifyReadNotPlaced(region, "simple20");
region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
@ -347,9 +379,8 @@ public class TraverseActiveRegionsTest extends BaseTest {
verifyReadNotPlaced(region, "boundary_unequal");
verifyReadNotPlaced(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
verifyReadNotPlaced(region, "end_of_chr1");
getRead(region, "simple20");
// TODO: more tests and edge cases
}
@Test
@ -370,6 +401,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
// boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
// outside_intervals: none
// end_of_chr1: Primary in 1:249250600-249250621
// simple20: Primary in 20:10000-10100
Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
@ -384,6 +416,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
verifyReadNotPlaced(region, "boundary_unequal");
getRead(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
verifyReadNotPlaced(region, "end_of_chr1");
verifyReadNotPlaced(region, "simple20");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999));
@ -395,6 +428,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
getRead(region, "boundary_unequal");
getRead(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
verifyReadNotPlaced(region, "end_of_chr1");
verifyReadNotPlaced(region, "simple20");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
@ -406,6 +440,19 @@ public class TraverseActiveRegionsTest extends BaseTest {
getRead(region, "boundary_unequal");
getRead(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
verifyReadNotPlaced(region, "end_of_chr1");
verifyReadNotPlaced(region, "simple20");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 249250600, 249250621));
verifyReadNotPlaced(region, "simple");
verifyReadNotPlaced(region, "overlap_equal");
verifyReadNotPlaced(region, "overlap_unequal");
verifyReadNotPlaced(region, "boundary_equal");
verifyReadNotPlaced(region, "boundary_unequal");
verifyReadNotPlaced(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
getRead(region, "end_of_chr1");
verifyReadNotPlaced(region, "simple20");
region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
@ -417,9 +464,13 @@ public class TraverseActiveRegionsTest extends BaseTest {
verifyReadNotPlaced(region, "boundary_unequal");
verifyReadNotPlaced(region, "extended_and_np");
verifyReadNotPlaced(region, "outside_intervals");
verifyReadNotPlaced(region, "end_of_chr1");
getRead(region, "simple20");
}
// TODO: more tests and edge cases
@Test
public void testUnmappedReads() {
// TODO
}
private void verifyReadNotPlaced(ActiveRegion region, String readName) {

View File

@ -6,7 +6,7 @@ import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.commandline.Tags;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.reads.ReadShardBalancer;
import org.broadinstitute.sting.gatk.datasources.reads.LegacyReadShardBalancer;
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
@ -114,7 +114,7 @@ public class TraverseReadsUnitTest extends BaseTest {
@Test
public void testUnmappedReadCount() {
SAMDataSource dataSource = new SAMDataSource(bamList,new ThreadAllocation(),null,genomeLocParser);
Iterable<Shard> shardStrategy = dataSource.createShardIteratorOverAllReads(new ReadShardBalancer());
Iterable<Shard> shardStrategy = dataSource.createShardIteratorOverAllReads(new LegacyReadShardBalancer());
countReadWalker.initialize();
Object accumulator = countReadWalker.reduceInit();

View File

@ -0,0 +1,20 @@
package org.broadinstitute.sting.gatk.walkers;
import org.broadinstitute.sting.WalkerTest;
import org.testng.annotations.Test;
import java.util.ArrayList;
public class PrintReadsLargeScaleTest extends WalkerTest {
@Test( timeOut = 1000 * 60 * 60 * 20 ) // 60 seconds * 60 seconds / minute * 20 minutes
public void testRealignerTargetCreator() {
WalkerTestSpec spec = new WalkerTestSpec(
"-R " + b37KGReference +
" -T PrintReads" +
" -I " + evaluationDataLocation + "CEUTrio.HiSeq.WEx.b37.NA12892.clean.dedup.recal.1.bam" +
" -o /dev/null",
0,
new ArrayList<String>(0));
executeTest("testPrintReadsWholeExomeChr1", spec);
}
}

View File

@ -137,7 +137,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
" -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" +
" -genotypeMergeOptions UNIQUIFY -L 1"),
1,
Arrays.asList("e5f0e7a80cd392172ebf5ddb06b91a00"));
Arrays.asList("58e6281df108c361e99673a501ee4749"));
cvExecuteTest("threeWayWithRefs", spec, true);
}

View File

@ -23,14 +23,14 @@ public class LegacyReservoirDownsamplerUnitTest {
@Test
public void testEmptyIterator() {
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(1);
LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(1);
Assert.assertTrue(downsampler.isEmpty(),"Downsampler is not empty but should be.");
}
@Test
public void testOneElementWithPoolSizeOne() {
List<GATKSAMRecord> reads = Collections.singletonList(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(1);
LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(1);
downsampler.addAll(reads);
Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
@ -42,7 +42,7 @@ public class LegacyReservoirDownsamplerUnitTest {
@Test
public void testOneElementWithPoolSizeGreaterThanOne() {
List<GATKSAMRecord> reads = Collections.singletonList(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(5);
LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(5);
downsampler.addAll(reads);
Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
@ -58,7 +58,7 @@ public class LegacyReservoirDownsamplerUnitTest {
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,76));
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,76));
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(5);
LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(5);
downsampler.addAll(reads);
Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
@ -78,7 +78,7 @@ public class LegacyReservoirDownsamplerUnitTest {
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,76));
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read4",0,1,76));
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read5",0,1,76));
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(5);
LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(5);
downsampler.addAll(reads);
Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
@ -99,7 +99,7 @@ public class LegacyReservoirDownsamplerUnitTest {
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,76));
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,76));
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(0);
LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(0);
downsampler.addAll(reads);
Assert.assertTrue(downsampler.isEmpty(),"Downsampler isn't empty but should be");
@ -115,7 +115,7 @@ public class LegacyReservoirDownsamplerUnitTest {
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,76));
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read4",0,1,76));
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read5",0,1,76));
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(1);
LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(1);
downsampler.addAll(reads);
Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
@ -128,7 +128,7 @@ public class LegacyReservoirDownsamplerUnitTest {
public void testFillingAcrossLoci() {
List<SAMRecord> reads = new ArrayList<SAMRecord>();
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(5);
LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(5);
downsampler.addAll(reads);
Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");

View File

@ -2,7 +2,6 @@ package org.broadinstitute.sting.utils.nanoScheduler;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.MultiThreadedErrorTracker;
import org.broadinstitute.sting.utils.SimpleTimer;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
@ -46,7 +45,7 @@ public class InputProducerUnitTest extends BaseTest {
final LinkedBlockingDeque<InputProducer<Integer>.InputValue> readQueue =
new LinkedBlockingDeque<InputProducer<Integer>.InputValue>(queueSize);
final InputProducer<Integer> ip = new InputProducer<Integer>(elements.iterator(), new MultiThreadedErrorTracker(), new SimpleTimer(), readQueue);
final InputProducer<Integer> ip = new InputProducer<Integer>(elements.iterator(), new MultiThreadedErrorTracker(), readQueue);
final ExecutorService es = Executors.newSingleThreadExecutor();
@ -94,7 +93,7 @@ public class InputProducerUnitTest extends BaseTest {
final LinkedBlockingDeque<InputProducer<Integer>.InputValue> readQueue =
new LinkedBlockingDeque<InputProducer<Integer>.InputValue>();
final InputProducer<Integer> ip = new InputProducer<Integer>(elements.iterator(), new MultiThreadedErrorTracker(), new SimpleTimer(), readQueue);
final InputProducer<Integer> ip = new InputProducer<Integer>(elements.iterator(), new MultiThreadedErrorTracker(), readQueue);
final ExecutorService es = Executors.newSingleThreadExecutor();
es.submit(ip);

View File

@ -188,17 +188,6 @@ public class NanoSchedulerUnitTest extends BaseTest {
Assert.assertTrue(callback.callBacks >= test.nExpectedCallbacks(), "Not enough callbacks detected. Expected at least " + test.nExpectedCallbacks() + " but saw only " + callback.callBacks);
nanoScheduler.shutdown();
// TODO -- need to enable only in the case where there's serious time spend in
// TODO -- read /map / reduce, otherwise the "outside" timer doesn't add up
final double myTimeEstimate = timer.getElapsedTime();
final double tolerance = 0.1;
if ( false && myTimeEstimate > 0.1 ) {
Assert.assertTrue(nanoScheduler.getTotalRuntime() > myTimeEstimate * tolerance,
"NanoScheduler said that the total runtime was " + nanoScheduler.getTotalRuntime()
+ " but the overall test time was " + myTimeEstimate + ", beyond our tolerance factor of "
+ tolerance);
}
}
@Test(enabled = true && ! DEBUG, dataProvider = "NanoSchedulerBasicTest", dependsOnMethods = "testMultiThreadedNanoScheduler", timeOut = NANO_SCHEDULE_MAX_RUNTIME)
@ -243,7 +232,7 @@ public class NanoSchedulerUnitTest extends BaseTest {
for ( final int nThreads : Arrays.asList(8) ) {
for ( final boolean addDelays : Arrays.asList(true, false) ) {
final NanoSchedulerBasicTest test = new NanoSchedulerBasicTest(bufSize, nThreads, 1, 1000000, false);
final int maxN = addDelays ? 10000 : 100000;
final int maxN = addDelays ? 1000 : 10000;
for ( int nElementsBeforeError = 0; nElementsBeforeError < maxN; nElementsBeforeError += Math.max(nElementsBeforeError / 10, 1) ) {
tests.add(new Object[]{nElementsBeforeError, test, addDelays});
}
@ -259,17 +248,22 @@ public class NanoSchedulerUnitTest extends BaseTest {
executeTestErrorThrowingInput(10, new NullPointerException(), exampleTest, false);
}
@Test(enabled = true, expectedExceptions = ReviewedStingException.class, timeOut = 10000)
@Test(enabled = true, expectedExceptions = ReviewedStingException.class, timeOut = 1000)
public void testInputErrorIsThrown_RSE() throws InterruptedException {
executeTestErrorThrowingInput(10, new ReviewedStingException("test"), exampleTest, false);
}
@Test(enabled = true, expectedExceptions = NullPointerException.class, dataProvider = "NanoSchedulerInputExceptionTest", timeOut = 10000, invocationCount = 1)
public void testInputErrorDoesntDeadlock(final int nElementsBeforeError, final NanoSchedulerBasicTest test, final boolean addDelays ) throws InterruptedException {
@Test(enabled = true, expectedExceptions = NullPointerException.class, dataProvider = "NanoSchedulerInputExceptionTest", timeOut = 1000, invocationCount = 1)
public void testInputRuntimeExceptionDoesntDeadlock(final int nElementsBeforeError, final NanoSchedulerBasicTest test, final boolean addDelays ) throws InterruptedException {
executeTestErrorThrowingInput(nElementsBeforeError, new NullPointerException(), test, addDelays);
}
private void executeTestErrorThrowingInput(final int nElementsBeforeError, final RuntimeException ex, final NanoSchedulerBasicTest test, final boolean addDelays) {
@Test(enabled = true, expectedExceptions = ReviewedStingException.class, dataProvider = "NanoSchedulerInputExceptionTest", timeOut = 1000, invocationCount = 1)
public void testInputErrorDoesntDeadlock(final int nElementsBeforeError, final NanoSchedulerBasicTest test, final boolean addDelays ) throws InterruptedException {
executeTestErrorThrowingInput(nElementsBeforeError, new Error(), test, addDelays);
}
private void executeTestErrorThrowingInput(final int nElementsBeforeError, final Throwable ex, final NanoSchedulerBasicTest test, final boolean addDelays) {
logger.warn("executeTestErrorThrowingInput " + nElementsBeforeError + " ex=" + ex + " test=" + test + " addInputDelays=" + addDelays);
final NanoScheduler<Integer, Integer, Integer> nanoScheduler = test.makeScheduler();
nanoScheduler.execute(new ErrorThrowingIterator(nElementsBeforeError, ex, addDelays), test.makeMap(), test.initReduce(), test.makeReduce());
@ -279,9 +273,9 @@ public class NanoSchedulerUnitTest extends BaseTest {
final int nElementsBeforeError;
final boolean addDelays;
int i = 0;
final RuntimeException ex;
final Throwable ex;
private ErrorThrowingIterator(final int nElementsBeforeError, RuntimeException ex, boolean addDelays) {
private ErrorThrowingIterator(final int nElementsBeforeError, Throwable ex, boolean addDelays) {
this.nElementsBeforeError = nElementsBeforeError;
this.ex = ex;
this.addDelays = addDelays;
@ -290,7 +284,12 @@ public class NanoSchedulerUnitTest extends BaseTest {
@Override public boolean hasNext() { return true; }
@Override public Integer next() {
if ( i++ > nElementsBeforeError ) {
throw ex;
if ( ex instanceof Error )
throw (Error)ex;
else if ( ex instanceof RuntimeException )
throw (RuntimeException)ex;
else
throw new RuntimeException("Bad exception " + ex);
} else if ( addDelays ) {
maybeDelayMe(i);
return i;

View File

@ -2,7 +2,6 @@ package org.broadinstitute.sting.utils.nanoScheduler;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.MultiThreadedErrorTracker;
import org.broadinstitute.sting.utils.SimpleTimer;
import org.broadinstitute.sting.utils.Utils;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
@ -93,7 +92,7 @@ public class ReducerUnitTest extends BaseTest {
final List<List<MapResult<Integer>>> jobGroups = Utils.groupList(allJobs, groupSize);
final ReduceSumTest reduce = new ReduceSumTest();
final Reducer<Integer, Integer> reducer = new Reducer<Integer, Integer>(reduce, new MultiThreadedErrorTracker(), new SimpleTimer(), 0);
final Reducer<Integer, Integer> reducer = new Reducer<Integer, Integer>(reduce, new MultiThreadedErrorTracker(), 0);
final TestWaitingForFinalReduce waitingThread = new TestWaitingForFinalReduce(reducer, expectedSum(allJobs));
final ExecutorService es = Executors.newSingleThreadExecutor();
@ -155,7 +154,7 @@ public class ReducerUnitTest extends BaseTest {
private void runSettingJobIDTwice() throws Exception {
final PriorityBlockingQueue<MapResult<Integer>> mapResultsQueue = new PriorityBlockingQueue<MapResult<Integer>>();
final Reducer<Integer, Integer> reducer = new Reducer<Integer, Integer>(new ReduceSumTest(), new MultiThreadedErrorTracker(), new SimpleTimer(), 0);
final Reducer<Integer, Integer> reducer = new Reducer<Integer, Integer>(new ReduceSumTest(), new MultiThreadedErrorTracker(), 0);
reducer.setTotalJobCount(10);
reducer.setTotalJobCount(15);