Merge Broad's GATK and CMI gatk
This commit is contained in:
commit
19372225af
|
|
@ -56,11 +56,14 @@ public class AlleleBiasedDownsamplingUtils {
|
|||
for ( int i = 0; i < 4; i++ )
|
||||
alleleStratifiedElements[i] = new ArrayList<PileupElement>();
|
||||
|
||||
// keep all of the reduced reads
|
||||
final ArrayList<PileupElement> reducedReadPileups = new ArrayList<PileupElement>();
|
||||
|
||||
// start by stratifying the reads by the alleles they represent at this position
|
||||
for( final PileupElement pe : pileup ) {
|
||||
// abort if we have a reduced read - we do not want to remove it!
|
||||
// we do not want to remove a reduced read
|
||||
if ( pe.getRead().isReducedRead() )
|
||||
return pileup;
|
||||
reducedReadPileups.add(pe);
|
||||
|
||||
final int baseIndex = BaseUtils.simpleBaseToBaseIndex(pe.getBase());
|
||||
if ( baseIndex != -1 )
|
||||
|
|
@ -76,6 +79,7 @@ public class AlleleBiasedDownsamplingUtils {
|
|||
return difference != 0 ? difference : element1.getRead().getReadName().compareTo(element2.getRead().getReadName());
|
||||
}
|
||||
});
|
||||
elementsToKeep.addAll(reducedReadPileups);
|
||||
|
||||
// make a listing of allele counts
|
||||
final int[] alleleCounts = new int[4];
|
||||
|
|
|
|||
|
|
@ -85,7 +85,7 @@ import java.util.*;
|
|||
*/
|
||||
|
||||
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@PartitionBy(PartitionType.INTERVAL)
|
||||
@PartitionBy(PartitionType.CONTIG)
|
||||
@ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, BadCigarFilter.class})
|
||||
public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceReadsStash> {
|
||||
|
||||
|
|
|
|||
|
|
@ -240,8 +240,8 @@ public class AFCalcPerformanceTest {
|
|||
if ( a.isNonReference() ) {
|
||||
final String warningmeMLE = call.originalCall.getAlleleCountAtMLE(a) != result.getAlleleCountAtMLE(a) ? " DANGER-MLE-DIFFERENT" : "";
|
||||
logger.info("\t\t MLE " + a + ": " + call.originalCall.getAlleleCountAtMLE(a) + " vs " + result.getAlleleCountAtMLE(a) + warningmeMLE);
|
||||
final String warningmePost = call.originalCall.getLog10PosteriorOfAFGt0ForAllele(a) == 0 && result.getLog10PosteriorOfAFGt0ForAllele(a) < -10 ? " DANGER-POSTERIORS-DIFFERENT" : "";
|
||||
logger.info("\t\t Posterior " + a + ": " + call.originalCall.getLog10PosteriorOfAFGt0ForAllele(a) + " vs " + result.getLog10PosteriorOfAFGt0ForAllele(a) + warningmePost);
|
||||
final String warningmePost = call.originalCall.getLog10PosteriorOfAFEq0ForAllele(a) == 0 && result.getLog10PosteriorOfAFEq0ForAllele(a) < -10 ? " DANGER-POSTERIORS-DIFFERENT" : "";
|
||||
logger.info("\t\t Posterior " + a + ": " + call.originalCall.getLog10PosteriorOfAFEq0ForAllele(a) + " vs " + result.getLog10PosteriorOfAFEq0ForAllele(a) + warningmePost);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -33,7 +33,6 @@ import org.apache.commons.lang.ArrayUtils;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
|
@ -449,10 +448,10 @@ public class GenotypingEngine {
|
|||
|
||||
final ArrayList<Haplotype> undeterminedHaplotypes = new ArrayList<Haplotype>(haplotypes.size());
|
||||
for( final Haplotype h : haplotypes ) {
|
||||
if( h.getEventMap().get(loc) == null ) { // no event at this location so this is a reference-supporting haplotype
|
||||
alleleMapper.get(refAllele).add(h);
|
||||
} else if( h.isArtificialHaplotype() && loc == h.getArtificialAllelePosition() && alleleMapper.containsKey(h.getArtificialAllele()) ) {
|
||||
if( h.isArtificialHaplotype() && loc == h.getArtificialAllelePosition() && alleleMapper.containsKey(h.getArtificialAllele()) ) {
|
||||
alleleMapper.get(h.getArtificialAllele()).add(h);
|
||||
} else if( h.getEventMap().get(loc) == null ) { // no event at this location so let's investigate later
|
||||
undeterminedHaplotypes.add(h);
|
||||
} else {
|
||||
boolean haplotypeIsDetermined = false;
|
||||
for( final VariantContext vcAtThisLoc : eventsAtThisLoc ) {
|
||||
|
|
|
|||
|
|
@ -80,11 +80,11 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test(enabled = true)
|
||||
public void testMT_SNP_DISCOVERY_sp4() {
|
||||
PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","dd568dc30be90135a3a8957a45a7321c");
|
||||
PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","3fc6f4d458313616727c60e49c0e852b");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testMT_SNP_GGA_sp10() {
|
||||
PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "bf793c43b635a931207170be8035b288");
|
||||
PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "1bebbc0f28bff6fd64736ccca8839df8");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -62,7 +62,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultipleSNPAlleles() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
|
||||
Arrays.asList("d20c7a143b899f0239bf64b652ad3edb"));
|
||||
Arrays.asList("97df6c2a8d390d43b9bdf56c979d9b09"));
|
||||
executeTest("test Multiple SNP alleles", spec);
|
||||
}
|
||||
|
||||
|
|
@ -86,7 +86,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMismatchedPLs() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1,
|
||||
Arrays.asList("fb204e821a24d03bd3a671b6e01c449a"));
|
||||
Arrays.asList("935ee705ffe8cc6bf1d9efcceea271c8"));
|
||||
executeTest("test mismatched PLs", spec);
|
||||
}
|
||||
|
||||
|
|
@ -437,7 +437,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testNsInCigar() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "testWithNs.bam -o %s -L 8:141813600-141813700 -out_mode EMIT_ALL_SITES", 1,
|
||||
Arrays.asList("32f18ba50406cd8c8069ba07f2f89558"));
|
||||
Arrays.asList("4d36969d4f8f1094f1fb6e7e085c19f6"));
|
||||
executeTest("test calling on reads with Ns in CIGAR", spec);
|
||||
}
|
||||
|
||||
|
|
@ -451,13 +451,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testReducedBam() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
|
||||
Arrays.asList("c1077662411164182c5f75478344f83d"));
|
||||
Arrays.asList("092e42a712afb660ec79ff11c55933e2"));
|
||||
executeTest("test calling on a ReducedRead BAM", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReducedBamSNPs() {
|
||||
testReducedCalling("SNP", "dee6590e3b7079890bc3a9cb372c297e");
|
||||
testReducedCalling("SNP", "c0de74ab8f4f14eb3a2c5d55c200ac5f");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
|||
|
|
@ -83,8 +83,8 @@ public class AFCalcResultUnitTest extends BaseTest {
|
|||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
final List<Double> pValues = new LinkedList<Double>();
|
||||
for ( final double p : Arrays.asList(0.01, 0.1, 0.9, 0.99, 0.999) )
|
||||
for ( final double espilon : Arrays.asList(-1e-5, 0.0, 1e-5) )
|
||||
for ( final double p : Arrays.asList(0.01, 0.1, 0.9, 0.99, 0.999, 1 - 1e-4, 1 - 1e-5, 1 - 1e-6) )
|
||||
for ( final double espilon : Arrays.asList(-1e-7, 0.0, 1e-7) )
|
||||
pValues.add(p + espilon);
|
||||
|
||||
for ( final double pNonRef : pValues ) {
|
||||
|
|
@ -106,7 +106,7 @@ public class AFCalcResultUnitTest extends BaseTest {
|
|||
alleles,
|
||||
MathUtils.normalizeFromLog10(new double[]{1 - pNonRef, pNonRef}, true, false),
|
||||
log10Even,
|
||||
Collections.singletonMap(C, Math.log10(pNonRef)));
|
||||
Collections.singletonMap(C, Math.log10(1 - pNonRef)));
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "TestIsPolymorphic")
|
||||
|
|
|
|||
|
|
@ -681,7 +681,7 @@ public class AFCalcUnitTest extends BaseTest {
|
|||
|
||||
// must be getCalledChrCount because we cannot ensure that the VC made has our desired ACs
|
||||
Assert.assertEquals(result.getAlleleCountAtMLE(alt), vc.getCalledChrCount(alt));
|
||||
Assert.assertEquals(result.isPolymorphic(alt, -1), (boolean)expectedPoly.get(i), "isPolymorphic for allele " + alt + " " + result.getLog10PosteriorOfAFGt0ForAllele(alt));
|
||||
Assert.assertEquals(result.isPolymorphic(alt, -1), (boolean)expectedPoly.get(i), "isPolymorphic for allele " + alt + " " + result.getLog10PosteriorOfAFEq0ForAllele(alt));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -148,7 +148,7 @@ public class IndependentAllelesDiploidExactAFCalcUnitTest extends BaseTest {
|
|||
for ( int i = 0; i < log10LAlleles.size(); i++ ) {
|
||||
final double log10LAllele1 = log10LAlleles.get(i);
|
||||
final double[] L1 = MathUtils.normalizeFromLog10(new double[]{log10LAllele1, 0.0}, true);
|
||||
final AFCalcResult result1 = new AFCalcResult(new int[]{1}, 1, Arrays.asList(A, C), L1, rawPriors, Collections.singletonMap(C, 0.0));
|
||||
final AFCalcResult result1 = new AFCalcResult(new int[]{1}, 1, Arrays.asList(A, C), L1, rawPriors, Collections.singletonMap(C, -10000.0));
|
||||
originalPriors.add(result1);
|
||||
pNonRefN.add(log10pNonRef*(i+1));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGA() {
|
||||
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
|
||||
"e2b3bf420c45c677956a2e4a56d75ea2");
|
||||
"fe84caa79f59ecbd98fcbcd5b30ab164");
|
||||
}
|
||||
|
||||
private void HCTestComplexVariants(String bam, String args, String md5) {
|
||||
|
|
|
|||
|
|
@ -445,13 +445,17 @@ public class GenomeAnalysisEngine {
|
|||
|
||||
protected DownsamplingMethod getDownsamplingMethod() {
|
||||
GATKArgumentCollection argCollection = this.getArguments();
|
||||
boolean useExperimentalDownsampling = argCollection.enableExperimentalDownsampling;
|
||||
|
||||
// Legacy downsampler can only be selected via the command line, not via walker annotations
|
||||
boolean useLegacyDownsampler = argCollection.useLegacyDownsampler;
|
||||
|
||||
DownsamplingMethod commandLineMethod = argCollection.getDownsamplingMethod();
|
||||
DownsamplingMethod walkerMethod = WalkerManager.getDownsamplingMethod(walker, useExperimentalDownsampling);
|
||||
DownsamplingMethod defaultMethod = DownsamplingMethod.getDefaultDownsamplingMethod(walker, useExperimentalDownsampling);
|
||||
DownsamplingMethod walkerMethod = WalkerManager.getDownsamplingMethod(walker, useLegacyDownsampler);
|
||||
DownsamplingMethod defaultMethod = DownsamplingMethod.getDefaultDownsamplingMethod(walker, useLegacyDownsampler);
|
||||
|
||||
return commandLineMethod != null ? commandLineMethod : (walkerMethod != null ? walkerMethod : defaultMethod);
|
||||
DownsamplingMethod method = commandLineMethod != null ? commandLineMethod : (walkerMethod != null ? walkerMethod : defaultMethod);
|
||||
method.checkCompatibilityWithWalker(walker);
|
||||
return method;
|
||||
}
|
||||
|
||||
protected void setDownsamplingMethod(DownsamplingMethod method) {
|
||||
|
|
@ -580,9 +584,9 @@ public class GenomeAnalysisEngine {
|
|||
throw new UserException.CommandLineException("Pairs traversal cannot be used in conjunction with intervals.");
|
||||
}
|
||||
|
||||
// Use the experimental ReadShardBalancer if experimental downsampling is enabled
|
||||
ShardBalancer readShardBalancer = downsamplingMethod != null && downsamplingMethod.useExperimentalDownsampling ?
|
||||
new ExperimentalReadShardBalancer() :
|
||||
// Use the legacy ReadShardBalancer if legacy downsampling is enabled
|
||||
ShardBalancer readShardBalancer = downsamplingMethod != null && downsamplingMethod.useLegacyDownsampler ?
|
||||
new LegacyReadShardBalancer() :
|
||||
new ReadShardBalancer();
|
||||
|
||||
if(intervals == null)
|
||||
|
|
|
|||
|
|
@ -305,11 +305,23 @@ public class WalkerManager extends PluginManager<Walker> {
|
|||
* Gets the type of downsampling method requested by the walker. If an alternative
|
||||
* downsampling method is specified on the command-line, the command-line version will
|
||||
* be used instead.
|
||||
* @param walkerClass The class of the walker to interrogate.
|
||||
* @param useExperimentalDownsampling If true, use the experimental downsampling implementation
|
||||
* @param walker The walker to interrogate.
|
||||
* @param useLegacyDownsampler If true, use the legacy downsampling implementation
|
||||
* @return The downsampling method, as specified by the walker. Null if none exists.
|
||||
*/
|
||||
public static DownsamplingMethod getDownsamplingMethod(Class<? extends Walker> walkerClass, boolean useExperimentalDownsampling) {
|
||||
public static DownsamplingMethod getDownsamplingMethod(Walker walker, boolean useLegacyDownsampler) {
|
||||
return getDownsamplingMethod(walker.getClass(), useLegacyDownsampler);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the type of downsampling method requested by the walker. If an alternative
|
||||
* downsampling method is specified on the command-line, the command-line version will
|
||||
* be used instead.
|
||||
* @param walkerClass The class of the walker to interrogate.
|
||||
* @param useLegacyDownsampler If true, use the legacy downsampling implementation
|
||||
* @return The downsampling method, as specified by the walker. Null if none exists.
|
||||
*/
|
||||
public static DownsamplingMethod getDownsamplingMethod(Class<? extends Walker> walkerClass, boolean useLegacyDownsampler) {
|
||||
DownsamplingMethod downsamplingMethod = null;
|
||||
|
||||
if( walkerClass.isAnnotationPresent(Downsample.class) ) {
|
||||
|
|
@ -317,7 +329,7 @@ public class WalkerManager extends PluginManager<Walker> {
|
|||
DownsampleType type = downsampleParameters.by();
|
||||
Integer toCoverage = downsampleParameters.toCoverage() >= 0 ? downsampleParameters.toCoverage() : null;
|
||||
Double toFraction = downsampleParameters.toFraction() >= 0.0d ? downsampleParameters.toFraction() : null;
|
||||
downsamplingMethod = new DownsamplingMethod(type,toCoverage,toFraction,useExperimentalDownsampling);
|
||||
downsamplingMethod = new DownsamplingMethod(type,toCoverage,toFraction,useLegacyDownsampler);
|
||||
}
|
||||
|
||||
return downsamplingMethod;
|
||||
|
|
@ -331,18 +343,6 @@ public class WalkerManager extends PluginManager<Walker> {
|
|||
return walker.getClass().getAnnotation(BAQMode.class).ApplicationTime();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the type of downsampling method requested by the walker. If an alternative
|
||||
* downsampling method is specified on the command-line, the command-line version will
|
||||
* be used instead.
|
||||
* @param walker The walker to interrogate.
|
||||
* @param useExperimentalDownsampling If true, use the experimental downsampling implementation
|
||||
* @return The downsampling method, as specified by the walker. Null if none exists.
|
||||
*/
|
||||
public static DownsamplingMethod getDownsamplingMethod(Walker walker, boolean useExperimentalDownsampling) {
|
||||
return getDownsamplingMethod(walker.getClass(), useExperimentalDownsampling);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a name for this type of walker.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -162,12 +162,11 @@ public class GATKArgumentCollection {
|
|||
@Argument(fullName = "downsample_to_fraction", shortName = "dfrac", doc = "Fraction [0.0-1.0] of reads to downsample to", required = false)
|
||||
public Double downsampleFraction = null;
|
||||
|
||||
@Argument(fullName = "downsample_to_coverage", shortName = "dcov", doc = "Coverage [integer] to downsample to at any given locus; note that downsampled reads are randomly selected from all possible reads at a locus", required = false)
|
||||
@Argument(fullName = "downsample_to_coverage", shortName = "dcov", doc = "Coverage [integer] to downsample to at any given locus; note that downsampled reads are randomly selected from all possible reads at a locus. For non-locus-based traversals (eg., ReadWalkers), this sets the maximum number of reads at each alignment start position.", required = false)
|
||||
public Integer downsampleCoverage = null;
|
||||
|
||||
@Argument(fullName = "enable_experimental_downsampling", shortName = "enable_experimental_downsampling", doc = "Enable experimental engine-level downsampling", required = false)
|
||||
@Hidden
|
||||
public boolean enableExperimentalDownsampling = false;
|
||||
@Argument(fullName = "use_legacy_downsampler", shortName = "use_legacy_downsampler", doc = "Use the legacy downsampling implementation instead of the newer, less-tested implementation", required = false)
|
||||
public boolean useLegacyDownsampler = false;
|
||||
|
||||
/**
|
||||
* Gets the downsampling method explicitly specified by the user. If the user didn't specify
|
||||
|
|
@ -178,7 +177,7 @@ public class GATKArgumentCollection {
|
|||
if ( downsamplingType == null && downsampleFraction == null && downsampleCoverage == null )
|
||||
return null;
|
||||
|
||||
return new DownsamplingMethod(downsamplingType, downsampleCoverage, downsampleFraction, enableExperimentalDownsampling);
|
||||
return new DownsamplingMethod(downsamplingType, downsampleCoverage, downsampleFraction, useLegacyDownsampler);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -192,7 +191,7 @@ public class GATKArgumentCollection {
|
|||
downsamplingType = method.type;
|
||||
downsampleCoverage = method.toCoverage;
|
||||
downsampleFraction = method.toFraction;
|
||||
enableExperimentalDownsampling = method.useExperimentalDownsampling;
|
||||
useLegacyDownsampler = method.useLegacyDownsampler;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -136,11 +136,12 @@ public abstract class LocusView extends LocusIterator implements View {
|
|||
// Cache the current and apply filtering.
|
||||
AlignmentContext current = nextLocus;
|
||||
|
||||
// The old ALL_READS downsampling implementation -- only use if we're not using the new experimental downsampling:
|
||||
if( ! sourceInfo.getDownsamplingMethod().useExperimentalDownsampling &&
|
||||
sourceInfo.getDownsamplingMethod().type == DownsampleType.ALL_READS && sourceInfo.getDownsamplingMethod().toCoverage != null ) {
|
||||
// The old ALL_READS downsampling implementation -- use only if legacy downsampling was requested:
|
||||
if ( sourceInfo.getDownsamplingMethod().useLegacyDownsampler &&
|
||||
sourceInfo.getDownsamplingMethod().type == DownsampleType.ALL_READS &&
|
||||
sourceInfo.getDownsamplingMethod().toCoverage != null ) {
|
||||
|
||||
current.downsampleToCoverage( sourceInfo.getDownsamplingMethod().toCoverage );
|
||||
current.downsampleToCoverage(sourceInfo.getDownsamplingMethod().toCoverage);
|
||||
}
|
||||
|
||||
// Indicate that the next operation will need to advance.
|
||||
|
|
|
|||
|
|
@ -134,12 +134,11 @@ public class BAMScheduler implements Iterator<FilePointer> {
|
|||
|
||||
// Only use the deprecated SAMDataSource.getCurrentPosition() if we're not using experimental downsampling
|
||||
// TODO: clean this up once the experimental downsampling engine fork collapses
|
||||
if ( dataSource.getReadsInfo().getDownsamplingMethod() != null && dataSource.getReadsInfo().getDownsamplingMethod().useExperimentalDownsampling ) {
|
||||
currentPosition = dataSource.getInitialReaderPositions();
|
||||
if ( dataSource.getReadsInfo().getDownsamplingMethod() != null && dataSource.getReadsInfo().getDownsamplingMethod().useLegacyDownsampler ) {
|
||||
currentPosition = dataSource.getCurrentPosition();
|
||||
}
|
||||
else {
|
||||
currentPosition = dataSource.getCurrentPosition();
|
||||
|
||||
currentPosition = dataSource.getInitialReaderPositions();
|
||||
}
|
||||
|
||||
for(SAMReaderID reader: dataSource.getReaderIDs())
|
||||
|
|
|
|||
|
|
@ -1,228 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||
|
||||
import net.sf.picard.util.PeekableIterator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Convert from an unbalanced iterator over FilePointers to a balanced iterator over Shards.
|
||||
*
|
||||
* When processing FilePointers, our strategy is to aggregate all FilePointers for each contig
|
||||
* together into one monolithic FilePointer, create one persistent set of read iterators over
|
||||
* that monolithic FilePointer, and repeatedly use that persistent set of read iterators to
|
||||
* fill read shards with reads.
|
||||
*
|
||||
* This strategy has several important advantages:
|
||||
*
|
||||
* 1. We avoid issues with file span overlap. FilePointers that are more granular than a whole
|
||||
* contig will have regions that overlap with other FilePointers on the same contig, due
|
||||
* to the limited granularity of BAM index data. By creating only one FilePointer per contig,
|
||||
* we avoid having to track how much of each file region we've visited (as we did in the
|
||||
* former implementation), we avoid expensive non-sequential access patterns in the files,
|
||||
* and we avoid having to repeatedly re-create our iterator chain for every small region
|
||||
* of interest.
|
||||
*
|
||||
* 2. We avoid boundary issues with the engine-level downsampling. Since we create a single
|
||||
* persistent set of read iterators (which include the downsampling iterator(s)) per contig,
|
||||
* the downsampling process is never interrupted by FilePointer or Shard boundaries, and never
|
||||
* loses crucial state information while downsampling within a contig.
|
||||
*
|
||||
* TODO: There is also at least one important disadvantage:
|
||||
*
|
||||
* 1. We load more BAM index data into memory at once, and this work is done upfront before processing
|
||||
* the next contig, creating a delay before traversal of each contig. This delay may be
|
||||
* compensated for by the gains listed in #1 above, and we may be no worse off overall in
|
||||
* terms of total runtime, but we need to verify this empirically.
|
||||
*
|
||||
* @author David Roazen
|
||||
*/
|
||||
public class ExperimentalReadShardBalancer extends ShardBalancer {
|
||||
|
||||
private static Logger logger = Logger.getLogger(ExperimentalReadShardBalancer.class);
|
||||
|
||||
/**
|
||||
* Convert iterators of file pointers into balanced iterators of shards.
|
||||
* @return An iterator over balanced shards.
|
||||
*/
|
||||
public Iterator<Shard> iterator() {
|
||||
return new Iterator<Shard>() {
|
||||
/**
|
||||
* The cached shard to be returned next. Prefetched in the peekable iterator style.
|
||||
*/
|
||||
private Shard nextShard = null;
|
||||
|
||||
/**
|
||||
* The file pointer currently being processed.
|
||||
*/
|
||||
private FilePointer currentContigFilePointer = null;
|
||||
|
||||
/**
|
||||
* Iterator over the reads from the current contig's file pointer. The same iterator will be
|
||||
* used to fill all shards associated with a given file pointer
|
||||
*/
|
||||
private PeekableIterator<SAMRecord> currentContigReadsIterator = null;
|
||||
|
||||
/**
|
||||
* How many FilePointers have we pulled from the filePointers iterator?
|
||||
*/
|
||||
private int totalFilePointersConsumed = 0;
|
||||
|
||||
/**
|
||||
* Have we encountered a monolithic FilePointer?
|
||||
*/
|
||||
private boolean encounteredMonolithicFilePointer = false;
|
||||
|
||||
|
||||
{
|
||||
createNextContigFilePointer();
|
||||
advance();
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
return nextShard != null;
|
||||
}
|
||||
|
||||
public Shard next() {
|
||||
if ( ! hasNext() )
|
||||
throw new NoSuchElementException("No next read shard available");
|
||||
Shard currentShard = nextShard;
|
||||
advance();
|
||||
return currentShard;
|
||||
}
|
||||
|
||||
private void advance() {
|
||||
nextShard = null;
|
||||
|
||||
// May need multiple iterations to fill the next shard if all reads in current file spans get filtered/downsampled away
|
||||
while ( nextShard == null && currentContigFilePointer != null ) {
|
||||
|
||||
// If we've exhausted the current file pointer of reads, move to the next file pointer (if there is one):
|
||||
if ( currentContigReadsIterator != null && ! currentContigReadsIterator.hasNext() ) {
|
||||
|
||||
// Close the old, exhausted chain of iterators to release resources
|
||||
currentContigReadsIterator.close();
|
||||
|
||||
// Advance to the FilePointer for the next contig
|
||||
createNextContigFilePointer();
|
||||
|
||||
// We'll need to create a fresh iterator for this file pointer when we create the first
|
||||
// shard for it below.
|
||||
currentContigReadsIterator = null;
|
||||
}
|
||||
|
||||
// At this point our currentContigReadsIterator may be null or non-null depending on whether or not
|
||||
// this is our first shard for this file pointer.
|
||||
if ( currentContigFilePointer != null ) {
|
||||
Shard shard = new ReadShard(parser,readsDataSource, currentContigFilePointer.fileSpans, currentContigFilePointer.locations, currentContigFilePointer.isRegionUnmapped);
|
||||
|
||||
// Create a new reads iterator only when we've just advanced to the file pointer for the next
|
||||
// contig. It's essential that the iterators persist across all shards that share the same contig
|
||||
// to allow the downsampling to work properly.
|
||||
if ( currentContigReadsIterator == null ) {
|
||||
currentContigReadsIterator = new PeekableIterator<SAMRecord>(readsDataSource.getIterator(shard));
|
||||
}
|
||||
|
||||
if ( currentContigReadsIterator.hasNext() ) {
|
||||
shard.fill(currentContigReadsIterator);
|
||||
nextShard = shard;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Aggregate all FilePointers for the next contig together into one monolithic FilePointer
|
||||
* to avoid boundary issues with visiting the same file regions more than once (since more
|
||||
* granular FilePointers will have regions that overlap with other nearby FilePointers due
|
||||
* to the nature of BAM indices).
|
||||
*
|
||||
* By creating one persistent set of iterators per contig we also avoid boundary artifacts
|
||||
* in the engine-level downsampling.
|
||||
*
|
||||
* TODO: This FilePointer aggregation should ideally be done at the BAMSchedule level for
|
||||
* TODO: read traversals, as there's little point in the BAMSchedule emitting extremely
|
||||
* TODO: granular FilePointers if we're just going to union them. The BAMSchedule should
|
||||
* TODO: emit one FilePointer per contig for read traversals (but, crucially, NOT for
|
||||
* TODO: locus traversals).
|
||||
*/
|
||||
private void createNextContigFilePointer() {
|
||||
currentContigFilePointer = null;
|
||||
List<FilePointer> nextContigFilePointers = new ArrayList<FilePointer>();
|
||||
|
||||
logger.info("Loading BAM index data for next contig");
|
||||
|
||||
while ( filePointers.hasNext() ) {
|
||||
|
||||
// Make sure that if we see a monolithic FilePointer (representing all regions in all files) that
|
||||
// it is the ONLY FilePointer we ever encounter
|
||||
if ( encounteredMonolithicFilePointer ) {
|
||||
throw new ReviewedStingException("Bug: encountered additional FilePointers after encountering a monolithic FilePointer");
|
||||
}
|
||||
if ( filePointers.peek().isMonolithic() ) {
|
||||
if ( totalFilePointersConsumed > 0 ) {
|
||||
throw new ReviewedStingException("Bug: encountered additional FilePointers before encountering a monolithic FilePointer");
|
||||
}
|
||||
encounteredMonolithicFilePointer = true;
|
||||
logger.debug(String.format("Encountered monolithic FilePointer: %s", filePointers.peek()));
|
||||
}
|
||||
|
||||
// If this is the first FP we've seen, or we're dealing with mapped regions and the next FP is on the
|
||||
// same contig as previous FPs, or all our FPs are unmapped, add the next FP to the list of FPs to merge
|
||||
if ( nextContigFilePointers.isEmpty() ||
|
||||
(! nextContigFilePointers.get(0).isRegionUnmapped && ! filePointers.peek().isRegionUnmapped &&
|
||||
nextContigFilePointers.get(0).getContigIndex() == filePointers.peek().getContigIndex()) ||
|
||||
(nextContigFilePointers.get(0).isRegionUnmapped && filePointers.peek().isRegionUnmapped) ) {
|
||||
|
||||
nextContigFilePointers.add(filePointers.next());
|
||||
totalFilePointersConsumed++;
|
||||
}
|
||||
else {
|
||||
break; // next FilePointer is on a different contig or has different mapped/unmapped status,
|
||||
// save it for next time
|
||||
}
|
||||
}
|
||||
|
||||
if ( ! nextContigFilePointers.isEmpty() ) {
|
||||
currentContigFilePointer = FilePointer.union(nextContigFilePointers, parser);
|
||||
}
|
||||
|
||||
if ( currentContigFilePointer != null ) {
|
||||
logger.info("Done loading BAM index data for next contig");
|
||||
logger.debug(String.format("Next contig FilePointer: %s", currentContigFilePointer));
|
||||
}
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("Unable to remove from shard balancing iterator");
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,129 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||
|
||||
import net.sf.samtools.GATKBAMFileSpan;
|
||||
import net.sf.samtools.SAMFileSpan;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.NoSuchElementException;
|
||||
|
||||
/**
|
||||
* Divide up large file pointers containing reads into more manageable subcomponents.
|
||||
*
|
||||
* TODO: delete this class once the experimental downsampling engine fork collapses
|
||||
*/
|
||||
public class LegacyReadShardBalancer extends ShardBalancer {
|
||||
/**
|
||||
* Convert iterators of file pointers into balanced iterators of shards.
|
||||
* @return An iterator over balanced shards.
|
||||
*/
|
||||
public Iterator<Shard> iterator() {
|
||||
return new Iterator<Shard>() {
|
||||
/**
|
||||
* The cached shard to be returned next. Prefetched in the peekable iterator style.
|
||||
*/
|
||||
private Shard nextShard = null;
|
||||
|
||||
/**
|
||||
* The file pointer currently being processed.
|
||||
*/
|
||||
private FilePointer currentFilePointer;
|
||||
|
||||
/**
|
||||
* Ending position of the last shard in the file.
|
||||
*/
|
||||
private Map<SAMReaderID,GATKBAMFileSpan> position = readsDataSource.getCurrentPosition();
|
||||
|
||||
{
|
||||
if(filePointers.hasNext())
|
||||
currentFilePointer = filePointers.next();
|
||||
advance();
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
return nextShard != null;
|
||||
}
|
||||
|
||||
public Shard next() {
|
||||
if(!hasNext())
|
||||
throw new NoSuchElementException("No next read shard available");
|
||||
Shard currentShard = nextShard;
|
||||
advance();
|
||||
return currentShard;
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("Unable to remove from shard balancing iterator");
|
||||
}
|
||||
|
||||
private void advance() {
|
||||
Map<SAMReaderID,SAMFileSpan> shardPosition;
|
||||
nextShard = null;
|
||||
|
||||
Map<SAMReaderID,SAMFileSpan> selectedReaders = new HashMap<SAMReaderID,SAMFileSpan>();
|
||||
while(selectedReaders.size() == 0 && currentFilePointer != null) {
|
||||
shardPosition = currentFilePointer.fileSpans;
|
||||
|
||||
for(SAMReaderID id: shardPosition.keySet()) {
|
||||
SAMFileSpan fileSpan = new GATKBAMFileSpan(shardPosition.get(id).removeContentsBefore(position.get(id)));
|
||||
selectedReaders.put(id,fileSpan);
|
||||
}
|
||||
|
||||
if(!isEmpty(selectedReaders)) {
|
||||
Shard shard = new ReadShard(parser,readsDataSource,selectedReaders,currentFilePointer.locations,currentFilePointer.isRegionUnmapped);
|
||||
readsDataSource.fillShard(shard);
|
||||
|
||||
if(!shard.isBufferEmpty()) {
|
||||
nextShard = shard;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
selectedReaders.clear();
|
||||
currentFilePointer = filePointers.hasNext() ? filePointers.next() : null;
|
||||
}
|
||||
|
||||
position = readsDataSource.getCurrentPosition();
|
||||
}
|
||||
|
||||
/**
|
||||
* Detects whether the list of file spans contain any read data.
|
||||
* @param selectedSpans Mapping of readers to file spans.
|
||||
* @return True if file spans are completely empty; false otherwise.
|
||||
*/
|
||||
private boolean isEmpty(Map<SAMReaderID,SAMFileSpan> selectedSpans) {
|
||||
for(SAMFileSpan fileSpan: selectedSpans.values()) {
|
||||
if(!fileSpan.isEmpty())
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
|
|
@ -24,20 +24,49 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||
|
||||
import net.sf.samtools.GATKBAMFileSpan;
|
||||
import net.sf.samtools.SAMFileSpan;
|
||||
import net.sf.picard.util.PeekableIterator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.NoSuchElementException;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Divide up large file pointers containing reads into more manageable subcomponents.
|
||||
* Convert from an unbalanced iterator over FilePointers to a balanced iterator over Shards.
|
||||
*
|
||||
* TODO: delete this class once the experimental downsampling engine fork collapses
|
||||
* When processing FilePointers, our strategy is to aggregate all FilePointers for each contig
|
||||
* together into one monolithic FilePointer, create one persistent set of read iterators over
|
||||
* that monolithic FilePointer, and repeatedly use that persistent set of read iterators to
|
||||
* fill read shards with reads.
|
||||
*
|
||||
* This strategy has several important advantages:
|
||||
*
|
||||
* 1. We avoid issues with file span overlap. FilePointers that are more granular than a whole
|
||||
* contig will have regions that overlap with other FilePointers on the same contig, due
|
||||
* to the limited granularity of BAM index data. By creating only one FilePointer per contig,
|
||||
* we avoid having to track how much of each file region we've visited (as we did in the
|
||||
* former implementation), we avoid expensive non-sequential access patterns in the files,
|
||||
* and we avoid having to repeatedly re-create our iterator chain for every small region
|
||||
* of interest.
|
||||
*
|
||||
* 2. We avoid boundary issues with the engine-level downsampling. Since we create a single
|
||||
* persistent set of read iterators (which include the downsampling iterator(s)) per contig,
|
||||
* the downsampling process is never interrupted by FilePointer or Shard boundaries, and never
|
||||
* loses crucial state information while downsampling within a contig.
|
||||
*
|
||||
* TODO: There is also at least one important disadvantage:
|
||||
*
|
||||
* 1. We load more BAM index data into memory at once, and this work is done upfront before processing
|
||||
* the next contig, creating a delay before traversal of each contig. This delay may be
|
||||
* compensated for by the gains listed in #1 above, and we may be no worse off overall in
|
||||
* terms of total runtime, but we need to verify this empirically.
|
||||
*
|
||||
* @author David Roazen
|
||||
*/
|
||||
public class ReadShardBalancer extends ShardBalancer {
|
||||
|
||||
private static Logger logger = Logger.getLogger(ReadShardBalancer.class);
|
||||
|
||||
/**
|
||||
* Convert iterators of file pointers into balanced iterators of shards.
|
||||
* @return An iterator over balanced shards.
|
||||
|
|
@ -52,16 +81,27 @@ public class ReadShardBalancer extends ShardBalancer {
|
|||
/**
|
||||
* The file pointer currently being processed.
|
||||
*/
|
||||
private FilePointer currentFilePointer;
|
||||
private FilePointer currentContigFilePointer = null;
|
||||
|
||||
/**
|
||||
* Ending position of the last shard in the file.
|
||||
* Iterator over the reads from the current contig's file pointer. The same iterator will be
|
||||
* used to fill all shards associated with a given file pointer
|
||||
*/
|
||||
private Map<SAMReaderID,GATKBAMFileSpan> position = readsDataSource.getCurrentPosition();
|
||||
private PeekableIterator<SAMRecord> currentContigReadsIterator = null;
|
||||
|
||||
/**
|
||||
* How many FilePointers have we pulled from the filePointers iterator?
|
||||
*/
|
||||
private int totalFilePointersConsumed = 0;
|
||||
|
||||
/**
|
||||
* Have we encountered a monolithic FilePointer?
|
||||
*/
|
||||
private boolean encounteredMonolithicFilePointer = false;
|
||||
|
||||
|
||||
{
|
||||
if(filePointers.hasNext())
|
||||
currentFilePointer = filePointers.next();
|
||||
createNextContigFilePointer();
|
||||
advance();
|
||||
}
|
||||
|
||||
|
|
@ -70,58 +110,117 @@ public class ReadShardBalancer extends ShardBalancer {
|
|||
}
|
||||
|
||||
public Shard next() {
|
||||
if(!hasNext())
|
||||
if ( ! hasNext() )
|
||||
throw new NoSuchElementException("No next read shard available");
|
||||
Shard currentShard = nextShard;
|
||||
advance();
|
||||
return currentShard;
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("Unable to remove from shard balancing iterator");
|
||||
}
|
||||
|
||||
private void advance() {
|
||||
Map<SAMReaderID,SAMFileSpan> shardPosition;
|
||||
nextShard = null;
|
||||
|
||||
Map<SAMReaderID,SAMFileSpan> selectedReaders = new HashMap<SAMReaderID,SAMFileSpan>();
|
||||
while(selectedReaders.size() == 0 && currentFilePointer != null) {
|
||||
shardPosition = currentFilePointer.fileSpans;
|
||||
// May need multiple iterations to fill the next shard if all reads in current file spans get filtered/downsampled away
|
||||
while ( nextShard == null && currentContigFilePointer != null ) {
|
||||
|
||||
for(SAMReaderID id: shardPosition.keySet()) {
|
||||
SAMFileSpan fileSpan = new GATKBAMFileSpan(shardPosition.get(id).removeContentsBefore(position.get(id)));
|
||||
selectedReaders.put(id,fileSpan);
|
||||
// If we've exhausted the current file pointer of reads, move to the next file pointer (if there is one):
|
||||
if ( currentContigReadsIterator != null && ! currentContigReadsIterator.hasNext() ) {
|
||||
|
||||
// Close the old, exhausted chain of iterators to release resources
|
||||
currentContigReadsIterator.close();
|
||||
|
||||
// Advance to the FilePointer for the next contig
|
||||
createNextContigFilePointer();
|
||||
|
||||
// We'll need to create a fresh iterator for this file pointer when we create the first
|
||||
// shard for it below.
|
||||
currentContigReadsIterator = null;
|
||||
}
|
||||
|
||||
if(!isEmpty(selectedReaders)) {
|
||||
Shard shard = new ReadShard(parser,readsDataSource,selectedReaders,currentFilePointer.locations,currentFilePointer.isRegionUnmapped);
|
||||
readsDataSource.fillShard(shard);
|
||||
// At this point our currentContigReadsIterator may be null or non-null depending on whether or not
|
||||
// this is our first shard for this file pointer.
|
||||
if ( currentContigFilePointer != null ) {
|
||||
Shard shard = new ReadShard(parser,readsDataSource, currentContigFilePointer.fileSpans, currentContigFilePointer.locations, currentContigFilePointer.isRegionUnmapped);
|
||||
|
||||
if(!shard.isBufferEmpty()) {
|
||||
// Create a new reads iterator only when we've just advanced to the file pointer for the next
|
||||
// contig. It's essential that the iterators persist across all shards that share the same contig
|
||||
// to allow the downsampling to work properly.
|
||||
if ( currentContigReadsIterator == null ) {
|
||||
currentContigReadsIterator = new PeekableIterator<SAMRecord>(readsDataSource.getIterator(shard));
|
||||
}
|
||||
|
||||
if ( currentContigReadsIterator.hasNext() ) {
|
||||
shard.fill(currentContigReadsIterator);
|
||||
nextShard = shard;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
selectedReaders.clear();
|
||||
currentFilePointer = filePointers.hasNext() ? filePointers.next() : null;
|
||||
}
|
||||
|
||||
position = readsDataSource.getCurrentPosition();
|
||||
}
|
||||
|
||||
/**
|
||||
* Detects whether the list of file spans contain any read data.
|
||||
* @param selectedSpans Mapping of readers to file spans.
|
||||
* @return True if file spans are completely empty; false otherwise.
|
||||
* Aggregate all FilePointers for the next contig together into one monolithic FilePointer
|
||||
* to avoid boundary issues with visiting the same file regions more than once (since more
|
||||
* granular FilePointers will have regions that overlap with other nearby FilePointers due
|
||||
* to the nature of BAM indices).
|
||||
*
|
||||
* By creating one persistent set of iterators per contig we also avoid boundary artifacts
|
||||
* in the engine-level downsampling.
|
||||
*
|
||||
* TODO: This FilePointer aggregation should ideally be done at the BAMSchedule level for
|
||||
* TODO: read traversals, as there's little point in the BAMSchedule emitting extremely
|
||||
* TODO: granular FilePointers if we're just going to union them. The BAMSchedule should
|
||||
* TODO: emit one FilePointer per contig for read traversals (but, crucially, NOT for
|
||||
* TODO: locus traversals).
|
||||
*/
|
||||
private boolean isEmpty(Map<SAMReaderID,SAMFileSpan> selectedSpans) {
|
||||
for(SAMFileSpan fileSpan: selectedSpans.values()) {
|
||||
if(!fileSpan.isEmpty())
|
||||
return false;
|
||||
private void createNextContigFilePointer() {
|
||||
currentContigFilePointer = null;
|
||||
List<FilePointer> nextContigFilePointers = new ArrayList<FilePointer>();
|
||||
|
||||
logger.info("Loading BAM index data for next contig");
|
||||
|
||||
while ( filePointers.hasNext() ) {
|
||||
|
||||
// Make sure that if we see a monolithic FilePointer (representing all regions in all files) that
|
||||
// it is the ONLY FilePointer we ever encounter
|
||||
if ( encounteredMonolithicFilePointer ) {
|
||||
throw new ReviewedStingException("Bug: encountered additional FilePointers after encountering a monolithic FilePointer");
|
||||
}
|
||||
if ( filePointers.peek().isMonolithic() ) {
|
||||
if ( totalFilePointersConsumed > 0 ) {
|
||||
throw new ReviewedStingException("Bug: encountered additional FilePointers before encountering a monolithic FilePointer");
|
||||
}
|
||||
encounteredMonolithicFilePointer = true;
|
||||
logger.debug(String.format("Encountered monolithic FilePointer: %s", filePointers.peek()));
|
||||
}
|
||||
|
||||
// If this is the first FP we've seen, or we're dealing with mapped regions and the next FP is on the
|
||||
// same contig as previous FPs, or all our FPs are unmapped, add the next FP to the list of FPs to merge
|
||||
if ( nextContigFilePointers.isEmpty() ||
|
||||
(! nextContigFilePointers.get(0).isRegionUnmapped && ! filePointers.peek().isRegionUnmapped &&
|
||||
nextContigFilePointers.get(0).getContigIndex() == filePointers.peek().getContigIndex()) ||
|
||||
(nextContigFilePointers.get(0).isRegionUnmapped && filePointers.peek().isRegionUnmapped) ) {
|
||||
|
||||
nextContigFilePointers.add(filePointers.next());
|
||||
totalFilePointersConsumed++;
|
||||
}
|
||||
else {
|
||||
break; // next FilePointer is on a different contig or has different mapped/unmapped status,
|
||||
// save it for next time
|
||||
}
|
||||
}
|
||||
return true;
|
||||
|
||||
if ( ! nextContigFilePointers.isEmpty() ) {
|
||||
currentContigFilePointer = FilePointer.union(nextContigFilePointers, parser);
|
||||
}
|
||||
|
||||
if ( currentContigFilePointer != null ) {
|
||||
logger.info("Done loading BAM index data for next contig");
|
||||
logger.debug(String.format("Next contig FilePointer: %s", currentContigFilePointer));
|
||||
}
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("Unable to remove from shard balancing iterator");
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -466,7 +466,7 @@ public class SAMDataSource {
|
|||
/**
|
||||
* Legacy method to fill the given buffering shard with reads.
|
||||
*
|
||||
* Shard.fill() is used instead of this method when experimental downsampling is enabled
|
||||
* Shard.fill() is used instead of this method unless legacy downsampling is enabled
|
||||
*
|
||||
* TODO: delete this method once the experimental downsampling engine fork collapses
|
||||
*
|
||||
|
|
@ -638,7 +638,8 @@ public class SAMDataSource {
|
|||
readProperties.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION),
|
||||
readProperties.getSupplementalFilters(),
|
||||
readProperties.getReadTransformers(),
|
||||
readProperties.defaultBaseQualities());
|
||||
readProperties.defaultBaseQualities(),
|
||||
shard instanceof LocusShard);
|
||||
}
|
||||
|
||||
private class BAMCodecIterator implements CloseableIterator<SAMRecord> {
|
||||
|
|
@ -695,6 +696,7 @@ public class SAMDataSource {
|
|||
* @param noValidationOfReadOrder Another trigger for the verifying iterator? TODO: look into this.
|
||||
* @param supplementalFilters additional filters to apply to the reads.
|
||||
* @param defaultBaseQualities if the reads have incomplete quality scores, set them all to defaultBaseQuality.
|
||||
* @param isLocusBasedTraversal true if we're dealing with a read stream from a LocusShard
|
||||
* @return An iterator wrapped with filters reflecting the passed-in parameters. Will not be null.
|
||||
*/
|
||||
protected StingSAMIterator applyDecoratingIterators(ReadMetrics readMetrics,
|
||||
|
|
@ -705,7 +707,8 @@ public class SAMDataSource {
|
|||
Boolean noValidationOfReadOrder,
|
||||
Collection<ReadFilter> supplementalFilters,
|
||||
List<ReadTransformer> readTransformers,
|
||||
byte defaultBaseQualities) {
|
||||
byte defaultBaseQualities,
|
||||
boolean isLocusBasedTraversal ) {
|
||||
|
||||
// ************************************************************************************************ //
|
||||
// * NOTE: ALL FILTERING/DOWNSAMPLING SHOULD BE DONE BEFORE ANY ITERATORS THAT MODIFY THE READS! * //
|
||||
|
|
@ -714,12 +717,26 @@ public class SAMDataSource {
|
|||
|
||||
wrappedIterator = StingSAMIteratorAdapter.adapt(new CountingFilteringIterator(readMetrics,wrappedIterator,supplementalFilters));
|
||||
|
||||
if ( readProperties.getDownsamplingMethod().useExperimentalDownsampling ) {
|
||||
wrappedIterator = applyDownsamplingIterator(wrappedIterator);
|
||||
// If we're using the new downsampling implementation, apply downsampling iterators at this
|
||||
// point in the read stream for most (but not all) cases
|
||||
if ( ! readProperties.getDownsamplingMethod().useLegacyDownsampler ) {
|
||||
|
||||
// For locus traversals where we're downsampling to coverage by sample, assume that the downsamplers
|
||||
// will be invoked downstream from us in LocusIteratorByState. This improves performance by avoiding
|
||||
// splitting/re-assembly of the read stream at this stage, and also allows for partial downsampling
|
||||
// of individual reads.
|
||||
boolean assumeDownstreamLIBSDownsampling = isLocusBasedTraversal &&
|
||||
readProperties.getDownsamplingMethod().type == DownsampleType.BY_SAMPLE &&
|
||||
readProperties.getDownsamplingMethod().toCoverage != null;
|
||||
|
||||
if ( ! assumeDownstreamLIBSDownsampling ) {
|
||||
wrappedIterator = applyDownsamplingIterator(wrappedIterator);
|
||||
}
|
||||
}
|
||||
|
||||
// Use the old fractional downsampler only if we're not using experimental downsampling:
|
||||
if ( ! readProperties.getDownsamplingMethod().useExperimentalDownsampling && downsamplingFraction != null )
|
||||
// Use the old fractional downsampler only if we're using legacy downsampling:
|
||||
// TODO: remove this statement (and associated classes) once the downsampling engine fork collapses
|
||||
if ( readProperties.getDownsamplingMethod().useLegacyDownsampler && downsamplingFraction != null )
|
||||
wrappedIterator = new LegacyDownsampleIterator(wrappedIterator, downsamplingFraction);
|
||||
|
||||
// unless they've said not to validate read ordering (!noValidationOfReadOrder) and we've enabled verification,
|
||||
|
|
@ -741,19 +758,37 @@ public class SAMDataSource {
|
|||
}
|
||||
|
||||
protected StingSAMIterator applyDownsamplingIterator( StingSAMIterator wrappedIterator ) {
|
||||
if ( readProperties.getDownsamplingMethod().type == DownsampleType.BY_SAMPLE ) {
|
||||
ReadsDownsamplerFactory<SAMRecord> downsamplerFactory = readProperties.getDownsamplingMethod().toCoverage != null ?
|
||||
new SimplePositionalDownsamplerFactory<SAMRecord>(readProperties.getDownsamplingMethod().toCoverage) :
|
||||
new FractionalDownsamplerFactory<SAMRecord>(readProperties.getDownsamplingMethod().toFraction);
|
||||
|
||||
return new PerSampleDownsamplingReadsIterator(wrappedIterator, downsamplerFactory);
|
||||
if ( readProperties.getDownsamplingMethod() == null ||
|
||||
readProperties.getDownsamplingMethod().type == DownsampleType.NONE ) {
|
||||
return wrappedIterator;
|
||||
}
|
||||
else if ( readProperties.getDownsamplingMethod().type == DownsampleType.ALL_READS ) {
|
||||
ReadsDownsampler<SAMRecord> downsampler = readProperties.getDownsamplingMethod().toCoverage != null ?
|
||||
new SimplePositionalDownsampler<SAMRecord>(readProperties.getDownsamplingMethod().toCoverage) :
|
||||
new FractionalDownsampler<SAMRecord>(readProperties.getDownsamplingMethod().toFraction);
|
||||
|
||||
return new DownsamplingReadsIterator(wrappedIterator, downsampler);
|
||||
if ( readProperties.getDownsamplingMethod().toFraction != null ) {
|
||||
|
||||
// If we're downsampling to a fraction of reads, there's no point in paying the cost of
|
||||
// splitting/re-assembling the read stream by sample to run the FractionalDownsampler on
|
||||
// reads from each sample separately, since the result would be the same as running the
|
||||
// FractionalDownsampler on the entire stream. So, ALWAYS use the DownsamplingReadsIterator
|
||||
// rather than the PerSampleDownsamplingReadsIterator, even if BY_SAMPLE downsampling
|
||||
// was requested.
|
||||
|
||||
return new DownsamplingReadsIterator(wrappedIterator,
|
||||
new FractionalDownsampler<SAMRecord>(readProperties.getDownsamplingMethod().toFraction));
|
||||
}
|
||||
else if ( readProperties.getDownsamplingMethod().toCoverage != null ) {
|
||||
|
||||
// If we're downsampling to coverage, we DO need to pay the cost of splitting/re-assembling
|
||||
// the read stream to run the downsampler on the reads for each individual sample separately if
|
||||
// BY_SAMPLE downsampling was requested.
|
||||
|
||||
if ( readProperties.getDownsamplingMethod().type == DownsampleType.BY_SAMPLE ) {
|
||||
return new PerSampleDownsamplingReadsIterator(wrappedIterator,
|
||||
new SimplePositionalDownsamplerFactory<SAMRecord>(readProperties.getDownsamplingMethod().toCoverage));
|
||||
}
|
||||
else if ( readProperties.getDownsamplingMethod().type == DownsampleType.ALL_READS ) {
|
||||
return new DownsamplingReadsIterator(wrappedIterator,
|
||||
new SimplePositionalDownsampler<SAMRecord>(readProperties.getDownsamplingMethod().toCoverage));
|
||||
}
|
||||
}
|
||||
|
||||
return wrappedIterator;
|
||||
|
|
|
|||
|
|
@ -50,9 +50,9 @@ public class DownsamplingMethod {
|
|||
public final Double toFraction;
|
||||
|
||||
/**
|
||||
* Use the new experimental downsampling?
|
||||
* Use the legacy downsampling implementation instead of the newer implementation?
|
||||
*/
|
||||
public final boolean useExperimentalDownsampling;
|
||||
public final boolean useLegacyDownsampler;
|
||||
|
||||
/**
|
||||
* Expresses no downsampling applied at all.
|
||||
|
|
@ -69,11 +69,11 @@ public class DownsamplingMethod {
|
|||
*/
|
||||
public static int DEFAULT_LOCUS_BASED_TRAVERSAL_DOWNSAMPLING_COVERAGE = 1000;
|
||||
|
||||
public DownsamplingMethod( DownsampleType type, Integer toCoverage, Double toFraction, boolean useExperimentalDownsampling ) {
|
||||
public DownsamplingMethod( DownsampleType type, Integer toCoverage, Double toFraction, boolean useLegacyDownsampler ) {
|
||||
this.type = type != null ? type : DEFAULT_DOWNSAMPLING_TYPE;
|
||||
this.toCoverage = toCoverage;
|
||||
this.toFraction = toFraction;
|
||||
this.useExperimentalDownsampling = useExperimentalDownsampling;
|
||||
this.useLegacyDownsampler = useLegacyDownsampler;
|
||||
|
||||
if ( type == DownsampleType.NONE ) {
|
||||
toCoverage = null;
|
||||
|
|
@ -101,19 +101,19 @@ public class DownsamplingMethod {
|
|||
if ( toFraction != null && (toFraction < 0.0 || toFraction > 1.0) ) {
|
||||
throw new UserException.CommandLineException("toFraction must be >= 0.0 and <= 1.0 when downsampling to a fraction of reads");
|
||||
}
|
||||
}
|
||||
|
||||
// Some restrictions only exist for the old downsampling implementation:
|
||||
if ( ! useExperimentalDownsampling ) {
|
||||
// By sample downsampling does not work with a fraction of reads in the old downsampling implementation
|
||||
if( type == DownsampleType.BY_SAMPLE && toFraction != null )
|
||||
throw new UserException.CommandLineException("Cannot downsample to fraction with the BY_SAMPLE method");
|
||||
public void checkCompatibilityWithWalker( Walker walker ) {
|
||||
boolean isLocusTraversal = walker instanceof LocusWalker || walker instanceof ActiveRegionWalker;
|
||||
|
||||
if ( ! isLocusTraversal && useLegacyDownsampler && toCoverage != null ) {
|
||||
throw new UserException.CommandLineException("Downsampling to coverage for read-based traversals (eg., ReadWalkers) is not supported in the legacy downsampling implementation. " +
|
||||
"The newer downsampling implementation does not have this limitation.");
|
||||
}
|
||||
|
||||
// Some restrictions only exist for the new downsampling implementation:
|
||||
if ( useExperimentalDownsampling ) {
|
||||
if ( type == DownsampleType.ALL_READS && toCoverage != null ) {
|
||||
throw new UserException.CommandLineException("Cannot downsample to coverage with the ALL_READS method in the experimental downsampling implementation");
|
||||
}
|
||||
if ( isLocusTraversal && ! useLegacyDownsampler && type == DownsampleType.ALL_READS && toCoverage != null ) {
|
||||
throw new UserException.CommandLineException("Downsampling to coverage with the ALL_READS method for locus-based traversals (eg., LocusWalkers) is not yet supported in the new downsampling implementation (though it is supported for ReadWalkers). " +
|
||||
"You can run with --use_legacy_downsampler for a broken and poorly-maintained implementation of ALL_READS to-coverage downsampling, but this is not recommended.");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -124,30 +124,34 @@ public class DownsamplingMethod {
|
|||
builder.append("No downsampling");
|
||||
}
|
||||
else {
|
||||
builder.append(String.format("Method: %s ", type));
|
||||
builder.append(String.format("Method: %s, ", type));
|
||||
|
||||
if ( toCoverage != null ) {
|
||||
builder.append(String.format("Target Coverage: %d ", toCoverage));
|
||||
builder.append(String.format("Target Coverage: %d, ", toCoverage));
|
||||
}
|
||||
else {
|
||||
builder.append(String.format("Target Fraction: %.2f ", toFraction));
|
||||
builder.append(String.format("Target Fraction: %.2f, ", toFraction));
|
||||
}
|
||||
|
||||
if ( useExperimentalDownsampling ) {
|
||||
builder.append("Using Experimental Downsampling");
|
||||
if ( useLegacyDownsampler ) {
|
||||
builder.append("Using the legacy downsampling implementation");
|
||||
}
|
||||
else {
|
||||
builder.append("Using the new downsampling implementation");
|
||||
}
|
||||
}
|
||||
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
public static DownsamplingMethod getDefaultDownsamplingMethod( Walker walker, boolean useExperimentalDownsampling ) {
|
||||
public static DownsamplingMethod getDefaultDownsamplingMethod( Walker walker, boolean useLegacyDownsampler ) {
|
||||
if ( walker instanceof LocusWalker || walker instanceof ActiveRegionWalker ) {
|
||||
return new DownsamplingMethod(DEFAULT_DOWNSAMPLING_TYPE, DEFAULT_LOCUS_BASED_TRAVERSAL_DOWNSAMPLING_COVERAGE,
|
||||
null, useExperimentalDownsampling);
|
||||
null, useLegacyDownsampler);
|
||||
}
|
||||
else {
|
||||
return new DownsamplingMethod(DownsampleType.NONE, null, null, useExperimentalDownsampling);
|
||||
// Downsampling is off by default for non-locus-based traversals
|
||||
return new DownsamplingMethod(DownsampleType.NONE, null, null, useLegacyDownsampler);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,106 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.downsampling;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Pass-Through Downsampler: Implementation of the ReadsDownsampler interface that does no
|
||||
* downsampling whatsoever, and instead simply "passes-through" all the reads it's given.
|
||||
* Useful for situations where you want to disable downsampling, but still need to use
|
||||
* the downsampler interface.
|
||||
*
|
||||
* @author David Roazen
|
||||
*/
|
||||
public class PassThroughDownsampler<T extends SAMRecord> implements ReadsDownsampler<T> {
|
||||
|
||||
private ArrayList<T> selectedReads;
|
||||
|
||||
public PassThroughDownsampler() {
|
||||
clear();
|
||||
}
|
||||
|
||||
public void submit( T newRead ) {
|
||||
// All reads pass-through, no reads get downsampled
|
||||
selectedReads.add(newRead);
|
||||
}
|
||||
|
||||
public void submit( Collection<T> newReads ) {
|
||||
for ( T read : newReads ) {
|
||||
submit(read);
|
||||
}
|
||||
}
|
||||
|
||||
public boolean hasFinalizedItems() {
|
||||
return selectedReads.size() > 0;
|
||||
}
|
||||
|
||||
public List<T> consumeFinalizedItems() {
|
||||
// pass by reference rather than make a copy, for speed
|
||||
List<T> downsampledItems = selectedReads;
|
||||
clear();
|
||||
return downsampledItems;
|
||||
}
|
||||
|
||||
public boolean hasPendingItems() {
|
||||
return false;
|
||||
}
|
||||
|
||||
public T peekFinalized() {
|
||||
return selectedReads.isEmpty() ? null : selectedReads.get(0);
|
||||
}
|
||||
|
||||
public T peekPending() {
|
||||
return null;
|
||||
}
|
||||
|
||||
public int getNumberOfDiscardedItems() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
public void signalEndOfInput() {
|
||||
// NO-OP
|
||||
}
|
||||
|
||||
public void clear() {
|
||||
selectedReads = new ArrayList<T>();
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
// NO-OP
|
||||
}
|
||||
|
||||
public boolean requiresCoordinateSortOrder() {
|
||||
return false;
|
||||
}
|
||||
|
||||
public void signalNoMoreReadsBefore( T read ) {
|
||||
// NO-OP
|
||||
}
|
||||
}
|
||||
|
|
@ -4,9 +4,9 @@ import net.sf.picard.util.PeekableIterator;
|
|||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
||||
import org.broadinstitute.sting.gatk.iterators.LegacyLocusIteratorByState;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByStateExperimental;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
|
@ -83,17 +83,18 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
|
|||
this.sourceInfo = shard.getReadProperties();
|
||||
this.readIterator = iterator;
|
||||
|
||||
// Temporary: use the experimental version of LocusIteratorByState if experimental downsampling was requested:
|
||||
this.sourceIterator = sourceInfo.getDownsamplingMethod().useExperimentalDownsampling ?
|
||||
new PeekableIterator<AlignmentContext>(new LocusIteratorByStateExperimental(iterator,sourceInfo,genomeLocParser, sampleNames))
|
||||
// Use the legacy version of LocusIteratorByState if legacy downsampling was requested:
|
||||
this.sourceIterator = sourceInfo.getDownsamplingMethod().useLegacyDownsampler ?
|
||||
new PeekableIterator<AlignmentContext>(new LegacyLocusIteratorByState(iterator,sourceInfo,genomeLocParser,sampleNames))
|
||||
:
|
||||
new PeekableIterator<AlignmentContext>(new LocusIteratorByState(iterator,sourceInfo,genomeLocParser, sampleNames));
|
||||
new PeekableIterator<AlignmentContext>(new LocusIteratorByState(iterator,sourceInfo,genomeLocParser,sampleNames));
|
||||
|
||||
|
||||
this.intervalIterator = intervals.size()>0 ? new PeekableIterator<GenomeLoc>(intervals.iterator()) : null;
|
||||
}
|
||||
|
||||
public WindowMaker(Shard shard, GenomeLocParser genomeLocParser, StingSAMIterator iterator, List<GenomeLoc> intervals ) {
|
||||
this(shard, genomeLocParser, iterator, intervals, LocusIteratorByState.sampleListForSAMWithoutReadGroups());
|
||||
this(shard, genomeLocParser, iterator, intervals, LegacyLocusIteratorByState.sampleListForSAMWithoutReadGroups());
|
||||
}
|
||||
|
||||
public Iterator<WindowMakerIterator> iterator() {
|
||||
|
|
|
|||
|
|
@ -31,14 +31,14 @@ import net.sf.samtools.CigarElement;
|
|||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
|
||||
import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
|
||||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
|
||||
import org.broadinstitute.sting.gatk.downsampling.Downsampler;
|
||||
import org.broadinstitute.sting.gatk.downsampling.LevelingDownsampler;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.LegacyReservoirDownsampler;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
||||
|
|
@ -50,11 +50,11 @@ import java.util.*;
|
|||
/**
|
||||
* Iterator that traverses a SAM File, accumulating information on a per-locus basis
|
||||
*/
|
||||
public class LocusIteratorByStateExperimental extends LocusIterator {
|
||||
public class LegacyLocusIteratorByState extends LocusIterator {
|
||||
/**
|
||||
* our log, which we want to capture anything from this class
|
||||
*/
|
||||
private static Logger logger = Logger.getLogger(LocusIteratorByState.class);
|
||||
private static Logger logger = Logger.getLogger(LegacyLocusIteratorByState.class);
|
||||
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
|
|
@ -69,7 +69,7 @@ public class LocusIteratorByStateExperimental extends LocusIterator {
|
|||
private final ArrayList<String> samples;
|
||||
private final ReadStateManager readStates;
|
||||
|
||||
protected static class SAMRecordState {
|
||||
static private class SAMRecordState {
|
||||
SAMRecord read;
|
||||
int readOffset = -1; // how far are we offset from the start of the read bases?
|
||||
int genomeOffset = -1; // how far are we offset from the alignment start on the genome?
|
||||
|
|
@ -213,7 +213,6 @@ public class LocusIteratorByStateExperimental extends LocusIterator {
|
|||
//final boolean DEBUG2 = false && DEBUG;
|
||||
private ReadProperties readInfo;
|
||||
private AlignmentContext nextAlignmentContext;
|
||||
private boolean performLevelingDownsampling;
|
||||
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
|
|
@ -221,15 +220,11 @@ public class LocusIteratorByStateExperimental extends LocusIterator {
|
|||
//
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
|
||||
public LocusIteratorByStateExperimental(final Iterator<SAMRecord> samIterator, ReadProperties readInformation, GenomeLocParser genomeLocParser, Collection<String> samples) {
|
||||
public LegacyLocusIteratorByState(final Iterator<SAMRecord> samIterator, ReadProperties readInformation, GenomeLocParser genomeLocParser, Collection<String> samples) {
|
||||
this.readInfo = readInformation;
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
this.samples = new ArrayList<String>(samples);
|
||||
this.readStates = new ReadStateManager(samIterator);
|
||||
|
||||
this.performLevelingDownsampling = readInfo.getDownsamplingMethod() != null &&
|
||||
readInfo.getDownsamplingMethod().type == DownsampleType.BY_SAMPLE &&
|
||||
readInfo.getDownsamplingMethod().toCoverage != null;
|
||||
this.readStates = new ReadStateManager(samIterator, readInformation.getDownsamplingMethod());
|
||||
|
||||
// currently the GATK expects this LocusIteratorByState to accept empty sample lists, when
|
||||
// there's no read data. So we need to throw this error only when samIterator.hasNext() is true
|
||||
|
|
@ -290,13 +285,11 @@ public class LocusIteratorByStateExperimental extends LocusIterator {
|
|||
|
||||
final GenomeLoc location = getLocation();
|
||||
final Map<String, ReadBackedPileupImpl> fullPileup = new HashMap<String, ReadBackedPileupImpl>();
|
||||
|
||||
// TODO: How can you determine here whether the current pileup has been downsampled?
|
||||
boolean hasBeenSampled = false;
|
||||
|
||||
for (final String sample : samples) {
|
||||
final Iterator<SAMRecordState> iterator = readStates.iterator(sample);
|
||||
final List<PileupElement> pile = new ArrayList<PileupElement>(readStates.size(sample));
|
||||
hasBeenSampled |= location.getStart() <= readStates.getDownsamplingExtent(sample);
|
||||
|
||||
int size = 0; // number of elements in this sample's pileup
|
||||
int nDeletions = 0; // number of deletions in this sample's pileup
|
||||
|
|
@ -405,20 +398,34 @@ public class LocusIteratorByStateExperimental extends LocusIterator {
|
|||
throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
|
||||
}
|
||||
|
||||
protected class ReadStateManager {
|
||||
private class ReadStateManager {
|
||||
private final PeekableIterator<SAMRecord> iterator;
|
||||
private final DownsamplingMethod downsamplingMethod;
|
||||
private final SamplePartitioner samplePartitioner;
|
||||
private final Map<String, PerSampleReadStateManager> readStatesBySample = new HashMap<String, PerSampleReadStateManager>();
|
||||
private final int targetCoverage;
|
||||
private int totalReadStates = 0;
|
||||
|
||||
public ReadStateManager(Iterator<SAMRecord> source) {
|
||||
public ReadStateManager(Iterator<SAMRecord> source, DownsamplingMethod downsamplingMethod) {
|
||||
this.iterator = new PeekableIterator<SAMRecord>(source);
|
||||
|
||||
for (final String sample : samples) {
|
||||
readStatesBySample.put(sample, new PerSampleReadStateManager());
|
||||
this.downsamplingMethod = downsamplingMethod.type != null ? downsamplingMethod : DownsamplingMethod.NONE;
|
||||
switch (this.downsamplingMethod.type) {
|
||||
case BY_SAMPLE:
|
||||
if (downsamplingMethod.toCoverage == null)
|
||||
throw new UserException.BadArgumentValue("dcov", "Downsampling coverage (-dcov) must be specified when downsampling by sample");
|
||||
this.targetCoverage = downsamplingMethod.toCoverage;
|
||||
break;
|
||||
default:
|
||||
this.targetCoverage = Integer.MAX_VALUE;
|
||||
}
|
||||
|
||||
samplePartitioner = new SamplePartitioner();
|
||||
Map<String, ReadSelector> readSelectors = new HashMap<String, ReadSelector>();
|
||||
for (final String sample : samples) {
|
||||
readStatesBySample.put(sample, new PerSampleReadStateManager());
|
||||
readSelectors.put(sample, downsamplingMethod.type == DownsampleType.BY_SAMPLE ? new NRandomReadSelector(null, targetCoverage) : new AllReadsSelector());
|
||||
}
|
||||
|
||||
samplePartitioner = new SamplePartitioner(readSelectors);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -442,6 +449,7 @@ public class LocusIteratorByStateExperimental extends LocusIterator {
|
|||
|
||||
public void remove() {
|
||||
wrappedIterator.remove();
|
||||
totalReadStates--;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
@ -469,6 +477,17 @@ public class LocusIteratorByStateExperimental extends LocusIterator {
|
|||
return readStatesBySample.get(sample).size();
|
||||
}
|
||||
|
||||
/**
|
||||
* The extent of downsampling; basically, the furthest base out which has 'fallen
|
||||
* victim' to the downsampler.
|
||||
*
|
||||
* @param sample Sample, downsampled independently.
|
||||
* @return Integer stop of the furthest undownsampled region.
|
||||
*/
|
||||
public int getDownsamplingExtent(final String sample) {
|
||||
return readStatesBySample.get(sample).getDownsamplingExtent();
|
||||
}
|
||||
|
||||
public SAMRecordState getFirst() {
|
||||
for (final String sample : samples) {
|
||||
PerSampleReadStateManager reads = readStatesBySample.get(sample);
|
||||
|
|
@ -501,13 +520,61 @@ public class LocusIteratorByStateExperimental extends LocusIterator {
|
|||
samplePartitioner.submitRead(iterator.next());
|
||||
}
|
||||
}
|
||||
samplePartitioner.complete();
|
||||
|
||||
for (final String sample : samples) {
|
||||
Collection<SAMRecord> newReads = samplePartitioner.getReadsForSample(sample);
|
||||
PerSampleReadStateManager statesBySample = readStatesBySample.get(sample);
|
||||
addReadsToSample(statesBySample, newReads);
|
||||
}
|
||||
ReadSelector aggregator = samplePartitioner.getSelectedReads(sample);
|
||||
|
||||
Collection<SAMRecord> newReads = new ArrayList<SAMRecord>(aggregator.getSelectedReads());
|
||||
|
||||
PerSampleReadStateManager statesBySample = readStatesBySample.get(sample);
|
||||
int numReads = statesBySample.size();
|
||||
int downsamplingExtent = aggregator.getDownsamplingExtent();
|
||||
|
||||
if (numReads + newReads.size() <= targetCoverage || downsamplingMethod.type == DownsampleType.NONE) {
|
||||
long readLimit = aggregator.getNumReadsSeen();
|
||||
addReadsToSample(statesBySample, newReads, readLimit);
|
||||
statesBySample.specifyNewDownsamplingExtent(downsamplingExtent);
|
||||
} else {
|
||||
int[] counts = statesBySample.getCountsPerAlignmentStart();
|
||||
int[] updatedCounts = new int[counts.length];
|
||||
System.arraycopy(counts, 0, updatedCounts, 0, counts.length);
|
||||
|
||||
boolean readPruned = true;
|
||||
while (numReads + newReads.size() > targetCoverage && readPruned) {
|
||||
readPruned = false;
|
||||
for (int alignmentStart = updatedCounts.length - 1; numReads + newReads.size() > targetCoverage && alignmentStart >= 0; alignmentStart--) {
|
||||
if (updatedCounts[alignmentStart] > 1) {
|
||||
updatedCounts[alignmentStart]--;
|
||||
numReads--;
|
||||
readPruned = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (numReads == targetCoverage) {
|
||||
updatedCounts[0]--;
|
||||
numReads--;
|
||||
}
|
||||
|
||||
BitSet toPurge = new BitSet(readStates.size());
|
||||
int readOffset = 0;
|
||||
|
||||
for (int i = 0; i < updatedCounts.length; i++) {
|
||||
int n = counts[i];
|
||||
int k = updatedCounts[i];
|
||||
|
||||
for (Integer purgedElement : MathUtils.sampleIndicesWithoutReplacement(n, n - k))
|
||||
toPurge.set(readOffset + purgedElement);
|
||||
|
||||
readOffset += counts[i];
|
||||
}
|
||||
downsamplingExtent = Math.max(downsamplingExtent, statesBySample.purge(toPurge));
|
||||
|
||||
addReadsToSample(statesBySample, newReads, targetCoverage - numReads);
|
||||
statesBySample.specifyNewDownsamplingExtent(downsamplingExtent);
|
||||
}
|
||||
}
|
||||
samplePartitioner.reset();
|
||||
}
|
||||
|
||||
|
|
@ -516,134 +583,380 @@ public class LocusIteratorByStateExperimental extends LocusIterator {
|
|||
*
|
||||
* @param readStates The list of read states to add this collection of reads.
|
||||
* @param reads Reads to add. Selected reads will be pulled from this source.
|
||||
* @param maxReads Maximum number of reads to add.
|
||||
*/
|
||||
private void addReadsToSample(final PerSampleReadStateManager readStates, final Collection<SAMRecord> reads) {
|
||||
private void addReadsToSample(final PerSampleReadStateManager readStates, final Collection<SAMRecord> reads, final long maxReads) {
|
||||
if (reads.isEmpty())
|
||||
return;
|
||||
|
||||
Collection<SAMRecordState> newReadStates = new LinkedList<SAMRecordState>();
|
||||
|
||||
int readCount = 0;
|
||||
for (SAMRecord read : reads) {
|
||||
SAMRecordState state = new SAMRecordState(read);
|
||||
state.stepForwardOnGenome();
|
||||
newReadStates.add(state);
|
||||
if (readCount < maxReads) {
|
||||
SAMRecordState state = new SAMRecordState(read);
|
||||
state.stepForwardOnGenome();
|
||||
newReadStates.add(state);
|
||||
readCount++;
|
||||
}
|
||||
}
|
||||
|
||||
readStates.addStatesAtNextAlignmentStart(newReadStates);
|
||||
}
|
||||
|
||||
protected class PerSampleReadStateManager implements Iterable<SAMRecordState> {
|
||||
private List<LinkedList<SAMRecordState>> readStatesByAlignmentStart = new LinkedList<LinkedList<SAMRecordState>>();
|
||||
private int thisSampleReadStates = 0;
|
||||
private Downsampler<LinkedList<SAMRecordState>> levelingDownsampler =
|
||||
performLevelingDownsampling ?
|
||||
new LevelingDownsampler<LinkedList<SAMRecordState>, SAMRecordState>(readInfo.getDownsamplingMethod().toCoverage) :
|
||||
null;
|
||||
private class PerSampleReadStateManager implements Iterable<SAMRecordState> {
|
||||
private final Queue<SAMRecordState> readStates = new LinkedList<SAMRecordState>();
|
||||
private final Deque<Counter> readStateCounter = new LinkedList<Counter>();
|
||||
private int downsamplingExtent = 0;
|
||||
|
||||
public void addStatesAtNextAlignmentStart(Collection<SAMRecordState> states) {
|
||||
if ( states.isEmpty() ) {
|
||||
return;
|
||||
}
|
||||
|
||||
readStatesByAlignmentStart.add(new LinkedList<SAMRecordState>(states));
|
||||
thisSampleReadStates += states.size();
|
||||
readStates.addAll(states);
|
||||
readStateCounter.add(new Counter(states.size()));
|
||||
totalReadStates += states.size();
|
||||
|
||||
if ( levelingDownsampler != null ) {
|
||||
levelingDownsampler.submit(readStatesByAlignmentStart);
|
||||
levelingDownsampler.signalEndOfInput();
|
||||
|
||||
thisSampleReadStates -= levelingDownsampler.getNumberOfDiscardedItems();
|
||||
totalReadStates -= levelingDownsampler.getNumberOfDiscardedItems();
|
||||
|
||||
// use returned List directly rather than make a copy, for efficiency's sake
|
||||
readStatesByAlignmentStart = levelingDownsampler.consumeFinalizedItems();
|
||||
levelingDownsampler.reset();
|
||||
}
|
||||
}
|
||||
|
||||
public boolean isEmpty() {
|
||||
return readStatesByAlignmentStart.isEmpty();
|
||||
return readStates.isEmpty();
|
||||
}
|
||||
|
||||
public SAMRecordState peek() {
|
||||
return isEmpty() ? null : readStatesByAlignmentStart.get(0).peek();
|
||||
return readStates.peek();
|
||||
}
|
||||
|
||||
public int size() {
|
||||
return thisSampleReadStates;
|
||||
return readStates.size();
|
||||
}
|
||||
|
||||
public void specifyNewDownsamplingExtent(int downsamplingExtent) {
|
||||
this.downsamplingExtent = Math.max(this.downsamplingExtent, downsamplingExtent);
|
||||
}
|
||||
|
||||
public int getDownsamplingExtent() {
|
||||
return downsamplingExtent;
|
||||
}
|
||||
|
||||
public int[] getCountsPerAlignmentStart() {
|
||||
int[] counts = new int[readStateCounter.size()];
|
||||
int index = 0;
|
||||
for (Counter counter : readStateCounter)
|
||||
counts[index++] = counter.getCount();
|
||||
return counts;
|
||||
}
|
||||
|
||||
public Iterator<SAMRecordState> iterator() {
|
||||
return new Iterator<SAMRecordState>() {
|
||||
private Iterator<LinkedList<SAMRecordState>> alignmentStartIterator = readStatesByAlignmentStart.iterator();
|
||||
private LinkedList<SAMRecordState> currentPositionReadStates = null;
|
||||
private Iterator<SAMRecordState> currentPositionReadStatesIterator = null;
|
||||
private Iterator<SAMRecordState> wrappedIterator = readStates.iterator();
|
||||
|
||||
public boolean hasNext() {
|
||||
return alignmentStartIterator.hasNext() ||
|
||||
(currentPositionReadStatesIterator != null && currentPositionReadStatesIterator.hasNext());
|
||||
return wrappedIterator.hasNext();
|
||||
}
|
||||
|
||||
public SAMRecordState next() {
|
||||
if ( currentPositionReadStatesIterator == null || ! currentPositionReadStatesIterator.hasNext() ) {
|
||||
currentPositionReadStates = alignmentStartIterator.next();
|
||||
currentPositionReadStatesIterator = currentPositionReadStates.iterator();
|
||||
}
|
||||
|
||||
return currentPositionReadStatesIterator.next();
|
||||
return wrappedIterator.next();
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
currentPositionReadStatesIterator.remove();
|
||||
thisSampleReadStates--;
|
||||
totalReadStates--;
|
||||
|
||||
if ( currentPositionReadStates.isEmpty() ) {
|
||||
alignmentStartIterator.remove();
|
||||
}
|
||||
wrappedIterator.remove();
|
||||
Counter counter = readStateCounter.peek();
|
||||
counter.decrement();
|
||||
if (counter.getCount() == 0)
|
||||
readStateCounter.remove();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Purge the given elements from the bitset. If an element in the bitset is true, purge
|
||||
* the corresponding read state.
|
||||
*
|
||||
* @param elements bits from the set to purge.
|
||||
* @return the extent of the final downsampled read.
|
||||
*/
|
||||
public int purge(final BitSet elements) {
|
||||
int downsamplingExtent = 0;
|
||||
|
||||
if (elements.isEmpty() || readStates.isEmpty()) return downsamplingExtent;
|
||||
|
||||
Iterator<SAMRecordState> readStateIterator = readStates.iterator();
|
||||
|
||||
Iterator<Counter> counterIterator = readStateCounter.iterator();
|
||||
Counter currentCounter = counterIterator.next();
|
||||
|
||||
int readIndex = 0;
|
||||
long alignmentStartCounter = currentCounter.getCount();
|
||||
|
||||
int toPurge = elements.nextSetBit(0);
|
||||
int removedCount = 0;
|
||||
|
||||
while (readStateIterator.hasNext() && toPurge >= 0) {
|
||||
SAMRecordState state = readStateIterator.next();
|
||||
downsamplingExtent = Math.max(downsamplingExtent, state.getRead().getAlignmentEnd());
|
||||
|
||||
if (readIndex == toPurge) {
|
||||
readStateIterator.remove();
|
||||
currentCounter.decrement();
|
||||
if (currentCounter.getCount() == 0)
|
||||
counterIterator.remove();
|
||||
removedCount++;
|
||||
toPurge = elements.nextSetBit(toPurge + 1);
|
||||
}
|
||||
|
||||
readIndex++;
|
||||
alignmentStartCounter--;
|
||||
if (alignmentStartCounter == 0 && counterIterator.hasNext()) {
|
||||
currentCounter = counterIterator.next();
|
||||
alignmentStartCounter = currentCounter.getCount();
|
||||
}
|
||||
}
|
||||
|
||||
totalReadStates -= removedCount;
|
||||
|
||||
return downsamplingExtent;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Note: stores reads by sample ID string, not by sample object
|
||||
* Note: assuming that, whenever we downsample, we downsample to an integer capacity.
|
||||
*/
|
||||
private class SamplePartitioner {
|
||||
private Map<String, Collection<SAMRecord>> readsBySample;
|
||||
private long readsSeen = 0;
|
||||
static private class Counter {
|
||||
private int count;
|
||||
|
||||
public SamplePartitioner() {
|
||||
readsBySample = new HashMap<String, Collection<SAMRecord>>();
|
||||
|
||||
for ( String sample : samples ) {
|
||||
readsBySample.put(sample, new ArrayList<SAMRecord>());
|
||||
}
|
||||
public Counter(int count) {
|
||||
this.count = count;
|
||||
}
|
||||
|
||||
public void submitRead(SAMRecord read) {
|
||||
String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null;
|
||||
if (readsBySample.containsKey(sampleName))
|
||||
readsBySample.get(sampleName).add(read);
|
||||
readsSeen++;
|
||||
public int getCount() {
|
||||
return count;
|
||||
}
|
||||
|
||||
public long getNumReadsSeen() {
|
||||
return readsSeen;
|
||||
}
|
||||
|
||||
public Collection<SAMRecord> getReadsForSample(String sampleName) {
|
||||
if ( ! readsBySample.containsKey(sampleName) )
|
||||
throw new NoSuchElementException("Sample name not found");
|
||||
return readsBySample.get(sampleName);
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
for ( Collection<SAMRecord> perSampleReads : readsBySample.values() )
|
||||
perSampleReads.clear();
|
||||
readsSeen = 0;
|
||||
public void decrement() {
|
||||
count--;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Selects reads passed to it based on a criteria decided through inheritance.
|
||||
* TODO: This is a temporary abstraction until we can get rid of this downsampling implementation and the mrl option. Get rid of this.
|
||||
*/
|
||||
interface ReadSelector {
|
||||
/**
|
||||
* All previous selectors in the chain have allowed this read. Submit it to this selector for consideration.
|
||||
*
|
||||
* @param read the read to evaluate.
|
||||
*/
|
||||
public void submitRead(SAMRecord read);
|
||||
|
||||
/**
|
||||
* A previous selector has deemed this read unfit. Notify this selector so that this selector's counts are valid.
|
||||
*
|
||||
* @param read the read previously rejected.
|
||||
*/
|
||||
public void notifyReadRejected(SAMRecord read);
|
||||
|
||||
/**
|
||||
* Signal the selector that read additions are complete.
|
||||
*/
|
||||
public void complete();
|
||||
|
||||
/**
|
||||
* Retrieve the number of reads seen by this selector so far.
|
||||
*
|
||||
* @return number of reads seen.
|
||||
*/
|
||||
public long getNumReadsSeen();
|
||||
|
||||
/**
|
||||
* Return the number of reads accepted by this selector so far.
|
||||
*
|
||||
* @return number of reads selected.
|
||||
*/
|
||||
public long getNumReadsSelected();
|
||||
|
||||
/**
|
||||
* Gets the locus at which the last of the downsampled reads selected by this selector ends. The value returned will be the
|
||||
* last aligned position from this selection to which a downsampled read aligns -- in other words, if a read is thrown out at
|
||||
* position 3 whose cigar string is 76M, the value of this parameter will be 78.
|
||||
*
|
||||
* @return If any read has been downsampled, this will return the last aligned base of the longest alignment. Else, 0.
|
||||
*/
|
||||
public int getDownsamplingExtent();
|
||||
|
||||
/**
|
||||
* Get the reads selected by this selector.
|
||||
*
|
||||
* @return collection of reads selected by this selector.
|
||||
*/
|
||||
public Collection<SAMRecord> getSelectedReads();
|
||||
|
||||
/**
|
||||
* Reset this collection to its pre-gathered state.
|
||||
*/
|
||||
public void reset();
|
||||
}
|
||||
|
||||
/**
|
||||
* Select every read passed in.
|
||||
*/
|
||||
class AllReadsSelector implements ReadSelector {
|
||||
private Collection<SAMRecord> reads = new LinkedList<SAMRecord>();
|
||||
private long readsSeen = 0;
|
||||
private int downsamplingExtent = 0;
|
||||
|
||||
public void submitRead(SAMRecord read) {
|
||||
reads.add(read);
|
||||
readsSeen++;
|
||||
}
|
||||
|
||||
public void notifyReadRejected(SAMRecord read) {
|
||||
readsSeen++;
|
||||
downsamplingExtent = Math.max(downsamplingExtent, read.getAlignmentEnd());
|
||||
}
|
||||
|
||||
public void complete() {
|
||||
// NO-OP.
|
||||
}
|
||||
|
||||
public long getNumReadsSeen() {
|
||||
return readsSeen;
|
||||
}
|
||||
|
||||
public long getNumReadsSelected() {
|
||||
return readsSeen;
|
||||
}
|
||||
|
||||
public int getDownsamplingExtent() {
|
||||
return downsamplingExtent;
|
||||
}
|
||||
|
||||
public Collection<SAMRecord> getSelectedReads() {
|
||||
return reads;
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
reads.clear();
|
||||
readsSeen = 0;
|
||||
downsamplingExtent = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Select N reads randomly from the input stream.
|
||||
*/
|
||||
class NRandomReadSelector implements ReadSelector {
|
||||
private final LegacyReservoirDownsampler<SAMRecord> reservoir;
|
||||
private final ReadSelector chainedSelector;
|
||||
private long readsSeen = 0;
|
||||
private int downsamplingExtent = 0;
|
||||
|
||||
public NRandomReadSelector(ReadSelector chainedSelector, long readLimit) {
|
||||
this.reservoir = new LegacyReservoirDownsampler<SAMRecord>((int) readLimit);
|
||||
this.chainedSelector = chainedSelector;
|
||||
}
|
||||
|
||||
public void submitRead(SAMRecord read) {
|
||||
SAMRecord displaced = reservoir.add(read);
|
||||
if (displaced != null && chainedSelector != null) {
|
||||
chainedSelector.notifyReadRejected(read);
|
||||
downsamplingExtent = Math.max(downsamplingExtent, read.getAlignmentEnd());
|
||||
}
|
||||
readsSeen++;
|
||||
}
|
||||
|
||||
public void notifyReadRejected(SAMRecord read) {
|
||||
readsSeen++;
|
||||
}
|
||||
|
||||
public void complete() {
|
||||
for (SAMRecord read : reservoir.getDownsampledContents())
|
||||
chainedSelector.submitRead(read);
|
||||
if (chainedSelector != null)
|
||||
chainedSelector.complete();
|
||||
}
|
||||
|
||||
|
||||
public long getNumReadsSeen() {
|
||||
return readsSeen;
|
||||
}
|
||||
|
||||
public long getNumReadsSelected() {
|
||||
return reservoir.size();
|
||||
}
|
||||
|
||||
public int getDownsamplingExtent() {
|
||||
return downsamplingExtent;
|
||||
}
|
||||
|
||||
public Collection<SAMRecord> getSelectedReads() {
|
||||
return reservoir.getDownsampledContents();
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
reservoir.clear();
|
||||
downsamplingExtent = 0;
|
||||
if (chainedSelector != null)
|
||||
chainedSelector.reset();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Note: stores reads by sample ID string, not by sample object
|
||||
*/
|
||||
class SamplePartitioner implements ReadSelector {
|
||||
private final Map<String, ReadSelector> readsBySample;
|
||||
private long readsSeen = 0;
|
||||
|
||||
public SamplePartitioner(Map<String, ReadSelector> readSelectors) {
|
||||
readsBySample = readSelectors;
|
||||
}
|
||||
|
||||
public void submitRead(SAMRecord read) {
|
||||
String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null;
|
||||
if (readsBySample.containsKey(sampleName))
|
||||
readsBySample.get(sampleName).submitRead(read);
|
||||
readsSeen++;
|
||||
}
|
||||
|
||||
public void notifyReadRejected(SAMRecord read) {
|
||||
String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null;
|
||||
if (readsBySample.containsKey(sampleName))
|
||||
readsBySample.get(sampleName).notifyReadRejected(read);
|
||||
readsSeen++;
|
||||
}
|
||||
|
||||
public void complete() {
|
||||
// NO-OP.
|
||||
}
|
||||
|
||||
public long getNumReadsSeen() {
|
||||
return readsSeen;
|
||||
}
|
||||
|
||||
public long getNumReadsSelected() {
|
||||
return readsSeen;
|
||||
}
|
||||
|
||||
public int getDownsamplingExtent() {
|
||||
int downsamplingExtent = 0;
|
||||
for (ReadSelector storage : readsBySample.values())
|
||||
downsamplingExtent = Math.max(downsamplingExtent, storage.getDownsamplingExtent());
|
||||
return downsamplingExtent;
|
||||
}
|
||||
|
||||
public Collection<SAMRecord> getSelectedReads() {
|
||||
throw new UnsupportedOperationException("Cannot directly get selected reads from a read partitioner.");
|
||||
}
|
||||
|
||||
public ReadSelector getSelectedReads(String sampleName) {
|
||||
if (!readsBySample.containsKey(sampleName))
|
||||
throw new NoSuchElementException("Sample name not found");
|
||||
return readsBySample.get(sampleName);
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
for (ReadSelector storage : readsBySample.values())
|
||||
storage.reset();
|
||||
readsSeen = 0;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -31,14 +31,11 @@ import net.sf.samtools.CigarElement;
|
|||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
|
||||
import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
|
||||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.downsampling.*;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.ReservoirDownsampler;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
||||
|
|
@ -54,7 +51,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
/**
|
||||
* our log, which we want to capture anything from this class
|
||||
*/
|
||||
private static Logger logger = Logger.getLogger(LocusIteratorByState.class);
|
||||
private static Logger logger = Logger.getLogger(LegacyLocusIteratorByState.class);
|
||||
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
|
|
@ -69,7 +66,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
private final ArrayList<String> samples;
|
||||
private final ReadStateManager readStates;
|
||||
|
||||
static private class SAMRecordState {
|
||||
protected static class SAMRecordState {
|
||||
SAMRecord read;
|
||||
int readOffset = -1; // how far are we offset from the start of the read bases?
|
||||
int genomeOffset = -1; // how far are we offset from the alignment start on the genome?
|
||||
|
|
@ -213,6 +210,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
//final boolean DEBUG2 = false && DEBUG;
|
||||
private ReadProperties readInfo;
|
||||
private AlignmentContext nextAlignmentContext;
|
||||
private boolean performDownsampling;
|
||||
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
|
|
@ -224,7 +222,18 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
this.readInfo = readInformation;
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
this.samples = new ArrayList<String>(samples);
|
||||
this.readStates = new ReadStateManager(samIterator, readInformation.getDownsamplingMethod());
|
||||
|
||||
// LIBS will invoke the Reservoir and Leveling downsamplers on the read stream if we're
|
||||
// downsampling to coverage by sample. SAMDataSource will have refrained from applying
|
||||
// any downsamplers to the read stream in this case, in the expectation that LIBS will
|
||||
// manage the downsampling. The reason for this is twofold: performance (don't have to
|
||||
// split/re-assemble the read stream in SAMDataSource), and to enable partial downsampling
|
||||
// of reads (eg., using half of a read, and throwing the rest away).
|
||||
this.performDownsampling = readInfo.getDownsamplingMethod() != null &&
|
||||
readInfo.getDownsamplingMethod().type == DownsampleType.BY_SAMPLE &&
|
||||
readInfo.getDownsamplingMethod().toCoverage != null;
|
||||
|
||||
this.readStates = new ReadStateManager(samIterator);
|
||||
|
||||
// currently the GATK expects this LocusIteratorByState to accept empty sample lists, when
|
||||
// there's no read data. So we need to throw this error only when samIterator.hasNext() is true
|
||||
|
|
@ -285,11 +294,13 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
|
||||
final GenomeLoc location = getLocation();
|
||||
final Map<String, ReadBackedPileupImpl> fullPileup = new HashMap<String, ReadBackedPileupImpl>();
|
||||
|
||||
// TODO: How can you determine here whether the current pileup has been downsampled?
|
||||
boolean hasBeenSampled = false;
|
||||
|
||||
for (final String sample : samples) {
|
||||
final Iterator<SAMRecordState> iterator = readStates.iterator(sample);
|
||||
final List<PileupElement> pile = new ArrayList<PileupElement>(readStates.size(sample));
|
||||
hasBeenSampled |= location.getStart() <= readStates.getDownsamplingExtent(sample);
|
||||
|
||||
int size = 0; // number of elements in this sample's pileup
|
||||
int nDeletions = 0; // number of deletions in this sample's pileup
|
||||
|
|
@ -398,34 +409,20 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
|
||||
}
|
||||
|
||||
private class ReadStateManager {
|
||||
protected class ReadStateManager {
|
||||
private final PeekableIterator<SAMRecord> iterator;
|
||||
private final DownsamplingMethod downsamplingMethod;
|
||||
private final SamplePartitioner samplePartitioner;
|
||||
private final Map<String, PerSampleReadStateManager> readStatesBySample = new HashMap<String, PerSampleReadStateManager>();
|
||||
private final int targetCoverage;
|
||||
private int totalReadStates = 0;
|
||||
|
||||
public ReadStateManager(Iterator<SAMRecord> source, DownsamplingMethod downsamplingMethod) {
|
||||
public ReadStateManager(Iterator<SAMRecord> source) {
|
||||
this.iterator = new PeekableIterator<SAMRecord>(source);
|
||||
this.downsamplingMethod = downsamplingMethod.type != null ? downsamplingMethod : DownsamplingMethod.NONE;
|
||||
switch (this.downsamplingMethod.type) {
|
||||
case BY_SAMPLE:
|
||||
if (downsamplingMethod.toCoverage == null)
|
||||
throw new UserException.BadArgumentValue("dcov", "Downsampling coverage (-dcov) must be specified when downsampling by sample");
|
||||
this.targetCoverage = downsamplingMethod.toCoverage;
|
||||
break;
|
||||
default:
|
||||
this.targetCoverage = Integer.MAX_VALUE;
|
||||
}
|
||||
|
||||
Map<String, ReadSelector> readSelectors = new HashMap<String, ReadSelector>();
|
||||
for (final String sample : samples) {
|
||||
readStatesBySample.put(sample, new PerSampleReadStateManager());
|
||||
readSelectors.put(sample, downsamplingMethod.type == DownsampleType.BY_SAMPLE ? new NRandomReadSelector(null, targetCoverage) : new AllReadsSelector());
|
||||
}
|
||||
|
||||
samplePartitioner = new SamplePartitioner(readSelectors);
|
||||
samplePartitioner = new SamplePartitioner(performDownsampling);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -449,7 +446,6 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
|
||||
public void remove() {
|
||||
wrappedIterator.remove();
|
||||
totalReadStates--;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
@ -477,17 +473,6 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
return readStatesBySample.get(sample).size();
|
||||
}
|
||||
|
||||
/**
|
||||
* The extent of downsampling; basically, the furthest base out which has 'fallen
|
||||
* victim' to the downsampler.
|
||||
*
|
||||
* @param sample Sample, downsampled independently.
|
||||
* @return Integer stop of the furthest undownsampled region.
|
||||
*/
|
||||
public int getDownsamplingExtent(final String sample) {
|
||||
return readStatesBySample.get(sample).getDownsamplingExtent();
|
||||
}
|
||||
|
||||
public SAMRecordState getFirst() {
|
||||
for (final String sample : samples) {
|
||||
PerSampleReadStateManager reads = readStatesBySample.get(sample);
|
||||
|
|
@ -520,61 +505,15 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
samplePartitioner.submitRead(iterator.next());
|
||||
}
|
||||
}
|
||||
samplePartitioner.complete();
|
||||
|
||||
samplePartitioner.doneSubmittingReads();
|
||||
|
||||
for (final String sample : samples) {
|
||||
ReadSelector aggregator = samplePartitioner.getSelectedReads(sample);
|
||||
|
||||
Collection<SAMRecord> newReads = new ArrayList<SAMRecord>(aggregator.getSelectedReads());
|
||||
|
||||
Collection<SAMRecord> newReads = samplePartitioner.getReadsForSample(sample);
|
||||
PerSampleReadStateManager statesBySample = readStatesBySample.get(sample);
|
||||
int numReads = statesBySample.size();
|
||||
int downsamplingExtent = aggregator.getDownsamplingExtent();
|
||||
|
||||
if (numReads + newReads.size() <= targetCoverage || downsamplingMethod.type == DownsampleType.NONE) {
|
||||
long readLimit = aggregator.getNumReadsSeen();
|
||||
addReadsToSample(statesBySample, newReads, readLimit);
|
||||
statesBySample.specifyNewDownsamplingExtent(downsamplingExtent);
|
||||
} else {
|
||||
int[] counts = statesBySample.getCountsPerAlignmentStart();
|
||||
int[] updatedCounts = new int[counts.length];
|
||||
System.arraycopy(counts, 0, updatedCounts, 0, counts.length);
|
||||
|
||||
boolean readPruned = true;
|
||||
while (numReads + newReads.size() > targetCoverage && readPruned) {
|
||||
readPruned = false;
|
||||
for (int alignmentStart = updatedCounts.length - 1; numReads + newReads.size() > targetCoverage && alignmentStart >= 0; alignmentStart--) {
|
||||
if (updatedCounts[alignmentStart] > 1) {
|
||||
updatedCounts[alignmentStart]--;
|
||||
numReads--;
|
||||
readPruned = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (numReads == targetCoverage) {
|
||||
updatedCounts[0]--;
|
||||
numReads--;
|
||||
}
|
||||
|
||||
BitSet toPurge = new BitSet(readStates.size());
|
||||
int readOffset = 0;
|
||||
|
||||
for (int i = 0; i < updatedCounts.length; i++) {
|
||||
int n = counts[i];
|
||||
int k = updatedCounts[i];
|
||||
|
||||
for (Integer purgedElement : MathUtils.sampleIndicesWithoutReplacement(n, n - k))
|
||||
toPurge.set(readOffset + purgedElement);
|
||||
|
||||
readOffset += counts[i];
|
||||
}
|
||||
downsamplingExtent = Math.max(downsamplingExtent, statesBySample.purge(toPurge));
|
||||
|
||||
addReadsToSample(statesBySample, newReads, targetCoverage - numReads);
|
||||
statesBySample.specifyNewDownsamplingExtent(downsamplingExtent);
|
||||
}
|
||||
addReadsToSample(statesBySample, newReads);
|
||||
}
|
||||
|
||||
samplePartitioner.reset();
|
||||
}
|
||||
|
||||
|
|
@ -583,380 +522,140 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
*
|
||||
* @param readStates The list of read states to add this collection of reads.
|
||||
* @param reads Reads to add. Selected reads will be pulled from this source.
|
||||
* @param maxReads Maximum number of reads to add.
|
||||
*/
|
||||
private void addReadsToSample(final PerSampleReadStateManager readStates, final Collection<SAMRecord> reads, final long maxReads) {
|
||||
private void addReadsToSample(final PerSampleReadStateManager readStates, final Collection<SAMRecord> reads) {
|
||||
if (reads.isEmpty())
|
||||
return;
|
||||
|
||||
Collection<SAMRecordState> newReadStates = new LinkedList<SAMRecordState>();
|
||||
int readCount = 0;
|
||||
|
||||
for (SAMRecord read : reads) {
|
||||
if (readCount < maxReads) {
|
||||
SAMRecordState state = new SAMRecordState(read);
|
||||
state.stepForwardOnGenome();
|
||||
newReadStates.add(state);
|
||||
readCount++;
|
||||
}
|
||||
SAMRecordState state = new SAMRecordState(read);
|
||||
state.stepForwardOnGenome();
|
||||
newReadStates.add(state);
|
||||
}
|
||||
|
||||
readStates.addStatesAtNextAlignmentStart(newReadStates);
|
||||
}
|
||||
|
||||
private class PerSampleReadStateManager implements Iterable<SAMRecordState> {
|
||||
private final Queue<SAMRecordState> readStates = new LinkedList<SAMRecordState>();
|
||||
private final Deque<Counter> readStateCounter = new LinkedList<Counter>();
|
||||
private int downsamplingExtent = 0;
|
||||
protected class PerSampleReadStateManager implements Iterable<SAMRecordState> {
|
||||
private List<LinkedList<SAMRecordState>> readStatesByAlignmentStart = new LinkedList<LinkedList<SAMRecordState>>();
|
||||
private int thisSampleReadStates = 0;
|
||||
private Downsampler<LinkedList<SAMRecordState>> levelingDownsampler =
|
||||
performDownsampling ?
|
||||
new LevelingDownsampler<LinkedList<SAMRecordState>, SAMRecordState>(readInfo.getDownsamplingMethod().toCoverage) :
|
||||
null;
|
||||
|
||||
public void addStatesAtNextAlignmentStart(Collection<SAMRecordState> states) {
|
||||
readStates.addAll(states);
|
||||
readStateCounter.add(new Counter(states.size()));
|
||||
if ( states.isEmpty() ) {
|
||||
return;
|
||||
}
|
||||
|
||||
readStatesByAlignmentStart.add(new LinkedList<SAMRecordState>(states));
|
||||
thisSampleReadStates += states.size();
|
||||
totalReadStates += states.size();
|
||||
|
||||
if ( levelingDownsampler != null ) {
|
||||
levelingDownsampler.submit(readStatesByAlignmentStart);
|
||||
levelingDownsampler.signalEndOfInput();
|
||||
|
||||
thisSampleReadStates -= levelingDownsampler.getNumberOfDiscardedItems();
|
||||
totalReadStates -= levelingDownsampler.getNumberOfDiscardedItems();
|
||||
|
||||
// use returned List directly rather than make a copy, for efficiency's sake
|
||||
readStatesByAlignmentStart = levelingDownsampler.consumeFinalizedItems();
|
||||
levelingDownsampler.reset();
|
||||
}
|
||||
}
|
||||
|
||||
public boolean isEmpty() {
|
||||
return readStates.isEmpty();
|
||||
return readStatesByAlignmentStart.isEmpty();
|
||||
}
|
||||
|
||||
public SAMRecordState peek() {
|
||||
return readStates.peek();
|
||||
return isEmpty() ? null : readStatesByAlignmentStart.get(0).peek();
|
||||
}
|
||||
|
||||
public int size() {
|
||||
return readStates.size();
|
||||
}
|
||||
|
||||
public void specifyNewDownsamplingExtent(int downsamplingExtent) {
|
||||
this.downsamplingExtent = Math.max(this.downsamplingExtent, downsamplingExtent);
|
||||
}
|
||||
|
||||
public int getDownsamplingExtent() {
|
||||
return downsamplingExtent;
|
||||
}
|
||||
|
||||
public int[] getCountsPerAlignmentStart() {
|
||||
int[] counts = new int[readStateCounter.size()];
|
||||
int index = 0;
|
||||
for (Counter counter : readStateCounter)
|
||||
counts[index++] = counter.getCount();
|
||||
return counts;
|
||||
return thisSampleReadStates;
|
||||
}
|
||||
|
||||
public Iterator<SAMRecordState> iterator() {
|
||||
return new Iterator<SAMRecordState>() {
|
||||
private Iterator<SAMRecordState> wrappedIterator = readStates.iterator();
|
||||
private Iterator<LinkedList<SAMRecordState>> alignmentStartIterator = readStatesByAlignmentStart.iterator();
|
||||
private LinkedList<SAMRecordState> currentPositionReadStates = null;
|
||||
private Iterator<SAMRecordState> currentPositionReadStatesIterator = null;
|
||||
|
||||
public boolean hasNext() {
|
||||
return wrappedIterator.hasNext();
|
||||
return alignmentStartIterator.hasNext() ||
|
||||
(currentPositionReadStatesIterator != null && currentPositionReadStatesIterator.hasNext());
|
||||
}
|
||||
|
||||
public SAMRecordState next() {
|
||||
return wrappedIterator.next();
|
||||
if ( currentPositionReadStatesIterator == null || ! currentPositionReadStatesIterator.hasNext() ) {
|
||||
currentPositionReadStates = alignmentStartIterator.next();
|
||||
currentPositionReadStatesIterator = currentPositionReadStates.iterator();
|
||||
}
|
||||
|
||||
return currentPositionReadStatesIterator.next();
|
||||
}
|
||||
|
||||
public void remove() {
|
||||
wrappedIterator.remove();
|
||||
Counter counter = readStateCounter.peek();
|
||||
counter.decrement();
|
||||
if (counter.getCount() == 0)
|
||||
readStateCounter.remove();
|
||||
currentPositionReadStatesIterator.remove();
|
||||
thisSampleReadStates--;
|
||||
totalReadStates--;
|
||||
|
||||
if ( currentPositionReadStates.isEmpty() ) {
|
||||
alignmentStartIterator.remove();
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Purge the given elements from the bitset. If an element in the bitset is true, purge
|
||||
* the corresponding read state.
|
||||
*
|
||||
* @param elements bits from the set to purge.
|
||||
* @return the extent of the final downsampled read.
|
||||
*/
|
||||
public int purge(final BitSet elements) {
|
||||
int downsamplingExtent = 0;
|
||||
/**
|
||||
* Divides reads by sample and (if requested) does a preliminary downsampling pass with a ReservoirDownsampler.
|
||||
*
|
||||
* Note: stores reads by sample ID string, not by sample object
|
||||
*/
|
||||
private class SamplePartitioner {
|
||||
private Map<String, Downsampler<SAMRecord>> readsBySample;
|
||||
|
||||
if (elements.isEmpty() || readStates.isEmpty()) return downsamplingExtent;
|
||||
public SamplePartitioner( boolean downsampleReads ) {
|
||||
readsBySample = new HashMap<String, Downsampler<SAMRecord>>();
|
||||
|
||||
Iterator<SAMRecordState> readStateIterator = readStates.iterator();
|
||||
for ( String sample : samples ) {
|
||||
readsBySample.put(sample,
|
||||
downsampleReads ? new ReservoirDownsampler<SAMRecord>(readInfo.getDownsamplingMethod().toCoverage) :
|
||||
new PassThroughDownsampler<SAMRecord>());
|
||||
}
|
||||
}
|
||||
|
||||
Iterator<Counter> counterIterator = readStateCounter.iterator();
|
||||
Counter currentCounter = counterIterator.next();
|
||||
public void submitRead(SAMRecord read) {
|
||||
String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null;
|
||||
if (readsBySample.containsKey(sampleName))
|
||||
readsBySample.get(sampleName).submit(read);
|
||||
}
|
||||
|
||||
int readIndex = 0;
|
||||
long alignmentStartCounter = currentCounter.getCount();
|
||||
public void doneSubmittingReads() {
|
||||
for ( Map.Entry<String, Downsampler<SAMRecord>> perSampleReads : readsBySample.entrySet() ) {
|
||||
perSampleReads.getValue().signalEndOfInput();
|
||||
}
|
||||
}
|
||||
|
||||
int toPurge = elements.nextSetBit(0);
|
||||
int removedCount = 0;
|
||||
public Collection<SAMRecord> getReadsForSample(String sampleName) {
|
||||
if ( ! readsBySample.containsKey(sampleName) )
|
||||
throw new NoSuchElementException("Sample name not found");
|
||||
|
||||
while (readStateIterator.hasNext() && toPurge >= 0) {
|
||||
SAMRecordState state = readStateIterator.next();
|
||||
downsamplingExtent = Math.max(downsamplingExtent, state.getRead().getAlignmentEnd());
|
||||
return readsBySample.get(sampleName).consumeFinalizedItems();
|
||||
}
|
||||
|
||||
if (readIndex == toPurge) {
|
||||
readStateIterator.remove();
|
||||
currentCounter.decrement();
|
||||
if (currentCounter.getCount() == 0)
|
||||
counterIterator.remove();
|
||||
removedCount++;
|
||||
toPurge = elements.nextSetBit(toPurge + 1);
|
||||
}
|
||||
|
||||
readIndex++;
|
||||
alignmentStartCounter--;
|
||||
if (alignmentStartCounter == 0 && counterIterator.hasNext()) {
|
||||
currentCounter = counterIterator.next();
|
||||
alignmentStartCounter = currentCounter.getCount();
|
||||
}
|
||||
}
|
||||
|
||||
totalReadStates -= removedCount;
|
||||
|
||||
return downsamplingExtent;
|
||||
public void reset() {
|
||||
for ( Map.Entry<String, Downsampler<SAMRecord>> perSampleReads : readsBySample.entrySet() ) {
|
||||
perSampleReads.getValue().clear();
|
||||
perSampleReads.getValue().reset();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Note: assuming that, whenever we downsample, we downsample to an integer capacity.
|
||||
*/
|
||||
static private class Counter {
|
||||
private int count;
|
||||
|
||||
public Counter(int count) {
|
||||
this.count = count;
|
||||
}
|
||||
|
||||
public int getCount() {
|
||||
return count;
|
||||
}
|
||||
|
||||
public void decrement() {
|
||||
count--;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Selects reads passed to it based on a criteria decided through inheritance.
|
||||
* TODO: This is a temporary abstraction until we can get rid of this downsampling implementation and the mrl option. Get rid of this.
|
||||
*/
|
||||
interface ReadSelector {
|
||||
/**
|
||||
* All previous selectors in the chain have allowed this read. Submit it to this selector for consideration.
|
||||
*
|
||||
* @param read the read to evaluate.
|
||||
*/
|
||||
public void submitRead(SAMRecord read);
|
||||
|
||||
/**
|
||||
* A previous selector has deemed this read unfit. Notify this selector so that this selector's counts are valid.
|
||||
*
|
||||
* @param read the read previously rejected.
|
||||
*/
|
||||
public void notifyReadRejected(SAMRecord read);
|
||||
|
||||
/**
|
||||
* Signal the selector that read additions are complete.
|
||||
*/
|
||||
public void complete();
|
||||
|
||||
/**
|
||||
* Retrieve the number of reads seen by this selector so far.
|
||||
*
|
||||
* @return number of reads seen.
|
||||
*/
|
||||
public long getNumReadsSeen();
|
||||
|
||||
/**
|
||||
* Return the number of reads accepted by this selector so far.
|
||||
*
|
||||
* @return number of reads selected.
|
||||
*/
|
||||
public long getNumReadsSelected();
|
||||
|
||||
/**
|
||||
* Gets the locus at which the last of the downsampled reads selected by this selector ends. The value returned will be the
|
||||
* last aligned position from this selection to which a downsampled read aligns -- in other words, if a read is thrown out at
|
||||
* position 3 whose cigar string is 76M, the value of this parameter will be 78.
|
||||
*
|
||||
* @return If any read has been downsampled, this will return the last aligned base of the longest alignment. Else, 0.
|
||||
*/
|
||||
public int getDownsamplingExtent();
|
||||
|
||||
/**
|
||||
* Get the reads selected by this selector.
|
||||
*
|
||||
* @return collection of reads selected by this selector.
|
||||
*/
|
||||
public Collection<SAMRecord> getSelectedReads();
|
||||
|
||||
/**
|
||||
* Reset this collection to its pre-gathered state.
|
||||
*/
|
||||
public void reset();
|
||||
}
|
||||
|
||||
/**
|
||||
* Select every read passed in.
|
||||
*/
|
||||
class AllReadsSelector implements ReadSelector {
|
||||
private Collection<SAMRecord> reads = new LinkedList<SAMRecord>();
|
||||
private long readsSeen = 0;
|
||||
private int downsamplingExtent = 0;
|
||||
|
||||
public void submitRead(SAMRecord read) {
|
||||
reads.add(read);
|
||||
readsSeen++;
|
||||
}
|
||||
|
||||
public void notifyReadRejected(SAMRecord read) {
|
||||
readsSeen++;
|
||||
downsamplingExtent = Math.max(downsamplingExtent, read.getAlignmentEnd());
|
||||
}
|
||||
|
||||
public void complete() {
|
||||
// NO-OP.
|
||||
}
|
||||
|
||||
public long getNumReadsSeen() {
|
||||
return readsSeen;
|
||||
}
|
||||
|
||||
public long getNumReadsSelected() {
|
||||
return readsSeen;
|
||||
}
|
||||
|
||||
public int getDownsamplingExtent() {
|
||||
return downsamplingExtent;
|
||||
}
|
||||
|
||||
public Collection<SAMRecord> getSelectedReads() {
|
||||
return reads;
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
reads.clear();
|
||||
readsSeen = 0;
|
||||
downsamplingExtent = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Select N reads randomly from the input stream.
|
||||
*/
|
||||
class NRandomReadSelector implements ReadSelector {
|
||||
private final ReservoirDownsampler<SAMRecord> reservoir;
|
||||
private final ReadSelector chainedSelector;
|
||||
private long readsSeen = 0;
|
||||
private int downsamplingExtent = 0;
|
||||
|
||||
public NRandomReadSelector(ReadSelector chainedSelector, long readLimit) {
|
||||
this.reservoir = new ReservoirDownsampler<SAMRecord>((int) readLimit);
|
||||
this.chainedSelector = chainedSelector;
|
||||
}
|
||||
|
||||
public void submitRead(SAMRecord read) {
|
||||
SAMRecord displaced = reservoir.add(read);
|
||||
if (displaced != null && chainedSelector != null) {
|
||||
chainedSelector.notifyReadRejected(read);
|
||||
downsamplingExtent = Math.max(downsamplingExtent, read.getAlignmentEnd());
|
||||
}
|
||||
readsSeen++;
|
||||
}
|
||||
|
||||
public void notifyReadRejected(SAMRecord read) {
|
||||
readsSeen++;
|
||||
}
|
||||
|
||||
public void complete() {
|
||||
for (SAMRecord read : reservoir.getDownsampledContents())
|
||||
chainedSelector.submitRead(read);
|
||||
if (chainedSelector != null)
|
||||
chainedSelector.complete();
|
||||
}
|
||||
|
||||
|
||||
public long getNumReadsSeen() {
|
||||
return readsSeen;
|
||||
}
|
||||
|
||||
public long getNumReadsSelected() {
|
||||
return reservoir.size();
|
||||
}
|
||||
|
||||
public int getDownsamplingExtent() {
|
||||
return downsamplingExtent;
|
||||
}
|
||||
|
||||
public Collection<SAMRecord> getSelectedReads() {
|
||||
return reservoir.getDownsampledContents();
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
reservoir.clear();
|
||||
downsamplingExtent = 0;
|
||||
if (chainedSelector != null)
|
||||
chainedSelector.reset();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Note: stores reads by sample ID string, not by sample object
|
||||
*/
|
||||
class SamplePartitioner implements ReadSelector {
|
||||
private final Map<String, ReadSelector> readsBySample;
|
||||
private long readsSeen = 0;
|
||||
|
||||
public SamplePartitioner(Map<String, ReadSelector> readSelectors) {
|
||||
readsBySample = readSelectors;
|
||||
}
|
||||
|
||||
public void submitRead(SAMRecord read) {
|
||||
String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null;
|
||||
if (readsBySample.containsKey(sampleName))
|
||||
readsBySample.get(sampleName).submitRead(read);
|
||||
readsSeen++;
|
||||
}
|
||||
|
||||
public void notifyReadRejected(SAMRecord read) {
|
||||
String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null;
|
||||
if (readsBySample.containsKey(sampleName))
|
||||
readsBySample.get(sampleName).notifyReadRejected(read);
|
||||
readsSeen++;
|
||||
}
|
||||
|
||||
public void complete() {
|
||||
// NO-OP.
|
||||
}
|
||||
|
||||
public long getNumReadsSeen() {
|
||||
return readsSeen;
|
||||
}
|
||||
|
||||
public long getNumReadsSelected() {
|
||||
return readsSeen;
|
||||
}
|
||||
|
||||
public int getDownsamplingExtent() {
|
||||
int downsamplingExtent = 0;
|
||||
for (ReadSelector storage : readsBySample.values())
|
||||
downsamplingExtent = Math.max(downsamplingExtent, storage.getDownsamplingExtent());
|
||||
return downsamplingExtent;
|
||||
}
|
||||
|
||||
public Collection<SAMRecord> getSelectedReads() {
|
||||
throw new UnsupportedOperationException("Cannot directly get selected reads from a read partitioner.");
|
||||
}
|
||||
|
||||
public ReadSelector getSelectedReads(String sampleName) {
|
||||
if (!readsBySample.containsKey(sampleName))
|
||||
throw new NoSuchElementException("Sample name not found");
|
||||
return readsBySample.get(sampleName);
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
for (ReadSelector storage : readsBySample.values())
|
||||
storage.reset();
|
||||
readsSeen = 0;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
@ -343,7 +343,7 @@ public class GATKReport {
|
|||
|
||||
GATKReportTable table = tables.firstEntry().getValue();
|
||||
if ( table.getNumColumns() != values.length )
|
||||
throw new ReviewedStingException("The number of arguments in writeRow() " + values.length + " must match the number of columns in the table" + table.getNumColumns());
|
||||
throw new ReviewedStingException("The number of arguments in writeRow (" + values.length + ") must match the number of columns in the table (" + table.getNumColumns() + ")" );
|
||||
|
||||
final int rowIndex = table.getNumRows();
|
||||
for ( int i = 0; i < values.length; i++ )
|
||||
|
|
|
|||
|
|
@ -28,7 +28,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
|
|||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
|
||||
|
|
@ -52,7 +51,7 @@ public class AFCalcResult {
|
|||
private final double[] log10PriorsOfAC;
|
||||
private final double[] log10PosteriorsOfAC;
|
||||
|
||||
private final Map<Allele, Double> log10pNonRefByAllele;
|
||||
private final Map<Allele, Double> log10pRefByAllele;
|
||||
|
||||
/**
|
||||
* The AC values for all ALT alleles at the MLE
|
||||
|
|
@ -74,16 +73,16 @@ public class AFCalcResult {
|
|||
final List<Allele> allelesUsedInGenotyping,
|
||||
final double[] log10LikelihoodsOfAC,
|
||||
final double[] log10PriorsOfAC,
|
||||
final Map<Allele, Double> log10pNonRefByAllele) {
|
||||
final Map<Allele, Double> log10pRefByAllele) {
|
||||
if ( allelesUsedInGenotyping == null || allelesUsedInGenotyping.size() < 1 ) throw new IllegalArgumentException("allelesUsedInGenotyping must be non-null list of at least 1 value " + allelesUsedInGenotyping);
|
||||
if ( alleleCountsOfMLE == null ) throw new IllegalArgumentException("alleleCountsOfMLE cannot be null");
|
||||
if ( alleleCountsOfMLE.length != allelesUsedInGenotyping.size() - 1) throw new IllegalArgumentException("alleleCountsOfMLE.length " + alleleCountsOfMLE.length + " != allelesUsedInGenotyping.size() " + allelesUsedInGenotyping.size());
|
||||
if ( nEvaluations < 0 ) throw new IllegalArgumentException("nEvaluations must be >= 0 but saw " + nEvaluations);
|
||||
if ( log10LikelihoodsOfAC.length != 2 ) throw new IllegalArgumentException("log10LikelihoodsOfAC must have length equal 2");
|
||||
if ( log10PriorsOfAC.length != 2 ) throw new IllegalArgumentException("log10PriorsOfAC must have length equal 2");
|
||||
if ( log10pNonRefByAllele == null ) throw new IllegalArgumentException("log10pNonRefByAllele cannot be null");
|
||||
if ( log10pNonRefByAllele.size() != allelesUsedInGenotyping.size() - 1 ) throw new IllegalArgumentException("log10pNonRefByAllele has the wrong number of elements: log10pNonRefByAllele " + log10pNonRefByAllele + " but allelesUsedInGenotyping " + allelesUsedInGenotyping);
|
||||
if ( ! allelesUsedInGenotyping.containsAll(log10pNonRefByAllele.keySet()) ) throw new IllegalArgumentException("log10pNonRefByAllele doesn't contain all of the alleles used in genotyping: log10pNonRefByAllele " + log10pNonRefByAllele + " but allelesUsedInGenotyping " + allelesUsedInGenotyping);
|
||||
if ( log10pRefByAllele == null ) throw new IllegalArgumentException("log10pRefByAllele cannot be null");
|
||||
if ( log10pRefByAllele.size() != allelesUsedInGenotyping.size() - 1 ) throw new IllegalArgumentException("log10pRefByAllele has the wrong number of elements: log10pRefByAllele " + log10pRefByAllele + " but allelesUsedInGenotyping " + allelesUsedInGenotyping);
|
||||
if ( ! allelesUsedInGenotyping.containsAll(log10pRefByAllele.keySet()) ) throw new IllegalArgumentException("log10pRefByAllele doesn't contain all of the alleles used in genotyping: log10pRefByAllele " + log10pRefByAllele + " but allelesUsedInGenotyping " + allelesUsedInGenotyping);
|
||||
if ( ! MathUtils.goodLog10ProbVector(log10LikelihoodsOfAC, LOG_10_ARRAY_SIZES, false) ) throw new IllegalArgumentException("log10LikelihoodsOfAC are bad " + Utils.join(",", log10LikelihoodsOfAC));
|
||||
if ( ! MathUtils.goodLog10ProbVector(log10PriorsOfAC, LOG_10_ARRAY_SIZES, true) ) throw new IllegalArgumentException("log10priors are bad " + Utils.join(",", log10PriorsOfAC));
|
||||
|
||||
|
|
@ -94,7 +93,7 @@ public class AFCalcResult {
|
|||
this.log10LikelihoodsOfAC = Arrays.copyOf(log10LikelihoodsOfAC, LOG_10_ARRAY_SIZES);
|
||||
this.log10PriorsOfAC = Arrays.copyOf(log10PriorsOfAC, LOG_10_ARRAY_SIZES);
|
||||
this.log10PosteriorsOfAC = computePosteriors(log10LikelihoodsOfAC, log10PriorsOfAC);
|
||||
this.log10pNonRefByAllele = new HashMap<Allele, Double>(log10pNonRefByAllele);
|
||||
this.log10pRefByAllele = new HashMap<Allele, Double>(log10pRefByAllele);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -104,7 +103,7 @@ public class AFCalcResult {
|
|||
* @return
|
||||
*/
|
||||
public AFCalcResult withNewPriors(final double[] log10PriorsOfAC) {
|
||||
return new AFCalcResult(alleleCountsOfMLE, nEvaluations, allelesUsedInGenotyping, log10LikelihoodsOfAC, log10PriorsOfAC, log10pNonRefByAllele);
|
||||
return new AFCalcResult(alleleCountsOfMLE, nEvaluations, allelesUsedInGenotyping, log10LikelihoodsOfAC, log10PriorsOfAC, log10pRefByAllele);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -219,7 +218,7 @@ public class AFCalcResult {
|
|||
public String toString() {
|
||||
final List<String> byAllele = new LinkedList<String>();
|
||||
for ( final Allele a : getAllelesUsedInGenotyping() )
|
||||
if ( a.isNonReference() ) byAllele.add(String.format("%s => MLE %d / posterior %.2f", a, getAlleleCountAtMLE(a), getLog10PosteriorOfAFGt0ForAllele(a)));
|
||||
if ( a.isNonReference() ) byAllele.add(String.format("%s => MLE %d / posterior %.2f", a, getAlleleCountAtMLE(a), getLog10PosteriorOfAFEq0ForAllele(a)));
|
||||
return String.format("AFCalc%n\t\tlog10PosteriorOfAFGT0=%.2f%n\t\t%s", getLog10LikelihoodOfAFGT0(), Utils.join("\n\t\t", byAllele));
|
||||
}
|
||||
|
||||
|
|
@ -237,7 +236,7 @@ public class AFCalcResult {
|
|||
*/
|
||||
@Requires("MathUtils.goodLog10Probability(log10minPNonRef)")
|
||||
public boolean isPolymorphic(final Allele allele, final double log10minPNonRef) {
|
||||
return getLog10PosteriorOfAFGt0ForAllele(allele) >= log10minPNonRef;
|
||||
return getLog10PosteriorOfAFEq0ForAllele(allele) < log10minPNonRef;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -245,7 +244,7 @@ public class AFCalcResult {
|
|||
*/
|
||||
public boolean isPolymorphicPhredScaledQual(final Allele allele, final double minPNonRefPhredScaledQual) {
|
||||
if ( minPNonRefPhredScaledQual < 0 ) throw new IllegalArgumentException("phredScaledQual " + minPNonRefPhredScaledQual + " < 0 ");
|
||||
final double log10Threshold = Math.log10(QualityUtils.qualToProb(minPNonRefPhredScaledQual));
|
||||
final double log10Threshold = minPNonRefPhredScaledQual / -10;
|
||||
return isPolymorphic(allele, log10Threshold);
|
||||
}
|
||||
|
||||
|
|
@ -263,7 +262,16 @@ public class AFCalcResult {
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns the log10 probability that allele is segregating
|
||||
* Returns the log10 probability that allele is not segregating
|
||||
*
|
||||
* Note that this function is p not segregating so that we can store
|
||||
* internally the log10 value of AF == 0, which grows very quickly
|
||||
* negative and yet has sufficient resolution for high confidence tests.
|
||||
* For example, if log10pRef == -100, not an unreasonably high number,
|
||||
* if we tried to store log10pNonRef we'd be looking at 1 - 10^-100, which
|
||||
* quickly underflows to 1. So the logic here is backward from what
|
||||
* you really want (the p of segregating) but we do that for numerical
|
||||
* reasons
|
||||
*
|
||||
* Unlike the sites-level annotation, this calculation is specific to allele, and can be
|
||||
* used to separately determine how much evidence there is that allele is independently
|
||||
|
|
@ -272,11 +280,11 @@ public class AFCalcResult {
|
|||
* evidence for one allele but not so much for any other allele
|
||||
*
|
||||
* @param allele the allele we're interested in, must be in getAllelesUsedInGenotyping
|
||||
* @return the log10 probability that allele is segregating at this site
|
||||
* @return the log10 probability that allele is not segregating at this site
|
||||
*/
|
||||
@Ensures("MathUtils.goodLog10Probability(result)")
|
||||
public double getLog10PosteriorOfAFGt0ForAllele(final Allele allele) {
|
||||
final Double log10pNonRef = log10pNonRefByAllele.get(allele);
|
||||
public double getLog10PosteriorOfAFEq0ForAllele(final Allele allele) {
|
||||
final Double log10pNonRef = log10pRefByAllele.get(allele);
|
||||
if ( log10pNonRef == null ) throw new IllegalArgumentException("Unknown allele " + allele);
|
||||
return log10pNonRef;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -77,7 +77,7 @@ public class ExactCallLogger implements Cloneable {
|
|||
for ( final Allele allele : result.getAllelesUsedInGenotyping() ) {
|
||||
if ( allele.isNonReference() ) {
|
||||
printCallElement(vc, "MLE", allele, result.getAlleleCountAtMLE(allele));
|
||||
printCallElement(vc, "pNonRefByAllele", allele, result.getLog10PosteriorOfAFGt0ForAllele(allele));
|
||||
printCallElement(vc, "pRefByAllele", allele, result.getLog10PosteriorOfAFEq0ForAllele(allele));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -123,7 +123,7 @@ public class ExactCallLogger implements Cloneable {
|
|||
final double[] posteriors = new double[2];
|
||||
final double[] priors = MathUtils.normalizeFromLog10(new double[]{0.5, 0.5}, true);
|
||||
final List<Integer> mle = new ArrayList<Integer>();
|
||||
final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>();
|
||||
final Map<Allele, Double> log10pRefByAllele = new HashMap<Allele, Double>();
|
||||
long runtimeNano = -1;
|
||||
|
||||
GenomeLoc currentLoc = null;
|
||||
|
|
@ -148,7 +148,7 @@ public class ExactCallLogger implements Cloneable {
|
|||
builder.chr(currentLoc.getContig()).start(currentLoc.getStart()).stop(stop);
|
||||
builder.genotypes(genotypes);
|
||||
final int[] mleInts = ArrayUtils.toPrimitive(mle.toArray(new Integer[]{}));
|
||||
final AFCalcResult result = new AFCalcResult(mleInts, 1, alleles, posteriors, priors, log10pNonRefByAllele);
|
||||
final AFCalcResult result = new AFCalcResult(mleInts, 1, alleles, posteriors, priors, log10pRefByAllele);
|
||||
calls.add(new ExactCall(builder.make(), runtimeNano, result));
|
||||
}
|
||||
break;
|
||||
|
|
@ -165,9 +165,9 @@ public class ExactCallLogger implements Cloneable {
|
|||
posteriors[1] = Double.valueOf(value);
|
||||
} else if (variable.equals("MLE")) {
|
||||
mle.add(Integer.valueOf(value));
|
||||
} else if (variable.equals("pNonRefByAllele")) {
|
||||
} else if (variable.equals("pRefByAllele")) {
|
||||
final Allele a = Allele.create(key);
|
||||
log10pNonRefByAllele.put(a, Double.valueOf(value));
|
||||
log10pRefByAllele.put(a, Double.valueOf(value));
|
||||
} else if (variable.equals("runtime.nano")) {
|
||||
runtimeNano = Long.valueOf(value);
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -125,8 +125,8 @@ import java.util.*;
|
|||
*/
|
||||
final List<AFCalcResult> supporting;
|
||||
|
||||
private MyAFCalcResult(int[] alleleCountsOfMLE, int nEvaluations, List<Allele> allelesUsedInGenotyping, double[] log10LikelihoodsOfAC, double[] log10PriorsOfAC, Map<Allele, Double> log10pNonRefByAllele, List<AFCalcResult> supporting) {
|
||||
super(alleleCountsOfMLE, nEvaluations, allelesUsedInGenotyping, log10LikelihoodsOfAC, log10PriorsOfAC, log10pNonRefByAllele);
|
||||
private MyAFCalcResult(int[] alleleCountsOfMLE, int nEvaluations, List<Allele> allelesUsedInGenotyping, double[] log10LikelihoodsOfAC, double[] log10PriorsOfAC, Map<Allele, Double> log10pRefByAllele, List<AFCalcResult> supporting) {
|
||||
super(alleleCountsOfMLE, nEvaluations, allelesUsedInGenotyping, log10LikelihoodsOfAC, log10PriorsOfAC, log10pRefByAllele);
|
||||
this.supporting = supporting;
|
||||
}
|
||||
}
|
||||
|
|
@ -323,7 +323,7 @@ import java.util.*;
|
|||
final int nAltAlleles = sortedResultsWithThetaNPriors.size();
|
||||
final int[] alleleCountsOfMLE = new int[nAltAlleles];
|
||||
final double[] log10PriorsOfAC = new double[2];
|
||||
final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>(nAltAlleles);
|
||||
final Map<Allele, Double> log10pRefByAllele = new HashMap<Allele, Double>(nAltAlleles);
|
||||
|
||||
// the sum of the log10 posteriors for AF == 0 and AF > 0 to determine joint probs
|
||||
double log10PosteriorOfACEq0Sum = 0.0;
|
||||
|
|
@ -348,7 +348,7 @@ import java.util.*;
|
|||
log10PosteriorOfACGt0Sum += sortedResultWithThetaNPriors.getLog10PosteriorOfAFGT0();
|
||||
|
||||
// bind pNonRef for allele to the posterior value of the AF > 0 with the new adjusted prior
|
||||
log10pNonRefByAllele.put(altAllele, sortedResultWithThetaNPriors.getLog10PosteriorOfAFGT0());
|
||||
log10pRefByAllele.put(altAllele, sortedResultWithThetaNPriors.getLog10PosteriorOfAFEq0());
|
||||
|
||||
// trivial -- update the number of evaluations
|
||||
nEvaluations += sortedResultWithThetaNPriors.nEvaluations;
|
||||
|
|
@ -384,6 +384,6 @@ import java.util.*;
|
|||
MathUtils.normalizeFromLog10(log10LikelihoodsOfAC, true),
|
||||
// priors incorporate multiple alt alleles, must be normalized
|
||||
MathUtils.normalizeFromLog10(log10PriorsOfAC, true),
|
||||
log10pNonRefByAllele, sortedResultsWithThetaNPriors);
|
||||
log10pRefByAllele, sortedResultsWithThetaNPriors);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -30,13 +30,13 @@ class OriginalDiploidExactAFCalc extends DiploidExactAFCalc {
|
|||
final double[] log10Priors = new double[]{log10AlleleFrequencyPriors[0], MathUtils.log10sumLog10(log10AlleleFrequencyPriors, 1)};
|
||||
final double[] log10Posteriors = MathUtils.vectorSum(log10Likelihoods, log10Priors);
|
||||
|
||||
final double log10PNonRef = log10Posteriors[1] > log10Posteriors[0] ? 0.0 : MathUtils.LOG10_P_OF_ZERO;
|
||||
final Map<Allele, Double> log10pNonRefByAllele = Collections.singletonMap(vc.getAlternateAllele(0), log10PNonRef);
|
||||
final double log10PRef = log10Posteriors[1] > log10Posteriors[0] ? MathUtils.LOG10_P_OF_ZERO : 0.0;
|
||||
final Map<Allele, Double> log10pRefByAllele = Collections.singletonMap(vc.getAlternateAllele(0), log10PRef);
|
||||
|
||||
return new AFCalcResult(new int[]{mleK}, 0, vc.getAlleles(),
|
||||
MathUtils.normalizeFromLog10(log10Likelihoods, true),
|
||||
MathUtils.normalizeFromLog10(log10Priors, true),
|
||||
log10pNonRefByAllele);
|
||||
log10pRefByAllele);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -165,14 +165,14 @@ final class StateTracker {
|
|||
final double[] log10Likelihoods = MathUtils.normalizeFromLog10(new double[]{getLog10LikelihoodOfAFzero(), getLog10LikelihoodOfAFNotZero()}, true);
|
||||
final double[] log10Priors = MathUtils.normalizeFromLog10(new double[]{log10PriorsByAC[0], MathUtils.log10sumLog10(log10PriorsByAC, 1)}, true);
|
||||
|
||||
final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>(allelesUsedInGenotyping.size());
|
||||
final Map<Allele, Double> log10pRefByAllele = new HashMap<Allele, Double>(allelesUsedInGenotyping.size());
|
||||
for ( int i = 0; i < subACOfMLE.length; i++ ) {
|
||||
final Allele allele = allelesUsedInGenotyping.get(i+1);
|
||||
final double log10PNonRef = alleleCountsOfMAP[i] > 0 ? 0 : -10000; // TODO -- a total hack but in effect what the old behavior was
|
||||
log10pNonRefByAllele.put(allele, log10PNonRef);
|
||||
final double log10PRef = alleleCountsOfMAP[i] > 0 ? -10000 : 0; // TODO -- a total hack but in effect what the old behavior was
|
||||
log10pRefByAllele.put(allele, log10PRef);
|
||||
}
|
||||
|
||||
return new AFCalcResult(subACOfMLE, nEvaluations, allelesUsedInGenotyping, log10Likelihoods, log10Priors, log10pNonRefByAllele);
|
||||
return new AFCalcResult(subACOfMLE, nEvaluations, allelesUsedInGenotyping, log10Likelihoods, log10Priors, log10pRefByAllele);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -287,6 +287,9 @@ public class PairHMMIndelErrorModel {
|
|||
if (startLocationInRefForHaplotypes < ref.getWindow().getStart()) {
|
||||
startLocationInRefForHaplotypes = ref.getWindow().getStart(); // read starts before haplotype: read will have to be cut numStartSoftClippedBases += ref.getWindow().getStart() - startLocationInRefForHaplotypes;
|
||||
}
|
||||
else if (startLocationInRefForHaplotypes > ref.getWindow().getStop()) {
|
||||
startLocationInRefForHaplotypes = ref.getWindow().getStop(); // read starts after haplotype: read will have to be clipped completely;
|
||||
}
|
||||
|
||||
if (stopLocationInRefForHaplotypes > ref.getWindow().getStop()) {
|
||||
stopLocationInRefForHaplotypes = ref.getWindow().getStop(); // check also if end of read will go beyond reference context
|
||||
|
|
@ -338,6 +341,8 @@ public class PairHMMIndelErrorModel {
|
|||
|
||||
if (startLocationInRefForHaplotypes < haplotype.getStartPosition())
|
||||
startLocationInRefForHaplotypes = haplotype.getStartPosition();
|
||||
else if (startLocationInRefForHaplotypes > haplotype.getStopPosition())
|
||||
startLocationInRefForHaplotypes = haplotype.getStopPosition();
|
||||
|
||||
final long indStart = startLocationInRefForHaplotypes - haplotype.getStartPosition();
|
||||
final long indStop = stopLocationInRefForHaplotypes - haplotype.getStartPosition();
|
||||
|
|
@ -347,8 +352,6 @@ public class PairHMMIndelErrorModel {
|
|||
System.out.format("indStart: %d indStop: %d WinStart:%d WinStop:%d start: %d stop: %d readLength: %d C:%s\n",
|
||||
indStart, indStop, ref.getWindow().getStart(), ref.getWindow().getStop(), startLocationInRefForHaplotypes, stopLocationInRefForHaplotypes, read.getReadLength(), read.getCigar().toString());
|
||||
|
||||
|
||||
|
||||
final byte[] haplotypeBases = Arrays.copyOfRange(haplotype.getBases(),
|
||||
(int)indStart, (int)indStop);
|
||||
|
||||
|
|
|
|||
|
|
@ -1,149 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.qc;
|
||||
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.filters.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Emits intervals present in either the original or reduced bam but not the other.
|
||||
*
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* The original and reduced BAM files.
|
||||
* </p>
|
||||
*
|
||||
* <h2>Output</h2>
|
||||
* <p>
|
||||
* A list of intervals present in one bam but not the other.
|
||||
* </p>
|
||||
*
|
||||
* <h2>Examples</h2>
|
||||
* <pre>
|
||||
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
||||
* -I:original original.bam \
|
||||
* -I:reduced reduced.bam \
|
||||
* -R ref.fasta \
|
||||
* -T AssessReducedCoverage \
|
||||
* -o output.intervals
|
||||
* </pre>
|
||||
*
|
||||
* @author ebanks
|
||||
*/
|
||||
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, BadCigarFilter.class})
|
||||
@Hidden
|
||||
public class AssessReducedCoverage extends LocusWalker<GenomeLoc, GenomeLoc> implements TreeReducible<GenomeLoc> {
|
||||
|
||||
private static final String original = "original";
|
||||
private static final String reduced = "reduced";
|
||||
|
||||
@Output
|
||||
protected PrintStream out;
|
||||
|
||||
@Override
|
||||
public boolean includeReadsWithDeletionAtLoci() { return true; }
|
||||
|
||||
@Argument(fullName = "output_reduced_only_coverage", shortName = "output_reduced_only_coverage", doc = "Output an interval if the reduced bam has coverage where the original does not", required = false)
|
||||
public boolean OUTPUT_REDUCED_ONLY_INTERVALS = false;
|
||||
|
||||
public void initialize() {}
|
||||
|
||||
public GenomeLoc map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
|
||||
if ( tracker == null )
|
||||
return null;
|
||||
|
||||
Set<String> tags = getAllTags(context.getBasePileup());
|
||||
return (tags.contains(original) && !tags.contains(reduced)) ||
|
||||
(OUTPUT_REDUCED_ONLY_INTERVALS && tags.contains(reduced) && !tags.contains(original)) ? ref.getLocus() : null;
|
||||
}
|
||||
|
||||
private Set<String> getAllTags(final ReadBackedPileup pileup) {
|
||||
|
||||
final Set<String> tags = new HashSet<String>(10);
|
||||
|
||||
for ( final PileupElement p : pileup ) {
|
||||
if ( (int)p.getQual() > 2 && p.getMappingQual() > 0 && !p.isDeletion() )
|
||||
tags.addAll(getToolkit().getReaderIDForRead(p.getRead()).getTags().getPositionalTags());
|
||||
}
|
||||
|
||||
return tags;
|
||||
}
|
||||
|
||||
public void onTraversalDone(GenomeLoc sum) {
|
||||
if ( sum != null )
|
||||
out.println(sum);
|
||||
}
|
||||
|
||||
public GenomeLoc reduceInit() {
|
||||
return null;
|
||||
}
|
||||
|
||||
public GenomeLoc treeReduce(GenomeLoc lhs, GenomeLoc rhs) {
|
||||
if ( lhs == null )
|
||||
return rhs;
|
||||
|
||||
if ( rhs == null )
|
||||
return lhs;
|
||||
|
||||
// if contiguous, just merge them
|
||||
if ( lhs.contiguousP(rhs) )
|
||||
return getToolkit().getGenomeLocParser().createGenomeLoc(lhs.getContig(), lhs.getStart(), rhs.getStop());
|
||||
|
||||
// otherwise, print the lhs and start over with the rhs
|
||||
out.println(lhs);
|
||||
return rhs;
|
||||
}
|
||||
|
||||
public GenomeLoc reduce(GenomeLoc value, GenomeLoc sum) {
|
||||
if ( value == null )
|
||||
return sum;
|
||||
|
||||
if ( sum == null )
|
||||
return value;
|
||||
|
||||
// if contiguous, just merge them
|
||||
if ( sum.contiguousP(value) )
|
||||
return getToolkit().getGenomeLocParser().createGenomeLoc(sum.getContig(), sum.getStart(), value.getStop());
|
||||
|
||||
// otherwise, print the sum and start over with the value
|
||||
out.println(sum);
|
||||
return value;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,173 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.qc;
|
||||
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Emits intervals in which the differences between the original and reduced bam quals are bigger epsilon (unless the quals of
|
||||
* the reduced bam are above sufficient threshold)
|
||||
*
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* The original and reduced BAM files.
|
||||
* </p>
|
||||
*
|
||||
* <h2>Output</h2>
|
||||
* <p>
|
||||
* A list of intervals in which the differences between the original and reduced bam quals are bigger epsilon.
|
||||
* </p>
|
||||
*
|
||||
* <h2>Examples</h2>
|
||||
* <pre>
|
||||
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
||||
* -I:original original.bam \
|
||||
* -I:reduced reduced.bam \
|
||||
* -R ref.fasta \
|
||||
* -T AssessReducedQuals \
|
||||
* -o output.intervals
|
||||
* </pre>
|
||||
*
|
||||
* @author ami
|
||||
*/
|
||||
|
||||
public class AssessReducedQuals extends LocusWalker<GenomeLoc, GenomeLoc> implements TreeReducible<GenomeLoc> {
|
||||
|
||||
private static final String reduced = "reduced";
|
||||
private static final String original = "original";
|
||||
private static final int originalQualsIndex = 0;
|
||||
private static final int reducedQualsIndex = 1;
|
||||
|
||||
@Argument(fullName = "sufficientQualSum", shortName = "sufficientQualSum", doc = "When a reduced bam qual sum is above this threshold, it passes even without comparing to the non-reduced bam ", required = false)
|
||||
public int sufficientQualSum = 600;
|
||||
|
||||
@Argument(fullName = "qual_epsilon", shortName = "epsilon", doc = "when |Quals_reduced_bam - Quals_original_bam| > epsilon*Quals_original_bam we output this interval", required = false)
|
||||
public int qual_epsilon = 0;
|
||||
|
||||
@Argument(fullName = "debugLevel", shortName = "debug", doc = "debug mode on") // TODO -- best to make this optional
|
||||
public int debugLevel = 0; // TODO -- best to make this an enum or boolean
|
||||
|
||||
@Output
|
||||
protected PrintStream out;
|
||||
|
||||
public void initialize() {
|
||||
if (debugLevel != 0)
|
||||
out.println(" Debug mode" +
|
||||
"Debug:\tsufficientQualSum: "+sufficientQualSum+ "\n " +
|
||||
"Debug:\tqual_epsilon: "+qual_epsilon);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean includeReadsWithDeletionAtLoci() { return true; }
|
||||
|
||||
@Override
|
||||
public GenomeLoc map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( tracker == null )
|
||||
return null;
|
||||
|
||||
boolean reportLocus;
|
||||
final int[] quals = getPileupQuals(context.getBasePileup());
|
||||
if (debugLevel != 0)
|
||||
out.println("Debug:\tLocus Quals\t"+ref.getLocus()+"\toriginal\t"+quals[originalQualsIndex]+"\treduced\t"+quals[reducedQualsIndex]);
|
||||
final int epsilon = MathUtils.fastRound(quals[originalQualsIndex]*qual_epsilon);
|
||||
final int calcOriginalQuals = Math.min(quals[originalQualsIndex],sufficientQualSum);
|
||||
final int calcReducedQuals = Math.min(quals[reducedQualsIndex],sufficientQualSum);
|
||||
final int OriginalReducedQualDiff = calcOriginalQuals - calcReducedQuals;
|
||||
reportLocus = OriginalReducedQualDiff > epsilon || OriginalReducedQualDiff < -1*epsilon;
|
||||
if(debugLevel != 0 && reportLocus)
|
||||
out.println("Debug:\tEmited Locus\t"+ref.getLocus()+"\toriginal\t"+quals[originalQualsIndex]+"\treduced\t"+quals[reducedQualsIndex]+"\tepsilon\t"+epsilon+"\tdiff\t"+OriginalReducedQualDiff);
|
||||
|
||||
return reportLocus ? ref.getLocus() : null;
|
||||
}
|
||||
|
||||
private final int[] getPileupQuals(final ReadBackedPileup readPileup) {
|
||||
|
||||
final int[] quals = new int[2];
|
||||
String[] printPileup = {"Debug 2:\toriginal pileup:\t"+readPileup.getLocation()+"\nDebug 2:----------------------------------\n",
|
||||
"Debug 2:\t reduced pileup:\t"+readPileup.getLocation()+"\nDebug 2:----------------------------------\n"};
|
||||
|
||||
for( PileupElement p : readPileup ){
|
||||
final List<String> tags = getToolkit().getReaderIDForRead(p.getRead()).getTags().getPositionalTags();
|
||||
if ( isGoodRead(p,tags) ){
|
||||
final int tempQual = (int)(p.getQual()) * p.getRepresentativeCount();
|
||||
final int tagIndex = getTagIndex(tags);
|
||||
quals[tagIndex] += tempQual;
|
||||
if(debugLevel == 2)
|
||||
printPileup[tagIndex] += "\tDebug 2: ("+p+")\tMQ="+p.getMappingQual()+":QU="+p.getQual()+":RC="+p.getRepresentativeCount()+":OS="+p.getOffset()+"\n";
|
||||
}
|
||||
}
|
||||
if(debugLevel == 2){
|
||||
out.println(printPileup[originalQualsIndex]);
|
||||
out.println(printPileup[reducedQualsIndex]);
|
||||
}
|
||||
return quals;
|
||||
}
|
||||
|
||||
// TODO -- arguments/variables should be final, not method declaration
|
||||
private final boolean isGoodRead(PileupElement p, List<String> tags){
|
||||
// TODO -- this isn't quite right. You don't need the tags here. Instead, you want to check whether the read itself (which
|
||||
// TODO -- you can get from the PileupElement) is a reduced read (not all reads from the reduced bam are reduced) and only
|
||||
// TODO -- for them do you want to ignore that min mapping quality cutoff (but you *do* still want the min base cutoff).
|
||||
return !p.isDeletion() && (tags.contains(reduced) || (tags.contains(original) && (int)p.getQual() >= 20 && p.getMappingQual() >= 20));
|
||||
}
|
||||
|
||||
private final int getTagIndex(List<String> tags){
|
||||
return tags.contains(reduced) ? 1 : 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onTraversalDone(GenomeLoc sum) {
|
||||
if ( sum != null )
|
||||
out.println(sum);
|
||||
}
|
||||
|
||||
@Override
|
||||
public GenomeLoc treeReduce(GenomeLoc lhs, GenomeLoc rhs) {
|
||||
if ( lhs == null )
|
||||
return rhs;
|
||||
|
||||
if ( rhs == null )
|
||||
return lhs;
|
||||
|
||||
// if contiguous, just merge them
|
||||
if ( lhs.contiguousP(rhs) )
|
||||
return getToolkit().getGenomeLocParser().createGenomeLoc(lhs.getContig(), lhs.getStart(), rhs.getStop());
|
||||
|
||||
// otherwise, print the lhs and start over with the rhs
|
||||
out.println(lhs);
|
||||
return rhs;
|
||||
}
|
||||
|
||||
@Override
|
||||
public GenomeLoc reduceInit() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public GenomeLoc reduce(GenomeLoc value, GenomeLoc sum) {
|
||||
if ( value == null )
|
||||
return sum;
|
||||
|
||||
if ( sum == null )
|
||||
return value;
|
||||
|
||||
// if contiguous, just merge them
|
||||
if ( sum.contiguousP(value) )
|
||||
return getToolkit().getGenomeLocParser().createGenomeLoc(sum.getContig(), sum.getStart(), value.getStop());
|
||||
|
||||
// otherwise, print the sum and start over with the value
|
||||
out.println(sum);
|
||||
return value;
|
||||
}
|
||||
}
|
||||
|
|
@ -134,7 +134,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
|
|||
protected VariantContextWriter vcfWriter = null;
|
||||
|
||||
@Argument(shortName="genotypeMergeOptions", doc="Determines how we should merge genotype records for samples shared across the ROD files", required=false)
|
||||
public VariantContextUtils.GenotypeMergeType genotypeMergeOption = VariantContextUtils.GenotypeMergeType.PRIORITIZE;
|
||||
public VariantContextUtils.GenotypeMergeType genotypeMergeOption = null;
|
||||
|
||||
@Argument(shortName="filteredRecordsMergeType", doc="Determines how we should handle records seen at the same site in the VCF, but with different FILTER fields", required=false)
|
||||
public VariantContextUtils.FilteredRecordMergeType filteredRecordsMergeType = VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED;
|
||||
|
|
@ -200,13 +200,13 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
|
|||
} else
|
||||
logger.warn("VCF output file not an instance of VCFWriterStub; cannot enable sites only output option");
|
||||
|
||||
if ( PRIORITY_STRING == null ) {
|
||||
validateAnnotateUnionArguments();
|
||||
if ( PRIORITY_STRING == null && genotypeMergeOption == null) {
|
||||
genotypeMergeOption = VariantContextUtils.GenotypeMergeType.UNSORTED;
|
||||
//PRIORITY_STRING = Utils.join(",", vcfRods.keySet()); Deleted by Ami (7/10/12)
|
||||
logger.info("Priority string not provided, using arbitrary genotyping order: " + PRIORITY_STRING);
|
||||
logger.info("Priority string not provided, using arbitrary genotyping order: "+priority);
|
||||
}
|
||||
|
||||
validateAnnotateUnionArguments();
|
||||
samples = sitesOnlyVCF ? Collections.<String>emptySet() : SampleUtils.getSampleList(vcfRods, genotypeMergeOption);
|
||||
|
||||
if ( SET_KEY.toLowerCase().equals("null") )
|
||||
|
|
@ -228,16 +228,15 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
|
|||
if ( genotypeMergeOption == VariantContextUtils.GenotypeMergeType.PRIORITIZE && PRIORITY_STRING == null )
|
||||
throw new UserException.MissingArgument("rod_priority_list", "Priority string must be provided if you want to prioritize genotypes");
|
||||
|
||||
if ( genotypeMergeOption == VariantContextUtils.GenotypeMergeType.PRIORITIZE )
|
||||
if ( PRIORITY_STRING != null || genotypeMergeOption == VariantContextUtils.GenotypeMergeType.PRIORITIZE ){
|
||||
priority = new ArrayList<String>(Arrays.asList(PRIORITY_STRING.split(",")));
|
||||
else
|
||||
priority = new ArrayList<String>(rodNames);
|
||||
if ( rodNames.size() != priority.size() )
|
||||
throw new UserException.BadArgumentValue("rod_priority_list", "The priority list must contain exactly one rod binding per ROD provided to the GATK: rodNames=" + rodNames + " priority=" + priority);
|
||||
|
||||
if ( rodNames.size() != priority.size() )
|
||||
throw new UserException.BadArgumentValue("rod_priority_list", "The priority list must contain exactly one rod binding per ROD provided to the GATK: rodNames=" + rodNames + " priority=" + priority);
|
||||
if ( ! rodNames.containsAll(priority) )
|
||||
throw new UserException.BadArgumentValue("rod_priority_list", "Not all priority elements provided as input RODs: " + PRIORITY_STRING);
|
||||
}
|
||||
|
||||
if ( ! rodNames.containsAll(priority) )
|
||||
throw new UserException.BadArgumentValue("rod_priority_list", "Not all priority elements provided as input RODs: " + PRIORITY_STRING);
|
||||
}
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
|
|
|
|||
|
|
@ -8,6 +8,8 @@ import java.util.Collection;
|
|||
import java.util.Iterator;
|
||||
|
||||
/**
|
||||
* THIS IMPLEMENTATION IS BROKEN AND WILL BE REMOVED ONCE THE DOWNSAMPLING ENGINE FORK COLLAPSES
|
||||
*
|
||||
* Randomly downsample from a stream of elements. This algorithm is a direct,
|
||||
* naive implementation of reservoir downsampling as described in "Random Downsampling
|
||||
* with a Reservoir" (Vitter 1985). At time of writing, this paper is located here:
|
||||
|
|
@ -16,7 +18,7 @@ import java.util.Iterator;
|
|||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class ReservoirDownsampler<T> {
|
||||
public class LegacyReservoirDownsampler<T> {
|
||||
/**
|
||||
* The reservoir of elements tracked by this downsampler.
|
||||
*/
|
||||
|
|
@ -31,7 +33,7 @@ public class ReservoirDownsampler<T> {
|
|||
* Create a new downsampler with the given source iterator and given comparator.
|
||||
* @param maxElements What is the maximum number of reads that can be returned in any call of this
|
||||
*/
|
||||
public ReservoirDownsampler(final int maxElements) {
|
||||
public LegacyReservoirDownsampler(final int maxElements) {
|
||||
if(maxElements < 0)
|
||||
throw new ReviewedStingException("Unable to work with an negative size collection of elements");
|
||||
this.reservoir = new ArrayList<T>(maxElements);
|
||||
|
|
@ -32,11 +32,10 @@ import net.sf.samtools.SAMRecord;
|
|||
import org.broadinstitute.sting.commandline.Tags;
|
||||
import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
|
||||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
||||
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
|
||||
import org.broadinstitute.sting.gatk.iterators.LegacyLocusIteratorByState;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
import org.broadinstitute.sting.gatk.walkers.qc.CountLoci;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
|
@ -85,7 +84,7 @@ public class DownsamplerBenchmark extends ReadProcessingBenchmark {
|
|||
GenomeLocParser genomeLocParser = new GenomeLocParser(reader.getFileHeader().getSequenceDictionary());
|
||||
// Filter unmapped reads. TODO: is this always strictly necessary? Who in the GATK normally filters these out?
|
||||
Iterator<SAMRecord> readIterator = new FilteringIterator(reader.iterator(),new UnmappedReadFilter());
|
||||
LocusIteratorByState locusIteratorByState = new LocusIteratorByState(readIterator,readProperties,genomeLocParser, LocusIteratorByState.sampleListForSAMWithoutReadGroups());
|
||||
LegacyLocusIteratorByState locusIteratorByState = new LegacyLocusIteratorByState(readIterator,readProperties,genomeLocParser, LegacyLocusIteratorByState.sampleListForSAMWithoutReadGroups());
|
||||
while(locusIteratorByState.hasNext()) {
|
||||
locusIteratorByState.next().getLocation();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -45,10 +45,10 @@ import java.io.IOException;
|
|||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
|
||||
public class ExperimentalReadShardBalancerUnitTest extends BaseTest {
|
||||
public class ReadShardBalancerUnitTest extends BaseTest {
|
||||
|
||||
/**
|
||||
* Tests to ensure that ExperimentalReadShardBalancer works as expected and does not place shard boundaries
|
||||
* Tests to ensure that ReadShardBalancer works as expected and does not place shard boundaries
|
||||
* at inappropriate places, such as within an alignment start position
|
||||
*/
|
||||
private static class ExperimentalReadShardBalancerTest extends TestDataProvider {
|
||||
|
|
@ -74,7 +74,7 @@ public class ExperimentalReadShardBalancerUnitTest extends BaseTest {
|
|||
this.stackSize = stackSize;
|
||||
this.numUnmappedReads = numUnmappedReads;
|
||||
|
||||
this.downsamplingMethod = new DownsamplingMethod(DownsampleType.BY_SAMPLE, downsamplingTargetCoverage, null, true);
|
||||
this.downsamplingMethod = new DownsamplingMethod(DownsampleType.BY_SAMPLE, downsamplingTargetCoverage, null, false);
|
||||
this.expectedReadCount = Math.min(stackSize, downsamplingTargetCoverage) * numStacksPerContig * numContigs + numUnmappedReads;
|
||||
|
||||
setName(String.format("%s: numContigs=%d numStacksPerContig=%d stackSize=%d numUnmappedReads=%d downsamplingTargetCoverage=%d",
|
||||
|
|
@ -96,7 +96,7 @@ public class ExperimentalReadShardBalancerUnitTest extends BaseTest {
|
|||
new ArrayList<ReadFilter>(),
|
||||
false);
|
||||
|
||||
Iterable<Shard> shardIterator = dataSource.createShardIteratorOverAllReads(new ExperimentalReadShardBalancer());
|
||||
Iterable<Shard> shardIterator = dataSource.createShardIteratorOverAllReads(new ReadShardBalancer());
|
||||
|
||||
SAMRecord readAtEndOfLastShard = null;
|
||||
int totalReadsSeen = 0;
|
||||
|
|
@ -7,10 +7,8 @@ import org.broadinstitute.sting.gatk.ReadProperties;
|
|||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
|
||||
import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
|
@ -21,14 +19,17 @@ import org.testng.annotations.BeforeClass;
|
|||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* testing of the experimental version of LocusIteratorByState
|
||||
* testing of the LEGACY version of LocusIteratorByState
|
||||
*/
|
||||
public class LocusIteratorByStateExperimentalUnitTest extends BaseTest {
|
||||
public class LegacyLocusIteratorByStateUnitTest extends BaseTest {
|
||||
private static SAMFileHeader header;
|
||||
private LocusIteratorByStateExperimental li;
|
||||
private LegacyLocusIteratorByState li;
|
||||
private GenomeLocParser genomeLocParser;
|
||||
|
||||
@BeforeClass
|
||||
|
|
@ -37,8 +38,8 @@ public class LocusIteratorByStateExperimentalUnitTest extends BaseTest {
|
|||
genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
|
||||
}
|
||||
|
||||
private LocusIteratorByStateExperimental makeLTBS(List<SAMRecord> reads, ReadProperties readAttributes) {
|
||||
return new LocusIteratorByStateExperimental(new FakeCloseableIterator<SAMRecord>(reads.iterator()), readAttributes, genomeLocParser, LocusIteratorByStateExperimental.sampleListForSAMWithoutReadGroups());
|
||||
private LegacyLocusIteratorByState makeLTBS(List<SAMRecord> reads, ReadProperties readAttributes) {
|
||||
return new LegacyLocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()), readAttributes, genomeLocParser, LegacyLocusIteratorByState.sampleListForSAMWithoutReadGroups());
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -328,184 +329,14 @@ public class LocusIteratorByStateExperimentalUnitTest extends BaseTest {
|
|||
// End comprehensive LIBS/PileupElement tests //
|
||||
////////////////////////////////////////////////
|
||||
|
||||
|
||||
///////////////////////////////////////
|
||||
// Read State Manager Tests //
|
||||
///////////////////////////////////////
|
||||
|
||||
private class PerSampleReadStateManagerTest extends TestDataProvider {
|
||||
private List<Integer> readCountsPerAlignmentStart;
|
||||
private List<SAMRecord> reads;
|
||||
private List<ArrayList<LocusIteratorByStateExperimental.SAMRecordState>> recordStatesByAlignmentStart;
|
||||
private int removalInterval;
|
||||
|
||||
public PerSampleReadStateManagerTest( List<Integer> readCountsPerAlignmentStart, int removalInterval ) {
|
||||
super(PerSampleReadStateManagerTest.class);
|
||||
|
||||
this.readCountsPerAlignmentStart = readCountsPerAlignmentStart;
|
||||
this.removalInterval = removalInterval;
|
||||
|
||||
reads = new ArrayList<SAMRecord>();
|
||||
recordStatesByAlignmentStart = new ArrayList<ArrayList<LocusIteratorByStateExperimental.SAMRecordState>>();
|
||||
|
||||
setName(String.format("%s: readCountsPerAlignmentStart: %s removalInterval: %d",
|
||||
getClass().getSimpleName(), readCountsPerAlignmentStart, removalInterval));
|
||||
}
|
||||
|
||||
public void run() {
|
||||
LocusIteratorByStateExperimental libs = makeLTBS(new ArrayList<SAMRecord>(), createTestReadProperties());
|
||||
LocusIteratorByStateExperimental.ReadStateManager readStateManager =
|
||||
libs.new ReadStateManager(new ArrayList<SAMRecord>().iterator());
|
||||
LocusIteratorByStateExperimental.ReadStateManager.PerSampleReadStateManager perSampleReadStateManager =
|
||||
readStateManager.new PerSampleReadStateManager();
|
||||
|
||||
makeReads();
|
||||
|
||||
for ( ArrayList<LocusIteratorByStateExperimental.SAMRecordState> stackRecordStates : recordStatesByAlignmentStart ) {
|
||||
perSampleReadStateManager.addStatesAtNextAlignmentStart(stackRecordStates);
|
||||
}
|
||||
|
||||
// read state manager should have the right number of reads
|
||||
Assert.assertEquals(reads.size(), perSampleReadStateManager.size());
|
||||
|
||||
Iterator<SAMRecord> originalReadsIterator = reads.iterator();
|
||||
Iterator<LocusIteratorByStateExperimental.SAMRecordState> recordStateIterator = perSampleReadStateManager.iterator();
|
||||
int recordStateCount = 0;
|
||||
int numReadStatesRemoved = 0;
|
||||
|
||||
// Do a first-pass validation of the record state iteration by making sure we get back everything we
|
||||
// put in, in the same order, doing any requested removals of read states along the way
|
||||
while ( recordStateIterator.hasNext() ) {
|
||||
LocusIteratorByStateExperimental.SAMRecordState readState = recordStateIterator.next();
|
||||
recordStateCount++;
|
||||
SAMRecord readFromPerSampleReadStateManager = readState.getRead();
|
||||
|
||||
Assert.assertTrue(originalReadsIterator.hasNext());
|
||||
SAMRecord originalRead = originalReadsIterator.next();
|
||||
|
||||
// The read we get back should be literally the same read in memory as we put in
|
||||
Assert.assertTrue(originalRead == readFromPerSampleReadStateManager);
|
||||
|
||||
// If requested, remove a read state every removalInterval states
|
||||
if ( removalInterval > 0 && recordStateCount % removalInterval == 0 ) {
|
||||
recordStateIterator.remove();
|
||||
numReadStatesRemoved++;
|
||||
}
|
||||
}
|
||||
|
||||
Assert.assertFalse(originalReadsIterator.hasNext());
|
||||
|
||||
// If we removed any read states, do a second pass through the read states to make sure the right
|
||||
// states were removed
|
||||
if ( numReadStatesRemoved > 0 ) {
|
||||
Assert.assertEquals(perSampleReadStateManager.size(), reads.size() - numReadStatesRemoved);
|
||||
|
||||
originalReadsIterator = reads.iterator();
|
||||
recordStateIterator = perSampleReadStateManager.iterator();
|
||||
int readCount = 0;
|
||||
int readStateCount = 0;
|
||||
|
||||
// Match record states with the reads that should remain after removal
|
||||
while ( recordStateIterator.hasNext() ) {
|
||||
LocusIteratorByStateExperimental.SAMRecordState readState = recordStateIterator.next();
|
||||
readStateCount++;
|
||||
SAMRecord readFromPerSampleReadStateManager = readState.getRead();
|
||||
|
||||
Assert.assertTrue(originalReadsIterator.hasNext());
|
||||
|
||||
SAMRecord originalRead = originalReadsIterator.next();
|
||||
readCount++;
|
||||
|
||||
if ( readCount % removalInterval == 0 ) {
|
||||
originalRead = originalReadsIterator.next(); // advance to next read, since the previous one should have been discarded
|
||||
readCount++;
|
||||
}
|
||||
|
||||
// The read we get back should be literally the same read in memory as we put in (after accounting for removals)
|
||||
Assert.assertTrue(originalRead == readFromPerSampleReadStateManager);
|
||||
}
|
||||
|
||||
Assert.assertEquals(readStateCount, reads.size() - numReadStatesRemoved);
|
||||
}
|
||||
|
||||
// Allow memory used by this test to be reclaimed
|
||||
readCountsPerAlignmentStart = null;
|
||||
reads = null;
|
||||
recordStatesByAlignmentStart = null;
|
||||
}
|
||||
|
||||
private void makeReads() {
|
||||
int alignmentStart = 1;
|
||||
|
||||
for ( int readsThisStack : readCountsPerAlignmentStart ) {
|
||||
ArrayList<SAMRecord> stackReads = new ArrayList<SAMRecord>(ArtificialSAMUtils.createStackOfIdenticalArtificialReads(readsThisStack, header, "foo", 0, alignmentStart, MathUtils.randomIntegerInRange(50, 100)));
|
||||
ArrayList<LocusIteratorByStateExperimental.SAMRecordState> stackRecordStates = new ArrayList<LocusIteratorByStateExperimental.SAMRecordState>();
|
||||
|
||||
for ( SAMRecord read : stackReads ) {
|
||||
stackRecordStates.add(new LocusIteratorByStateExperimental.SAMRecordState(read));
|
||||
}
|
||||
|
||||
reads.addAll(stackReads);
|
||||
recordStatesByAlignmentStart.add(stackRecordStates);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "PerSampleReadStateManagerTestDataProvider")
|
||||
public Object[][] createPerSampleReadStateManagerTests() {
|
||||
for ( List<Integer> thisTestReadStateCounts : Arrays.asList( Arrays.asList(1),
|
||||
Arrays.asList(2),
|
||||
Arrays.asList(10),
|
||||
Arrays.asList(1, 1),
|
||||
Arrays.asList(2, 2),
|
||||
Arrays.asList(10, 10),
|
||||
Arrays.asList(1, 10),
|
||||
Arrays.asList(10, 1),
|
||||
Arrays.asList(1, 1, 1),
|
||||
Arrays.asList(2, 2, 2),
|
||||
Arrays.asList(10, 10, 10),
|
||||
Arrays.asList(1, 1, 1, 1, 1, 1),
|
||||
Arrays.asList(10, 10, 10, 10, 10, 10),
|
||||
Arrays.asList(1, 2, 10, 1, 2, 10)
|
||||
) ) {
|
||||
|
||||
for ( int removalInterval : Arrays.asList(0, 2, 3) ) {
|
||||
new PerSampleReadStateManagerTest(thisTestReadStateCounts, removalInterval);
|
||||
}
|
||||
}
|
||||
|
||||
return PerSampleReadStateManagerTest.getTests(PerSampleReadStateManagerTest.class);
|
||||
}
|
||||
|
||||
@Test(dataProvider = "PerSampleReadStateManagerTestDataProvider")
|
||||
public void runPerSampleReadStateManagerTest( PerSampleReadStateManagerTest test ) {
|
||||
logger.warn("Running test: " + test);
|
||||
|
||||
test.run();
|
||||
}
|
||||
|
||||
///////////////////////////////////////
|
||||
// End Read State Manager Tests //
|
||||
///////////////////////////////////////
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////
|
||||
// Helper methods / classes //
|
||||
///////////////////////////////////////
|
||||
|
||||
private static ReadProperties createTestReadProperties() {
|
||||
return createTestReadProperties(null);
|
||||
}
|
||||
|
||||
private static ReadProperties createTestReadProperties( DownsamplingMethod downsamplingMethod ) {
|
||||
return new ReadProperties(
|
||||
Collections.<SAMReaderID>emptyList(),
|
||||
new SAMFileHeader(),
|
||||
SAMFileHeader.SortOrder.coordinate,
|
||||
false,
|
||||
SAMFileReader.ValidationStringency.STRICT,
|
||||
downsamplingMethod,
|
||||
null,
|
||||
new ValidationExclusion(),
|
||||
Collections.<ReadFilter>emptyList(),
|
||||
Collections.<ReadTransformer>emptyList(),
|
||||
|
|
@ -513,136 +344,137 @@ public class LocusIteratorByStateExperimentalUnitTest extends BaseTest {
|
|||
(byte) -1
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
private static class FakeCloseableIterator<T> implements CloseableIterator<T> {
|
||||
Iterator<T> iterator;
|
||||
class FakeCloseableIterator<T> implements CloseableIterator<T> {
|
||||
Iterator<T> iterator;
|
||||
|
||||
public FakeCloseableIterator(Iterator<T> it) {
|
||||
iterator = it;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return iterator.hasNext();
|
||||
}
|
||||
|
||||
@Override
|
||||
public T next() {
|
||||
return iterator.next();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("Don't remove!");
|
||||
}
|
||||
public FakeCloseableIterator(Iterator<T> it) {
|
||||
iterator = it;
|
||||
}
|
||||
|
||||
private static final class LIBS_position {
|
||||
@Override
|
||||
public void close() {}
|
||||
|
||||
SAMRecord read;
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return iterator.hasNext();
|
||||
}
|
||||
|
||||
final int numOperators;
|
||||
int currentOperatorIndex = 0;
|
||||
int currentPositionOnOperator = 0;
|
||||
int currentReadOffset = 0;
|
||||
@Override
|
||||
public T next() {
|
||||
return iterator.next();
|
||||
}
|
||||
|
||||
boolean isBeforeDeletionStart = false;
|
||||
boolean isBeforeDeletedBase = false;
|
||||
boolean isAfterDeletionEnd = false;
|
||||
boolean isAfterDeletedBase = false;
|
||||
boolean isBeforeInsertion = false;
|
||||
boolean isAfterInsertion = false;
|
||||
boolean isNextToSoftClip = false;
|
||||
|
||||
boolean sawMop = false;
|
||||
|
||||
public LIBS_position(final SAMRecord read) {
|
||||
this.read = read;
|
||||
numOperators = read.getCigar().numCigarElements();
|
||||
}
|
||||
|
||||
public int getCurrentReadOffset() {
|
||||
return Math.max(0, currentReadOffset - 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Steps forward on the genome. Returns false when done reading the read, true otherwise.
|
||||
*/
|
||||
public boolean stepForwardOnGenome() {
|
||||
if ( currentOperatorIndex == numOperators )
|
||||
return false;
|
||||
|
||||
CigarElement curElement = read.getCigar().getCigarElement(currentOperatorIndex);
|
||||
if ( currentPositionOnOperator >= curElement.getLength() ) {
|
||||
if ( ++currentOperatorIndex == numOperators )
|
||||
return false;
|
||||
|
||||
curElement = read.getCigar().getCigarElement(currentOperatorIndex);
|
||||
currentPositionOnOperator = 0;
|
||||
}
|
||||
|
||||
switch ( curElement.getOperator() ) {
|
||||
case I: // insertion w.r.t. the reference
|
||||
if ( !sawMop )
|
||||
break;
|
||||
case S: // soft clip
|
||||
currentReadOffset += curElement.getLength();
|
||||
case H: // hard clip
|
||||
case P: // padding
|
||||
currentOperatorIndex++;
|
||||
return stepForwardOnGenome();
|
||||
|
||||
case D: // deletion w.r.t. the reference
|
||||
case N: // reference skip (looks and gets processed just like a "deletion", just different logical meaning)
|
||||
currentPositionOnOperator++;
|
||||
break;
|
||||
|
||||
case M:
|
||||
case EQ:
|
||||
case X:
|
||||
sawMop = true;
|
||||
currentReadOffset++;
|
||||
currentPositionOnOperator++;
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("No support for cigar op: " + curElement.getOperator());
|
||||
}
|
||||
|
||||
final boolean isFirstOp = currentOperatorIndex == 0;
|
||||
final boolean isLastOp = currentOperatorIndex == numOperators - 1;
|
||||
final boolean isFirstBaseOfOp = currentPositionOnOperator == 1;
|
||||
final boolean isLastBaseOfOp = currentPositionOnOperator == curElement.getLength();
|
||||
|
||||
isBeforeDeletionStart = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.D, isLastOp, isLastBaseOfOp);
|
||||
isBeforeDeletedBase = isBeforeDeletionStart || (!isLastBaseOfOp && curElement.getOperator() == CigarOperator.D);
|
||||
isAfterDeletionEnd = isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.D, isFirstOp, isFirstBaseOfOp);
|
||||
isAfterDeletedBase = isAfterDeletionEnd || (!isFirstBaseOfOp && curElement.getOperator() == CigarOperator.D);
|
||||
isBeforeInsertion = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.I, isLastOp, isLastBaseOfOp)
|
||||
|| (!sawMop && curElement.getOperator() == CigarOperator.I);
|
||||
isAfterInsertion = isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.I, isFirstOp, isFirstBaseOfOp);
|
||||
isNextToSoftClip = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.S, isLastOp, isLastBaseOfOp)
|
||||
|| isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.S, isFirstOp, isFirstBaseOfOp);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
private static boolean isBeforeOp(final Cigar cigar,
|
||||
final int currentOperatorIndex,
|
||||
final CigarOperator op,
|
||||
final boolean isLastOp,
|
||||
final boolean isLastBaseOfOp) {
|
||||
return !isLastOp && isLastBaseOfOp && cigar.getCigarElement(currentOperatorIndex+1).getOperator() == op;
|
||||
}
|
||||
|
||||
private static boolean isAfterOp(final Cigar cigar,
|
||||
final int currentOperatorIndex,
|
||||
final CigarOperator op,
|
||||
final boolean isFirstOp,
|
||||
final boolean isFirstBaseOfOp) {
|
||||
return !isFirstOp && isFirstBaseOfOp && cigar.getCigarElement(currentOperatorIndex-1).getOperator() == op;
|
||||
}
|
||||
@Override
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("Don't remove!");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
final class LIBS_position {
|
||||
|
||||
SAMRecord read;
|
||||
|
||||
final int numOperators;
|
||||
int currentOperatorIndex = 0;
|
||||
int currentPositionOnOperator = 0;
|
||||
int currentReadOffset = 0;
|
||||
|
||||
boolean isBeforeDeletionStart = false;
|
||||
boolean isBeforeDeletedBase = false;
|
||||
boolean isAfterDeletionEnd = false;
|
||||
boolean isAfterDeletedBase = false;
|
||||
boolean isBeforeInsertion = false;
|
||||
boolean isAfterInsertion = false;
|
||||
boolean isNextToSoftClip = false;
|
||||
|
||||
boolean sawMop = false;
|
||||
|
||||
public LIBS_position(final SAMRecord read) {
|
||||
this.read = read;
|
||||
numOperators = read.getCigar().numCigarElements();
|
||||
}
|
||||
|
||||
public int getCurrentReadOffset() {
|
||||
return Math.max(0, currentReadOffset - 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Steps forward on the genome. Returns false when done reading the read, true otherwise.
|
||||
*/
|
||||
public boolean stepForwardOnGenome() {
|
||||
if ( currentOperatorIndex == numOperators )
|
||||
return false;
|
||||
|
||||
CigarElement curElement = read.getCigar().getCigarElement(currentOperatorIndex);
|
||||
if ( currentPositionOnOperator >= curElement.getLength() ) {
|
||||
if ( ++currentOperatorIndex == numOperators )
|
||||
return false;
|
||||
|
||||
curElement = read.getCigar().getCigarElement(currentOperatorIndex);
|
||||
currentPositionOnOperator = 0;
|
||||
}
|
||||
|
||||
switch ( curElement.getOperator() ) {
|
||||
case I: // insertion w.r.t. the reference
|
||||
if ( !sawMop )
|
||||
break;
|
||||
case S: // soft clip
|
||||
currentReadOffset += curElement.getLength();
|
||||
case H: // hard clip
|
||||
case P: // padding
|
||||
currentOperatorIndex++;
|
||||
return stepForwardOnGenome();
|
||||
|
||||
case D: // deletion w.r.t. the reference
|
||||
case N: // reference skip (looks and gets processed just like a "deletion", just different logical meaning)
|
||||
currentPositionOnOperator++;
|
||||
break;
|
||||
|
||||
case M:
|
||||
case EQ:
|
||||
case X:
|
||||
sawMop = true;
|
||||
currentReadOffset++;
|
||||
currentPositionOnOperator++;
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("No support for cigar op: " + curElement.getOperator());
|
||||
}
|
||||
|
||||
final boolean isFirstOp = currentOperatorIndex == 0;
|
||||
final boolean isLastOp = currentOperatorIndex == numOperators - 1;
|
||||
final boolean isFirstBaseOfOp = currentPositionOnOperator == 1;
|
||||
final boolean isLastBaseOfOp = currentPositionOnOperator == curElement.getLength();
|
||||
|
||||
isBeforeDeletionStart = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.D, isLastOp, isLastBaseOfOp);
|
||||
isBeforeDeletedBase = isBeforeDeletionStart || (!isLastBaseOfOp && curElement.getOperator() == CigarOperator.D);
|
||||
isAfterDeletionEnd = isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.D, isFirstOp, isFirstBaseOfOp);
|
||||
isAfterDeletedBase = isAfterDeletionEnd || (!isFirstBaseOfOp && curElement.getOperator() == CigarOperator.D);
|
||||
isBeforeInsertion = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.I, isLastOp, isLastBaseOfOp)
|
||||
|| (!sawMop && curElement.getOperator() == CigarOperator.I);
|
||||
isAfterInsertion = isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.I, isFirstOp, isFirstBaseOfOp);
|
||||
isNextToSoftClip = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.S, isLastOp, isLastBaseOfOp)
|
||||
|| isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.S, isFirstOp, isFirstBaseOfOp);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
private static boolean isBeforeOp(final Cigar cigar,
|
||||
final int currentOperatorIndex,
|
||||
final CigarOperator op,
|
||||
final boolean isLastOp,
|
||||
final boolean isLastBaseOfOp) {
|
||||
return !isLastOp && isLastBaseOfOp && cigar.getCigarElement(currentOperatorIndex+1).getOperator() == op;
|
||||
}
|
||||
|
||||
private static boolean isAfterOp(final Cigar cigar,
|
||||
final int currentOperatorIndex,
|
||||
final CigarOperator op,
|
||||
final boolean isFirstOp,
|
||||
final boolean isFirstBaseOfOp) {
|
||||
return !isFirstOp && isFirstBaseOfOp && cigar.getCigarElement(currentOperatorIndex-1).getOperator() == op;
|
||||
}
|
||||
}
|
||||
|
|
@ -7,8 +7,10 @@ import org.broadinstitute.sting.gatk.ReadProperties;
|
|||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
|
||||
import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
|
@ -19,13 +21,10 @@ import org.testng.annotations.BeforeClass;
|
|||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* testing of the LocusIteratorByState
|
||||
* testing of the new (non-legacy) version of LocusIteratorByState
|
||||
*/
|
||||
public class LocusIteratorByStateUnitTest extends BaseTest {
|
||||
private static SAMFileHeader header;
|
||||
|
|
@ -329,14 +328,184 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
|
|||
// End comprehensive LIBS/PileupElement tests //
|
||||
////////////////////////////////////////////////
|
||||
|
||||
|
||||
///////////////////////////////////////
|
||||
// Read State Manager Tests //
|
||||
///////////////////////////////////////
|
||||
|
||||
private class PerSampleReadStateManagerTest extends TestDataProvider {
|
||||
private List<Integer> readCountsPerAlignmentStart;
|
||||
private List<SAMRecord> reads;
|
||||
private List<ArrayList<LocusIteratorByState.SAMRecordState>> recordStatesByAlignmentStart;
|
||||
private int removalInterval;
|
||||
|
||||
public PerSampleReadStateManagerTest( List<Integer> readCountsPerAlignmentStart, int removalInterval ) {
|
||||
super(PerSampleReadStateManagerTest.class);
|
||||
|
||||
this.readCountsPerAlignmentStart = readCountsPerAlignmentStart;
|
||||
this.removalInterval = removalInterval;
|
||||
|
||||
reads = new ArrayList<SAMRecord>();
|
||||
recordStatesByAlignmentStart = new ArrayList<ArrayList<LocusIteratorByState.SAMRecordState>>();
|
||||
|
||||
setName(String.format("%s: readCountsPerAlignmentStart: %s removalInterval: %d",
|
||||
getClass().getSimpleName(), readCountsPerAlignmentStart, removalInterval));
|
||||
}
|
||||
|
||||
public void run() {
|
||||
LocusIteratorByState libs = makeLTBS(new ArrayList<SAMRecord>(), createTestReadProperties());
|
||||
LocusIteratorByState.ReadStateManager readStateManager =
|
||||
libs.new ReadStateManager(new ArrayList<SAMRecord>().iterator());
|
||||
LocusIteratorByState.ReadStateManager.PerSampleReadStateManager perSampleReadStateManager =
|
||||
readStateManager.new PerSampleReadStateManager();
|
||||
|
||||
makeReads();
|
||||
|
||||
for ( ArrayList<LocusIteratorByState.SAMRecordState> stackRecordStates : recordStatesByAlignmentStart ) {
|
||||
perSampleReadStateManager.addStatesAtNextAlignmentStart(stackRecordStates);
|
||||
}
|
||||
|
||||
// read state manager should have the right number of reads
|
||||
Assert.assertEquals(reads.size(), perSampleReadStateManager.size());
|
||||
|
||||
Iterator<SAMRecord> originalReadsIterator = reads.iterator();
|
||||
Iterator<LocusIteratorByState.SAMRecordState> recordStateIterator = perSampleReadStateManager.iterator();
|
||||
int recordStateCount = 0;
|
||||
int numReadStatesRemoved = 0;
|
||||
|
||||
// Do a first-pass validation of the record state iteration by making sure we get back everything we
|
||||
// put in, in the same order, doing any requested removals of read states along the way
|
||||
while ( recordStateIterator.hasNext() ) {
|
||||
LocusIteratorByState.SAMRecordState readState = recordStateIterator.next();
|
||||
recordStateCount++;
|
||||
SAMRecord readFromPerSampleReadStateManager = readState.getRead();
|
||||
|
||||
Assert.assertTrue(originalReadsIterator.hasNext());
|
||||
SAMRecord originalRead = originalReadsIterator.next();
|
||||
|
||||
// The read we get back should be literally the same read in memory as we put in
|
||||
Assert.assertTrue(originalRead == readFromPerSampleReadStateManager);
|
||||
|
||||
// If requested, remove a read state every removalInterval states
|
||||
if ( removalInterval > 0 && recordStateCount % removalInterval == 0 ) {
|
||||
recordStateIterator.remove();
|
||||
numReadStatesRemoved++;
|
||||
}
|
||||
}
|
||||
|
||||
Assert.assertFalse(originalReadsIterator.hasNext());
|
||||
|
||||
// If we removed any read states, do a second pass through the read states to make sure the right
|
||||
// states were removed
|
||||
if ( numReadStatesRemoved > 0 ) {
|
||||
Assert.assertEquals(perSampleReadStateManager.size(), reads.size() - numReadStatesRemoved);
|
||||
|
||||
originalReadsIterator = reads.iterator();
|
||||
recordStateIterator = perSampleReadStateManager.iterator();
|
||||
int readCount = 0;
|
||||
int readStateCount = 0;
|
||||
|
||||
// Match record states with the reads that should remain after removal
|
||||
while ( recordStateIterator.hasNext() ) {
|
||||
LocusIteratorByState.SAMRecordState readState = recordStateIterator.next();
|
||||
readStateCount++;
|
||||
SAMRecord readFromPerSampleReadStateManager = readState.getRead();
|
||||
|
||||
Assert.assertTrue(originalReadsIterator.hasNext());
|
||||
|
||||
SAMRecord originalRead = originalReadsIterator.next();
|
||||
readCount++;
|
||||
|
||||
if ( readCount % removalInterval == 0 ) {
|
||||
originalRead = originalReadsIterator.next(); // advance to next read, since the previous one should have been discarded
|
||||
readCount++;
|
||||
}
|
||||
|
||||
// The read we get back should be literally the same read in memory as we put in (after accounting for removals)
|
||||
Assert.assertTrue(originalRead == readFromPerSampleReadStateManager);
|
||||
}
|
||||
|
||||
Assert.assertEquals(readStateCount, reads.size() - numReadStatesRemoved);
|
||||
}
|
||||
|
||||
// Allow memory used by this test to be reclaimed
|
||||
readCountsPerAlignmentStart = null;
|
||||
reads = null;
|
||||
recordStatesByAlignmentStart = null;
|
||||
}
|
||||
|
||||
private void makeReads() {
|
||||
int alignmentStart = 1;
|
||||
|
||||
for ( int readsThisStack : readCountsPerAlignmentStart ) {
|
||||
ArrayList<SAMRecord> stackReads = new ArrayList<SAMRecord>(ArtificialSAMUtils.createStackOfIdenticalArtificialReads(readsThisStack, header, "foo", 0, alignmentStart, MathUtils.randomIntegerInRange(50, 100)));
|
||||
ArrayList<LocusIteratorByState.SAMRecordState> stackRecordStates = new ArrayList<LocusIteratorByState.SAMRecordState>();
|
||||
|
||||
for ( SAMRecord read : stackReads ) {
|
||||
stackRecordStates.add(new LocusIteratorByState.SAMRecordState(read));
|
||||
}
|
||||
|
||||
reads.addAll(stackReads);
|
||||
recordStatesByAlignmentStart.add(stackRecordStates);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "PerSampleReadStateManagerTestDataProvider")
|
||||
public Object[][] createPerSampleReadStateManagerTests() {
|
||||
for ( List<Integer> thisTestReadStateCounts : Arrays.asList( Arrays.asList(1),
|
||||
Arrays.asList(2),
|
||||
Arrays.asList(10),
|
||||
Arrays.asList(1, 1),
|
||||
Arrays.asList(2, 2),
|
||||
Arrays.asList(10, 10),
|
||||
Arrays.asList(1, 10),
|
||||
Arrays.asList(10, 1),
|
||||
Arrays.asList(1, 1, 1),
|
||||
Arrays.asList(2, 2, 2),
|
||||
Arrays.asList(10, 10, 10),
|
||||
Arrays.asList(1, 1, 1, 1, 1, 1),
|
||||
Arrays.asList(10, 10, 10, 10, 10, 10),
|
||||
Arrays.asList(1, 2, 10, 1, 2, 10)
|
||||
) ) {
|
||||
|
||||
for ( int removalInterval : Arrays.asList(0, 2, 3) ) {
|
||||
new PerSampleReadStateManagerTest(thisTestReadStateCounts, removalInterval);
|
||||
}
|
||||
}
|
||||
|
||||
return PerSampleReadStateManagerTest.getTests(PerSampleReadStateManagerTest.class);
|
||||
}
|
||||
|
||||
@Test(dataProvider = "PerSampleReadStateManagerTestDataProvider")
|
||||
public void runPerSampleReadStateManagerTest( PerSampleReadStateManagerTest test ) {
|
||||
logger.warn("Running test: " + test);
|
||||
|
||||
test.run();
|
||||
}
|
||||
|
||||
///////////////////////////////////////
|
||||
// End Read State Manager Tests //
|
||||
///////////////////////////////////////
|
||||
|
||||
|
||||
|
||||
///////////////////////////////////////
|
||||
// Helper methods / classes //
|
||||
///////////////////////////////////////
|
||||
|
||||
private static ReadProperties createTestReadProperties() {
|
||||
return createTestReadProperties(null);
|
||||
}
|
||||
|
||||
private static ReadProperties createTestReadProperties( DownsamplingMethod downsamplingMethod ) {
|
||||
return new ReadProperties(
|
||||
Collections.<SAMReaderID>emptyList(),
|
||||
new SAMFileHeader(),
|
||||
SAMFileHeader.SortOrder.coordinate,
|
||||
false,
|
||||
SAMFileReader.ValidationStringency.STRICT,
|
||||
null,
|
||||
downsamplingMethod,
|
||||
new ValidationExclusion(),
|
||||
Collections.<ReadFilter>emptyList(),
|
||||
Collections.<ReadTransformer>emptyList(),
|
||||
|
|
@ -344,137 +513,136 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
|
|||
(byte) -1
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
class FakeCloseableIterator<T> implements CloseableIterator<T> {
|
||||
Iterator<T> iterator;
|
||||
private static class FakeCloseableIterator<T> implements CloseableIterator<T> {
|
||||
Iterator<T> iterator;
|
||||
|
||||
public FakeCloseableIterator(Iterator<T> it) {
|
||||
iterator = it;
|
||||
public FakeCloseableIterator(Iterator<T> it) {
|
||||
iterator = it;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return iterator.hasNext();
|
||||
}
|
||||
|
||||
@Override
|
||||
public T next() {
|
||||
return iterator.next();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("Don't remove!");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {}
|
||||
private static final class LIBS_position {
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return iterator.hasNext();
|
||||
}
|
||||
SAMRecord read;
|
||||
|
||||
@Override
|
||||
public T next() {
|
||||
return iterator.next();
|
||||
}
|
||||
final int numOperators;
|
||||
int currentOperatorIndex = 0;
|
||||
int currentPositionOnOperator = 0;
|
||||
int currentReadOffset = 0;
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("Don't remove!");
|
||||
}
|
||||
}
|
||||
boolean isBeforeDeletionStart = false;
|
||||
boolean isBeforeDeletedBase = false;
|
||||
boolean isAfterDeletionEnd = false;
|
||||
boolean isAfterDeletedBase = false;
|
||||
boolean isBeforeInsertion = false;
|
||||
boolean isAfterInsertion = false;
|
||||
boolean isNextToSoftClip = false;
|
||||
|
||||
boolean sawMop = false;
|
||||
|
||||
final class LIBS_position {
|
||||
public LIBS_position(final SAMRecord read) {
|
||||
this.read = read;
|
||||
numOperators = read.getCigar().numCigarElements();
|
||||
}
|
||||
|
||||
SAMRecord read;
|
||||
public int getCurrentReadOffset() {
|
||||
return Math.max(0, currentReadOffset - 1);
|
||||
}
|
||||
|
||||
final int numOperators;
|
||||
int currentOperatorIndex = 0;
|
||||
int currentPositionOnOperator = 0;
|
||||
int currentReadOffset = 0;
|
||||
|
||||
boolean isBeforeDeletionStart = false;
|
||||
boolean isBeforeDeletedBase = false;
|
||||
boolean isAfterDeletionEnd = false;
|
||||
boolean isAfterDeletedBase = false;
|
||||
boolean isBeforeInsertion = false;
|
||||
boolean isAfterInsertion = false;
|
||||
boolean isNextToSoftClip = false;
|
||||
|
||||
boolean sawMop = false;
|
||||
|
||||
public LIBS_position(final SAMRecord read) {
|
||||
this.read = read;
|
||||
numOperators = read.getCigar().numCigarElements();
|
||||
}
|
||||
|
||||
public int getCurrentReadOffset() {
|
||||
return Math.max(0, currentReadOffset - 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Steps forward on the genome. Returns false when done reading the read, true otherwise.
|
||||
*/
|
||||
public boolean stepForwardOnGenome() {
|
||||
if ( currentOperatorIndex == numOperators )
|
||||
return false;
|
||||
|
||||
CigarElement curElement = read.getCigar().getCigarElement(currentOperatorIndex);
|
||||
if ( currentPositionOnOperator >= curElement.getLength() ) {
|
||||
if ( ++currentOperatorIndex == numOperators )
|
||||
/**
|
||||
* Steps forward on the genome. Returns false when done reading the read, true otherwise.
|
||||
*/
|
||||
public boolean stepForwardOnGenome() {
|
||||
if ( currentOperatorIndex == numOperators )
|
||||
return false;
|
||||
|
||||
curElement = read.getCigar().getCigarElement(currentOperatorIndex);
|
||||
currentPositionOnOperator = 0;
|
||||
}
|
||||
CigarElement curElement = read.getCigar().getCigarElement(currentOperatorIndex);
|
||||
if ( currentPositionOnOperator >= curElement.getLength() ) {
|
||||
if ( ++currentOperatorIndex == numOperators )
|
||||
return false;
|
||||
|
||||
switch ( curElement.getOperator() ) {
|
||||
case I: // insertion w.r.t. the reference
|
||||
if ( !sawMop )
|
||||
curElement = read.getCigar().getCigarElement(currentOperatorIndex);
|
||||
currentPositionOnOperator = 0;
|
||||
}
|
||||
|
||||
switch ( curElement.getOperator() ) {
|
||||
case I: // insertion w.r.t. the reference
|
||||
if ( !sawMop )
|
||||
break;
|
||||
case S: // soft clip
|
||||
currentReadOffset += curElement.getLength();
|
||||
case H: // hard clip
|
||||
case P: // padding
|
||||
currentOperatorIndex++;
|
||||
return stepForwardOnGenome();
|
||||
|
||||
case D: // deletion w.r.t. the reference
|
||||
case N: // reference skip (looks and gets processed just like a "deletion", just different logical meaning)
|
||||
currentPositionOnOperator++;
|
||||
break;
|
||||
case S: // soft clip
|
||||
currentReadOffset += curElement.getLength();
|
||||
case H: // hard clip
|
||||
case P: // padding
|
||||
currentOperatorIndex++;
|
||||
return stepForwardOnGenome();
|
||||
|
||||
case D: // deletion w.r.t. the reference
|
||||
case N: // reference skip (looks and gets processed just like a "deletion", just different logical meaning)
|
||||
currentPositionOnOperator++;
|
||||
break;
|
||||
case M:
|
||||
case EQ:
|
||||
case X:
|
||||
sawMop = true;
|
||||
currentReadOffset++;
|
||||
currentPositionOnOperator++;
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("No support for cigar op: " + curElement.getOperator());
|
||||
}
|
||||
|
||||
case M:
|
||||
case EQ:
|
||||
case X:
|
||||
sawMop = true;
|
||||
currentReadOffset++;
|
||||
currentPositionOnOperator++;
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("No support for cigar op: " + curElement.getOperator());
|
||||
final boolean isFirstOp = currentOperatorIndex == 0;
|
||||
final boolean isLastOp = currentOperatorIndex == numOperators - 1;
|
||||
final boolean isFirstBaseOfOp = currentPositionOnOperator == 1;
|
||||
final boolean isLastBaseOfOp = currentPositionOnOperator == curElement.getLength();
|
||||
|
||||
isBeforeDeletionStart = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.D, isLastOp, isLastBaseOfOp);
|
||||
isBeforeDeletedBase = isBeforeDeletionStart || (!isLastBaseOfOp && curElement.getOperator() == CigarOperator.D);
|
||||
isAfterDeletionEnd = isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.D, isFirstOp, isFirstBaseOfOp);
|
||||
isAfterDeletedBase = isAfterDeletionEnd || (!isFirstBaseOfOp && curElement.getOperator() == CigarOperator.D);
|
||||
isBeforeInsertion = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.I, isLastOp, isLastBaseOfOp)
|
||||
|| (!sawMop && curElement.getOperator() == CigarOperator.I);
|
||||
isAfterInsertion = isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.I, isFirstOp, isFirstBaseOfOp);
|
||||
isNextToSoftClip = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.S, isLastOp, isLastBaseOfOp)
|
||||
|| isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.S, isFirstOp, isFirstBaseOfOp);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
final boolean isFirstOp = currentOperatorIndex == 0;
|
||||
final boolean isLastOp = currentOperatorIndex == numOperators - 1;
|
||||
final boolean isFirstBaseOfOp = currentPositionOnOperator == 1;
|
||||
final boolean isLastBaseOfOp = currentPositionOnOperator == curElement.getLength();
|
||||
private static boolean isBeforeOp(final Cigar cigar,
|
||||
final int currentOperatorIndex,
|
||||
final CigarOperator op,
|
||||
final boolean isLastOp,
|
||||
final boolean isLastBaseOfOp) {
|
||||
return !isLastOp && isLastBaseOfOp && cigar.getCigarElement(currentOperatorIndex+1).getOperator() == op;
|
||||
}
|
||||
|
||||
isBeforeDeletionStart = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.D, isLastOp, isLastBaseOfOp);
|
||||
isBeforeDeletedBase = isBeforeDeletionStart || (!isLastBaseOfOp && curElement.getOperator() == CigarOperator.D);
|
||||
isAfterDeletionEnd = isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.D, isFirstOp, isFirstBaseOfOp);
|
||||
isAfterDeletedBase = isAfterDeletionEnd || (!isFirstBaseOfOp && curElement.getOperator() == CigarOperator.D);
|
||||
isBeforeInsertion = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.I, isLastOp, isLastBaseOfOp)
|
||||
|| (!sawMop && curElement.getOperator() == CigarOperator.I);
|
||||
isAfterInsertion = isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.I, isFirstOp, isFirstBaseOfOp);
|
||||
isNextToSoftClip = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.S, isLastOp, isLastBaseOfOp)
|
||||
|| isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.S, isFirstOp, isFirstBaseOfOp);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
private static boolean isBeforeOp(final Cigar cigar,
|
||||
final int currentOperatorIndex,
|
||||
final CigarOperator op,
|
||||
final boolean isLastOp,
|
||||
final boolean isLastBaseOfOp) {
|
||||
return !isLastOp && isLastBaseOfOp && cigar.getCigarElement(currentOperatorIndex+1).getOperator() == op;
|
||||
}
|
||||
|
||||
private static boolean isAfterOp(final Cigar cigar,
|
||||
final int currentOperatorIndex,
|
||||
final CigarOperator op,
|
||||
final boolean isFirstOp,
|
||||
final boolean isFirstBaseOfOp) {
|
||||
return !isFirstOp && isFirstBaseOfOp && cigar.getCigarElement(currentOperatorIndex-1).getOperator() == op;
|
||||
private static boolean isAfterOp(final Cigar cigar,
|
||||
final int currentOperatorIndex,
|
||||
final CigarOperator op,
|
||||
final boolean isFirstOp,
|
||||
final boolean isFirstBaseOfOp) {
|
||||
return !isFirstOp && isFirstBaseOfOp && cigar.getCigarElement(currentOperatorIndex-1).getOperator() == op;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -109,12 +109,16 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
dictionary = reference.getSequenceDictionary();
|
||||
genomeLocParser = new GenomeLocParser(dictionary);
|
||||
|
||||
// TODO: test shard boundaries
|
||||
|
||||
intervals = new ArrayList<GenomeLoc>();
|
||||
intervals.add(genomeLocParser.createGenomeLoc("1", 10, 20));
|
||||
intervals.add(genomeLocParser.createGenomeLoc("1", 1, 999));
|
||||
intervals.add(genomeLocParser.createGenomeLoc("1", 1000, 1999));
|
||||
intervals.add(genomeLocParser.createGenomeLoc("1", 2000, 2999));
|
||||
intervals.add(genomeLocParser.createGenomeLoc("1", 10000, 20000));
|
||||
intervals.add(genomeLocParser.createGenomeLoc("1", 249250600, 249250621));
|
||||
intervals.add(genomeLocParser.createGenomeLoc("2", 1, 100));
|
||||
intervals.add(genomeLocParser.createGenomeLoc("20", 10000, 10100));
|
||||
intervals = IntervalUtils.sortAndMergeIntervals(genomeLocParser, intervals, IntervalMergingRule.OVERLAPPING_ONLY).toList();
|
||||
|
||||
|
|
@ -126,6 +130,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
reads.add(buildSAMRecord("boundary_unequal", "1", 1990, 2008));
|
||||
reads.add(buildSAMRecord("extended_and_np", "1", 990, 1990));
|
||||
reads.add(buildSAMRecord("outside_intervals", "1", 5000, 6000));
|
||||
reads.add(buildSAMRecord("end_of_chr1", "1", 249250600, 249250700));
|
||||
reads.add(buildSAMRecord("simple20", "20", 10025, 10075));
|
||||
|
||||
// required by LocusIteratorByState, and I prefer to list them in test case order above
|
||||
|
|
@ -139,8 +144,6 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
List<GenomeLoc> activeIntervals = getIsActiveIntervals(walker, intervals);
|
||||
// Contract: Every genome position in the analysis interval(s) is processed by the walker's isActive() call
|
||||
verifyEqualIntervals(intervals, activeIntervals);
|
||||
|
||||
// TODO: more tests and edge cases
|
||||
}
|
||||
|
||||
private List<GenomeLoc> getIsActiveIntervals(DummyActiveRegionWalker walker, List<GenomeLoc> intervals) {
|
||||
|
|
@ -171,8 +174,6 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
|
||||
Collection<ActiveRegion> activeRegions = getActiveRegions(walker, intervals).values();
|
||||
verifyActiveRegionCoverage(intervals, activeRegions);
|
||||
|
||||
// TODO: more tests and edge cases
|
||||
}
|
||||
|
||||
private void verifyActiveRegionCoverage(List<GenomeLoc> intervals, Collection<ActiveRegion> activeRegions) {
|
||||
|
|
@ -231,6 +232,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
// boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
|
||||
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
|
||||
// outside_intervals: none
|
||||
// end_of_chr1: Primary in 1:249250600-249250621
|
||||
// simple20: Primary in 20:10000-10100
|
||||
|
||||
Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
|
||||
|
|
@ -245,6 +247,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
verifyReadNotPlaced(region, "boundary_unequal");
|
||||
verifyReadNotPlaced(region, "extended_and_np");
|
||||
verifyReadNotPlaced(region, "outside_intervals");
|
||||
verifyReadNotPlaced(region, "end_of_chr1");
|
||||
verifyReadNotPlaced(region, "simple20");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999));
|
||||
|
|
@ -256,6 +259,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
getRead(region, "boundary_unequal");
|
||||
getRead(region, "extended_and_np");
|
||||
verifyReadNotPlaced(region, "outside_intervals");
|
||||
verifyReadNotPlaced(region, "end_of_chr1");
|
||||
verifyReadNotPlaced(region, "simple20");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
|
||||
|
|
@ -267,6 +271,19 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
verifyReadNotPlaced(region, "boundary_unequal");
|
||||
verifyReadNotPlaced(region, "extended_and_np");
|
||||
verifyReadNotPlaced(region, "outside_intervals");
|
||||
verifyReadNotPlaced(region, "end_of_chr1");
|
||||
verifyReadNotPlaced(region, "simple20");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 249250600, 249250621));
|
||||
|
||||
verifyReadNotPlaced(region, "simple");
|
||||
verifyReadNotPlaced(region, "overlap_equal");
|
||||
verifyReadNotPlaced(region, "overlap_unequal");
|
||||
verifyReadNotPlaced(region, "boundary_equal");
|
||||
verifyReadNotPlaced(region, "boundary_unequal");
|
||||
verifyReadNotPlaced(region, "extended_and_np");
|
||||
verifyReadNotPlaced(region, "outside_intervals");
|
||||
getRead(region, "end_of_chr1");
|
||||
verifyReadNotPlaced(region, "simple20");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
|
||||
|
|
@ -278,9 +295,8 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
verifyReadNotPlaced(region, "boundary_unequal");
|
||||
verifyReadNotPlaced(region, "extended_and_np");
|
||||
verifyReadNotPlaced(region, "outside_intervals");
|
||||
verifyReadNotPlaced(region, "end_of_chr1");
|
||||
getRead(region, "simple20");
|
||||
|
||||
// TODO: more tests and edge cases
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -300,6 +316,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
// boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
|
||||
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
|
||||
// outside_intervals: none
|
||||
// end_of_chr1: Primary in 1:249250600-249250621
|
||||
// simple20: Primary in 20:10000-10100
|
||||
|
||||
Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
|
||||
|
|
@ -314,6 +331,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
verifyReadNotPlaced(region, "boundary_unequal");
|
||||
getRead(region, "extended_and_np");
|
||||
verifyReadNotPlaced(region, "outside_intervals");
|
||||
verifyReadNotPlaced(region, "end_of_chr1");
|
||||
verifyReadNotPlaced(region, "simple20");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999));
|
||||
|
|
@ -325,6 +343,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
getRead(region, "boundary_unequal");
|
||||
getRead(region, "extended_and_np");
|
||||
verifyReadNotPlaced(region, "outside_intervals");
|
||||
verifyReadNotPlaced(region, "end_of_chr1");
|
||||
verifyReadNotPlaced(region, "simple20");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
|
||||
|
|
@ -336,6 +355,19 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
getRead(region, "boundary_unequal");
|
||||
verifyReadNotPlaced(region, "extended_and_np");
|
||||
verifyReadNotPlaced(region, "outside_intervals");
|
||||
verifyReadNotPlaced(region, "end_of_chr1");
|
||||
verifyReadNotPlaced(region, "simple20");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 249250600, 249250621));
|
||||
|
||||
verifyReadNotPlaced(region, "simple");
|
||||
verifyReadNotPlaced(region, "overlap_equal");
|
||||
verifyReadNotPlaced(region, "overlap_unequal");
|
||||
verifyReadNotPlaced(region, "boundary_equal");
|
||||
verifyReadNotPlaced(region, "boundary_unequal");
|
||||
verifyReadNotPlaced(region, "extended_and_np");
|
||||
verifyReadNotPlaced(region, "outside_intervals");
|
||||
getRead(region, "end_of_chr1");
|
||||
verifyReadNotPlaced(region, "simple20");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
|
||||
|
|
@ -347,9 +379,8 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
verifyReadNotPlaced(region, "boundary_unequal");
|
||||
verifyReadNotPlaced(region, "extended_and_np");
|
||||
verifyReadNotPlaced(region, "outside_intervals");
|
||||
verifyReadNotPlaced(region, "end_of_chr1");
|
||||
getRead(region, "simple20");
|
||||
|
||||
// TODO: more tests and edge cases
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -370,6 +401,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
// boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
|
||||
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
|
||||
// outside_intervals: none
|
||||
// end_of_chr1: Primary in 1:249250600-249250621
|
||||
// simple20: Primary in 20:10000-10100
|
||||
|
||||
Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
|
||||
|
|
@ -384,6 +416,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
verifyReadNotPlaced(region, "boundary_unequal");
|
||||
getRead(region, "extended_and_np");
|
||||
verifyReadNotPlaced(region, "outside_intervals");
|
||||
verifyReadNotPlaced(region, "end_of_chr1");
|
||||
verifyReadNotPlaced(region, "simple20");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999));
|
||||
|
|
@ -395,6 +428,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
getRead(region, "boundary_unequal");
|
||||
getRead(region, "extended_and_np");
|
||||
verifyReadNotPlaced(region, "outside_intervals");
|
||||
verifyReadNotPlaced(region, "end_of_chr1");
|
||||
verifyReadNotPlaced(region, "simple20");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
|
||||
|
|
@ -406,6 +440,19 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
getRead(region, "boundary_unequal");
|
||||
getRead(region, "extended_and_np");
|
||||
verifyReadNotPlaced(region, "outside_intervals");
|
||||
verifyReadNotPlaced(region, "end_of_chr1");
|
||||
verifyReadNotPlaced(region, "simple20");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 249250600, 249250621));
|
||||
|
||||
verifyReadNotPlaced(region, "simple");
|
||||
verifyReadNotPlaced(region, "overlap_equal");
|
||||
verifyReadNotPlaced(region, "overlap_unequal");
|
||||
verifyReadNotPlaced(region, "boundary_equal");
|
||||
verifyReadNotPlaced(region, "boundary_unequal");
|
||||
verifyReadNotPlaced(region, "extended_and_np");
|
||||
verifyReadNotPlaced(region, "outside_intervals");
|
||||
getRead(region, "end_of_chr1");
|
||||
verifyReadNotPlaced(region, "simple20");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
|
||||
|
|
@ -417,9 +464,13 @@ public class TraverseActiveRegionsTest extends BaseTest {
|
|||
verifyReadNotPlaced(region, "boundary_unequal");
|
||||
verifyReadNotPlaced(region, "extended_and_np");
|
||||
verifyReadNotPlaced(region, "outside_intervals");
|
||||
verifyReadNotPlaced(region, "end_of_chr1");
|
||||
getRead(region, "simple20");
|
||||
}
|
||||
|
||||
// TODO: more tests and edge cases
|
||||
@Test
|
||||
public void testUnmappedReads() {
|
||||
// TODO
|
||||
}
|
||||
|
||||
private void verifyReadNotPlaced(ActiveRegion region, String readName) {
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ import org.broadinstitute.sting.BaseTest;
|
|||
import org.broadinstitute.sting.commandline.Tags;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.ReadShardBalancer;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.LegacyReadShardBalancer;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
||||
|
|
@ -114,7 +114,7 @@ public class TraverseReadsUnitTest extends BaseTest {
|
|||
@Test
|
||||
public void testUnmappedReadCount() {
|
||||
SAMDataSource dataSource = new SAMDataSource(bamList,new ThreadAllocation(),null,genomeLocParser);
|
||||
Iterable<Shard> shardStrategy = dataSource.createShardIteratorOverAllReads(new ReadShardBalancer());
|
||||
Iterable<Shard> shardStrategy = dataSource.createShardIteratorOverAllReads(new LegacyReadShardBalancer());
|
||||
|
||||
countReadWalker.initialize();
|
||||
Object accumulator = countReadWalker.reduceInit();
|
||||
|
|
|
|||
|
|
@ -23,14 +23,14 @@ public class LegacyReservoirDownsamplerUnitTest {
|
|||
|
||||
@Test
|
||||
public void testEmptyIterator() {
|
||||
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(1);
|
||||
LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(1);
|
||||
Assert.assertTrue(downsampler.isEmpty(),"Downsampler is not empty but should be.");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOneElementWithPoolSizeOne() {
|
||||
List<GATKSAMRecord> reads = Collections.singletonList(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
|
||||
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(1);
|
||||
LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(1);
|
||||
downsampler.addAll(reads);
|
||||
|
||||
Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
|
||||
|
|
@ -42,7 +42,7 @@ public class LegacyReservoirDownsamplerUnitTest {
|
|||
@Test
|
||||
public void testOneElementWithPoolSizeGreaterThanOne() {
|
||||
List<GATKSAMRecord> reads = Collections.singletonList(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
|
||||
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(5);
|
||||
LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(5);
|
||||
downsampler.addAll(reads);
|
||||
|
||||
Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
|
||||
|
|
@ -58,7 +58,7 @@ public class LegacyReservoirDownsamplerUnitTest {
|
|||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,76));
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,76));
|
||||
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(5);
|
||||
LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(5);
|
||||
downsampler.addAll(reads);
|
||||
|
||||
Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
|
||||
|
|
@ -78,7 +78,7 @@ public class LegacyReservoirDownsamplerUnitTest {
|
|||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,76));
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read4",0,1,76));
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read5",0,1,76));
|
||||
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(5);
|
||||
LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(5);
|
||||
downsampler.addAll(reads);
|
||||
|
||||
Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
|
||||
|
|
@ -99,7 +99,7 @@ public class LegacyReservoirDownsamplerUnitTest {
|
|||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,76));
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,76));
|
||||
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(0);
|
||||
LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(0);
|
||||
downsampler.addAll(reads);
|
||||
|
||||
Assert.assertTrue(downsampler.isEmpty(),"Downsampler isn't empty but should be");
|
||||
|
|
@ -115,7 +115,7 @@ public class LegacyReservoirDownsamplerUnitTest {
|
|||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,76));
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read4",0,1,76));
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read5",0,1,76));
|
||||
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(1);
|
||||
LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(1);
|
||||
downsampler.addAll(reads);
|
||||
|
||||
Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
|
||||
|
|
@ -128,7 +128,7 @@ public class LegacyReservoirDownsamplerUnitTest {
|
|||
public void testFillingAcrossLoci() {
|
||||
List<SAMRecord> reads = new ArrayList<SAMRecord>();
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
|
||||
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(5);
|
||||
LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(5);
|
||||
downsampler.addAll(reads);
|
||||
|
||||
Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
|
||||
|
|
|
|||
Loading…
Reference in New Issue