Merge pull request #193 from broadinstitute/eb_contamination_fixing_for_reduced_reads
Eb contamination fixing for reduced reads
This commit is contained in:
commit
071fd67d55
|
|
@ -54,7 +54,6 @@ import org.broadinstitute.sting.utils.collections.DefaultHashMap;
|
|||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.PrintStream;
|
||||
import java.util.Collections;
|
||||
import java.util.Map;
|
||||
|
||||
|
|
@ -170,10 +169,6 @@ public class StandardCallerArgumentCollection {
|
|||
@Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ", required = false)
|
||||
public AFCalcFactory.Calculation AFmodel = AFCalcFactory.Calculation.getDefaultModel();
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName = "logRemovedReadsFromContaminationFiltering", shortName="contaminationLog", required=false)
|
||||
public PrintStream contaminationLog = null;
|
||||
|
||||
@Hidden
|
||||
@Argument(shortName = "logExactCalls", doc="x", required=false)
|
||||
public File exactCallsLog = null;
|
||||
|
|
@ -192,7 +187,6 @@ public class StandardCallerArgumentCollection {
|
|||
this.STANDARD_CONFIDENCE_FOR_EMITTING = SCAC.STANDARD_CONFIDENCE_FOR_EMITTING;
|
||||
this.CONTAMINATION_FRACTION = SCAC.CONTAMINATION_FRACTION;
|
||||
this.CONTAMINATION_FRACTION_FILE=SCAC.CONTAMINATION_FRACTION_FILE;
|
||||
this.contaminationLog = SCAC.contaminationLog;
|
||||
this.exactCallsLog = SCAC.exactCallsLog;
|
||||
this.sampleContamination=SCAC.sampleContamination;
|
||||
this.AFmodel = SCAC.AFmodel;
|
||||
|
|
|
|||
|
|
@ -183,6 +183,6 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements ActiveR
|
|||
* @param headerLines
|
||||
*/
|
||||
public void initialize ( AnnotatorCompatible walker, GenomeAnalysisEngine toolkit, Set<VCFHeaderLine> headerLines ) {
|
||||
useDithering = ! toolkit.getArguments().disableRandomization;
|
||||
useDithering = ! toolkit.getArguments().disableDithering;
|
||||
}
|
||||
}
|
||||
|
|
@ -145,7 +145,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
final ReadBackedPileup pileup = context.getBasePileup();
|
||||
if (pileup != null) {
|
||||
final GenotypeBuilder b = new GenotypeBuilder(sample.getKey());
|
||||
final double[] genotypeLikelihoods = pairModel.computeDiploidReadHaplotypeLikelihoods(pileup, haplotypeMap, ref, eventLength, perReadAlleleLikelihoodMap.get(sample.getKey()), UAC.getSampleContamination().get(sample.getKey()), UAC.contaminationLog);
|
||||
final double[] genotypeLikelihoods = pairModel.computeDiploidReadHaplotypeLikelihoods(pileup, haplotypeMap, ref, eventLength, perReadAlleleLikelihoodMap.get(sample.getKey()), UAC.getSampleContamination().get(sample.getKey()));
|
||||
b.PL(genotypeLikelihoods);
|
||||
b.DP(getFilteredDepth(pileup));
|
||||
genotypes.add(b.make());
|
||||
|
|
|
|||
|
|
@ -105,7 +105,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
|||
ReadBackedPileup pileup = AlignmentContextUtils.stratify(sample.getValue(), contextType).getBasePileup();
|
||||
final Double contamination = UAC.getSampleContamination().get(sample.getKey());
|
||||
if( contamination > 0.0 ) //no need to enter if no contamination reduction
|
||||
pileup = perReadAlleleLikelihoodMap.createPerAlleleDownsampledBasePileup(pileup,contamination, UAC.contaminationLog);
|
||||
pileup = perReadAlleleLikelihoodMap.createPerAlleleDownsampledBasePileup(pileup, contamination);
|
||||
if ( useBAQedPileup )
|
||||
pileup = createBAQedPileup(pileup);
|
||||
|
||||
|
|
|
|||
|
|
@ -64,7 +64,6 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
|||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
||||
public class GenotypingEngine {
|
||||
|
|
@ -196,13 +195,13 @@ public class GenotypingEngine {
|
|||
logger.info("Genotyping event at " + loc + " with alleles = " + mergedVC.getAlleles());
|
||||
}
|
||||
|
||||
final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap = convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, UG_engine.getUAC().CONTAMINATION_FRACTION, UG_engine.getUAC().contaminationLog );
|
||||
final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap = convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, UG_engine.getUAC().CONTAMINATION_FRACTION );
|
||||
|
||||
final GenotypesContext genotypes = calculateGLsForThisEvent( alleleReadMap, mergedVC );
|
||||
final VariantContext call = UG_engine.calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), mergedVC.isSNP() ? GenotypeLikelihoodsCalculationModel.Model.SNP : GenotypeLikelihoodsCalculationModel.Model.INDEL);
|
||||
if( call != null ) {
|
||||
final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap_annotations = ( USE_FILTERED_READ_MAP_FOR_ANNOTATIONS ? alleleReadMap :
|
||||
convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, 0.0, UG_engine.getUAC().contaminationLog ) );
|
||||
convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, 0.0 ) );
|
||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = filterToOnlyOverlappingReads( genomeLocParser, alleleReadMap_annotations, perSampleFilteredReadList, call );
|
||||
|
||||
VariantContext annotatedCall = call;
|
||||
|
|
@ -406,8 +405,7 @@ public class GenotypingEngine {
|
|||
// BUGBUG: ugh, too complicated
|
||||
protected Map<String, PerReadAlleleLikelihoodMap> convertHaplotypeReadMapToAlleleReadMap( final Map<String, PerReadAlleleLikelihoodMap> haplotypeReadMap,
|
||||
final Map<Allele, List<Haplotype>> alleleMapper,
|
||||
final double downsamplingFraction,
|
||||
final PrintStream downsamplingLog ) {
|
||||
final double downsamplingFraction ) {
|
||||
|
||||
final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap = new LinkedHashMap<String, PerReadAlleleLikelihoodMap>();
|
||||
for( final Map.Entry<String, PerReadAlleleLikelihoodMap> haplotypeReadMapEntry : haplotypeReadMap.entrySet() ) { // for each sample
|
||||
|
|
@ -424,7 +422,7 @@ public class GenotypingEngine {
|
|||
perReadAlleleLikelihoodMap.add(readEntry.getKey(), alleleMapperEntry.getKey(), maxLikelihood);
|
||||
}
|
||||
}
|
||||
perReadAlleleLikelihoodMap.performPerAlleleDownsampling(downsamplingFraction, downsamplingLog); // perform contamination downsampling
|
||||
perReadAlleleLikelihoodMap.performPerAlleleDownsampling(downsamplingFraction); // perform contamination downsampling
|
||||
alleleReadMap.put(haplotypeReadMapEntry.getKey(), perReadAlleleLikelihoodMap);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -61,7 +61,6 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
|||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
import org.broadinstitute.variant.variantcontext.Allele;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.Arrays;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
|
|
@ -213,13 +212,12 @@ public class PairHMMIndelErrorModel {
|
|||
final ReferenceContext ref,
|
||||
final int eventLength,
|
||||
final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap,
|
||||
final double downsamplingFraction,
|
||||
final PrintStream downsamplingLog) {
|
||||
final double downsamplingFraction) {
|
||||
final int numHaplotypes = haplotypeMap.size();
|
||||
|
||||
final int readCounts[] = new int[pileup.getNumberOfElements()];
|
||||
final double[][] readLikelihoods = computeGeneralReadHaplotypeLikelihoods(pileup, haplotypeMap, ref, eventLength, perReadAlleleLikelihoodMap, readCounts);
|
||||
perReadAlleleLikelihoodMap.performPerAlleleDownsampling(downsamplingFraction, downsamplingLog);
|
||||
perReadAlleleLikelihoodMap.performPerAlleleDownsampling(downsamplingFraction);
|
||||
return getDiploidHaplotypeLikelihoods(numHaplotypes, readCounts, readLikelihoods);
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -56,8 +56,8 @@ import java.util.List;
|
|||
|
||||
public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
|
||||
|
||||
private final static String baseCommandIndels = "-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm INDEL -mbq 20 -minIndelFrac 0.0 --dbsnp " + b36dbSNP129;
|
||||
private final static String baseCommandIndelsb37 = "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -mbq 20 --dbsnp " + b37dbSNP132;
|
||||
private final static String baseCommandIndels = "-T UnifiedGenotyper --disableDithering -R " + b36KGReference + " --no_cmdline_in_header -glm INDEL -mbq 20 -minIndelFrac 0.0 --dbsnp " + b36dbSNP129;
|
||||
private final static String baseCommandIndelsb37 = "-T UnifiedGenotyper --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -mbq 20 --dbsnp " + b37dbSNP132;
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
|
|
@ -73,7 +73,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("118ed5b54fc9ce1cde89f06a20afebef"));
|
||||
Arrays.asList("d8b0c5be39ec6b239641c2f2646d2bc3"));
|
||||
|
||||
executeTest(String.format("test indel caller in SLX"), spec);
|
||||
}
|
||||
|
|
@ -88,7 +88,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
|
|||
" -minIndelCnt 1" +
|
||||
" -L 1:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("6ef59013331bc031ea37807b325d7d2c"));
|
||||
Arrays.asList("d9572a227ccb13a6baa6dc4fb65bc1e5"));
|
||||
|
||||
executeTest(String.format("test indel caller in SLX with low min allele count"), spec);
|
||||
}
|
||||
|
|
@ -101,7 +101,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("dd3ee4675377191e34aaf67335e0219a"));
|
||||
Arrays.asList("54e13f696f56eb742bf449ad11d0dc5f"));
|
||||
|
||||
executeTest(String.format("test indel calling, multiple technologies"), spec);
|
||||
}
|
||||
|
|
@ -111,7 +111,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("bb06ef8262f91664b7d2fe7e1e5df195"));
|
||||
Arrays.asList("16d975480ff1e689113171805b916b62"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec);
|
||||
}
|
||||
|
||||
|
|
@ -121,7 +121,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
|
|||
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
|
||||
+ privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("0a2a8cc2d1a79e84624836a31de5491c"));
|
||||
Arrays.asList("60ed3f8d5bc3f765e6ce3fa698b68bb7"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec);
|
||||
}
|
||||
|
||||
|
|
@ -136,7 +136,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
|
||||
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L " + result.get(0).getAbsolutePath(), 1,
|
||||
Arrays.asList("939f80c6d2dfb592956aed3bdeaf319d"));
|
||||
Arrays.asList("e87f5c76661527ef7aa44e528fe19573"));
|
||||
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -176,7 +176,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
|
|||
public void testMinIndelFraction0() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
assessMinIndelFraction + " -minIndelFrac 0.0", 1,
|
||||
Arrays.asList("fc937f92e59dfe07b894411b5dfc166a"));
|
||||
Arrays.asList("264325878b988acc11d8e5d9d2ba0b7f"));
|
||||
executeTest("test minIndelFraction 0.0", spec);
|
||||
}
|
||||
|
||||
|
|
@ -184,7 +184,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
|
|||
public void testMinIndelFraction25() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
assessMinIndelFraction + " -minIndelFrac 0.25", 1,
|
||||
Arrays.asList("41ad9e0edca4b9987390ba5c07f39e4a"));
|
||||
Arrays.asList("98abcfb0a008050eba8b9c285a25b2a0"));
|
||||
executeTest("test minIndelFraction 0.25", spec);
|
||||
}
|
||||
|
||||
|
|
@ -200,7 +200,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testHaplotype0Length() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -I " + privateTestDir + "haplotype0.bam -L 20:47507681 -R " + b37KGReference + " -baq CALCULATE_AS_NECESSARY -glm BOTH -o /dev/null",
|
||||
"-T UnifiedGenotyper --disableDithering -I " + privateTestDir + "haplotype0.bam -L 20:47507681 -R " + b37KGReference + " -baq CALCULATE_AS_NECESSARY -glm BOTH -o /dev/null",
|
||||
0,
|
||||
Collections.<String>emptyList());
|
||||
executeTest("testHaplotype0Length", spec);
|
||||
|
|
|
|||
|
|
@ -60,8 +60,8 @@ import java.util.Collections;
|
|||
|
||||
public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||
|
||||
private final static String baseCommand = "-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -minIndelFrac 0.0 --dbsnp " + b36dbSNP129;
|
||||
private final static String baseCommandNoCmdLineHeaderStdout = "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam";
|
||||
private final static String baseCommand = "-T UnifiedGenotyper --disableDithering -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -minIndelFrac 0.0 --dbsnp " + b36dbSNP129;
|
||||
private final static String baseCommandNoCmdLineHeaderStdout = "-T UnifiedGenotyper --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam";
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
|
|
@ -73,15 +73,15 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMinBaseQualityScore() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --min_base_quality_score 26", 1,
|
||||
Arrays.asList("6ee6537e9ebc1bfc7c6cf8f04b1582ff"));
|
||||
Arrays.asList("30be17df00acc8a92223f51fe7c1bdf7"));
|
||||
executeTest("test min_base_quality_score 26", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSLOD() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b36KGReference + " --computeSLOD --no_cmdline_in_header -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
|
||||
Arrays.asList("55760482335497086458b09e415ecf54"));
|
||||
"-T UnifiedGenotyper --disableDithering -R " + b36KGReference + " --computeSLOD --no_cmdline_in_header -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
|
||||
Arrays.asList("4aa226c00a242047cf427d0919003048"));
|
||||
executeTest("test SLOD", spec);
|
||||
}
|
||||
|
||||
|
|
@ -89,15 +89,15 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testNDA() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --annotateNDA -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
|
||||
Arrays.asList("938e888a40182878be4c3cc4859adb69"));
|
||||
Arrays.asList("17f65eca1e6c1f06919a58f230b6d8d3"));
|
||||
executeTest("test NDA", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCompTrack() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -comp:FOO " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
|
||||
Arrays.asList("7dc186d420487e4e156a24ec8dea0951"));
|
||||
"-T UnifiedGenotyper --disableDithering -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -comp:FOO " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
|
||||
Arrays.asList("50937942e3d228614d2531c3be237709"));
|
||||
executeTest("test using comp track", spec);
|
||||
}
|
||||
|
||||
|
|
@ -111,17 +111,17 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testOutputParameterSitesOnly() {
|
||||
testOutputParameters("-sites_only", "f99c7471127a6fb6f72e136bc873b2c9");
|
||||
testOutputParameters("-sites_only", "48cd40d3994911a6f2609bfd375e1d2d");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOutputParameterAllConfident() {
|
||||
testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "5649f72de04e1391e0f2bb86843d3d72");
|
||||
testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "28f40ce47651f504158fc4e5bb58df4b");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOutputParameterAllSites() {
|
||||
testOutputParameters("--output_mode EMIT_ALL_SITES", "cb151bb9e90680b12714d481091ed209");
|
||||
testOutputParameters("--output_mode EMIT_ALL_SITES", "5259dafaa1b57d9489003b16a48e35f8");
|
||||
}
|
||||
|
||||
private void testOutputParameters(final String args, final String md5) {
|
||||
|
|
@ -135,7 +135,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testConfidence() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
|
||||
Arrays.asList("4af83a883ecc03a23b0aa6dd4b8f1ceb"));
|
||||
Arrays.asList("918109938ef355d759dafc3ebb47d8a5"));
|
||||
executeTest("test confidence 1", spec1);
|
||||
}
|
||||
|
||||
|
|
@ -143,7 +143,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testNoPrior() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 -noPrior", 1,
|
||||
Arrays.asList("422656266117f8d01e17e5c491c49a24"));
|
||||
Arrays.asList("7ac60bdc355d97c0939e644b58de47d7"));
|
||||
executeTest("test no prior 1", spec1);
|
||||
|
||||
}
|
||||
|
|
@ -155,12 +155,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
// --------------------------------------------------------------------------------------------------------------
|
||||
@Test
|
||||
public void testHeterozyosity1() {
|
||||
testHeterozosity( 0.01, "ffc1f83a045dc09360e11de7a8efd159" );
|
||||
testHeterozosity( 0.01, "3b66f82dbb746875638e076bf51a1583" );
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHeterozyosity2() {
|
||||
testHeterozosity( 1.0 / 1850, "5426a98df9f5fd70aef295d889c4e4f1" );
|
||||
testHeterozosity( 1.0 / 1850, "714c1795334c7c62c046a75479381ae6" );
|
||||
}
|
||||
|
||||
private void testHeterozosity(final double arg, final String md5) {
|
||||
|
|
@ -176,7 +176,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
private final static String COMPRESSED_OUTPUT_MD5 = "d5a7326fdcf6d441b73c381912ad3a2a";
|
||||
private final static String COMPRESSED_OUTPUT_MD5 = "6f79205f7ed8006470f056f6805db6c8";
|
||||
|
||||
@Test
|
||||
public void testCompressedOutput() {
|
||||
|
|
@ -232,7 +232,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("0a4a78da876bfa3d42170249a94357b4"));
|
||||
Arrays.asList("d995b76adc3766889f7c2c88da14055c"));
|
||||
|
||||
executeTest(String.format("test multiple technologies"), spec);
|
||||
}
|
||||
|
|
@ -251,7 +251,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -L 1:10,000,000-10,100,000" +
|
||||
" -baq CALCULATE_AS_NECESSARY",
|
||||
1,
|
||||
Arrays.asList("89182fd4d9532ab4b2a0a84bfb557089"));
|
||||
Arrays.asList("9669e1643d22d5ad047b3941aeefd6db"));
|
||||
|
||||
executeTest(String.format("test calling with BAQ"), spec);
|
||||
}
|
||||
|
|
@ -281,8 +281,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testNsInCigar() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "testWithNs.bam -o %s -L 8:141813600-141813700 -out_mode EMIT_ALL_SITES", 1,
|
||||
Arrays.asList("4d36969d4f8f1094f1fb6e7e085c19f6"));
|
||||
"-T UnifiedGenotyper --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "testWithNs.bam -o %s -L 8:141813600-141813700 -out_mode EMIT_ALL_SITES", 1,
|
||||
Arrays.asList("2ae3fd39c53a6954d32faed8703adfe8"));
|
||||
executeTest("test calling on reads with Ns in CIGAR", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -53,7 +53,7 @@ import java.util.Arrays;
|
|||
|
||||
public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
|
||||
|
||||
private final static String baseCommand = "-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -minIndelFrac 0.0 --dbsnp " + b36dbSNP129;
|
||||
private final static String baseCommand = "-T UnifiedGenotyper --disableDithering -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -minIndelFrac 0.0 --dbsnp " + b36dbSNP129;
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
|
|
@ -64,7 +64,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
|
|||
public void testMultiSamplePilot1() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
|
||||
Arrays.asList("52b6086f4597da5b35ab902bea4066fc"));
|
||||
Arrays.asList("e3efd1917192ea743ac1e9958aa0a98f"));
|
||||
executeTest("test MultiSample Pilot1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -72,7 +72,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
|
|||
public void testWithAllelesPassedIn1() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||
Arrays.asList("5b31b811072a4df04524e13604015f9b"));
|
||||
Arrays.asList("ebfcc3dd8c1788929cb50050c5d456df"));
|
||||
executeTest("test MultiSample Pilot2 with alleles passed in", spec1);
|
||||
}
|
||||
|
||||
|
|
@ -80,7 +80,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
|
|||
public void testWithAllelesPassedIn2() {
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||
Arrays.asList("d9992e55381afb43742cc9b30fcd7538"));
|
||||
Arrays.asList("698e54aeae3130779d246b9480a4052c"));
|
||||
executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -88,22 +88,22 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
|
|||
public void testSingleSamplePilot2() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("33ab66c2f062cfa1f7fcc077165f778c"));
|
||||
Arrays.asList("aaadb2a355d87344eabb6ac4495a11e4"));
|
||||
executeTest("test SingleSample Pilot2", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultipleSNPAlleles() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
|
||||
Arrays.asList("28bfbff3da3af43d6a1eff673e5cb0f8"));
|
||||
"-T UnifiedGenotyper --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
|
||||
Arrays.asList("09a1a4d4bf0289bcc5e8a958f783a989"));
|
||||
executeTest("test Multiple SNP alleles", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBadRead() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH -I " + privateTestDir + "badRead.test.bam -o %s -L 1:22753424-22753464", 1,
|
||||
"-T UnifiedGenotyper --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH -I " + privateTestDir + "badRead.test.bam -o %s -L 1:22753424-22753464", 1,
|
||||
Arrays.asList("d915535c1458733f09f82670092fcab6"));
|
||||
executeTest("test bad read", spec);
|
||||
}
|
||||
|
|
@ -111,16 +111,16 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
|
|||
@Test
|
||||
public void testReverseTrim() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1,
|
||||
Arrays.asList("a9edd04374ee9c42970291f39a50c191"));
|
||||
"-T UnifiedGenotyper --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1,
|
||||
Arrays.asList("57a1bb44967988f2b7ae7779127990ae"));
|
||||
executeTest("test reverse trim", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMismatchedPLs() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1,
|
||||
Arrays.asList("6fc32ca9de769060f3c2a3d94f8f2f91"));
|
||||
"-T UnifiedGenotyper --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1,
|
||||
Arrays.asList("3011c20165951ca43c8a4e86a5835dbd"));
|
||||
executeTest("test mismatched PLs", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -62,25 +62,25 @@ public class UnifiedGenotyperReducedReadsIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testReducedBam() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
|
||||
Arrays.asList("d55d37e2e86aefb91e47183d2c7dede8"));
|
||||
"-T UnifiedGenotyper --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
|
||||
Arrays.asList("da3fd775c8add1f7962baabf06b7d372"));
|
||||
executeTest("test calling on a ReducedRead BAM", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReducedBamSNPs() {
|
||||
testReducedCalling("SNP", "b424779c6609cb727a675bdd301290e6");
|
||||
testReducedCalling("SNP", "76244ab1be60814f1412e6cd09e546cc");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReducedBamINDELs() {
|
||||
testReducedCalling("INDEL", "38c3d14cb9086f7355788d3db9b8ff16");
|
||||
testReducedCalling("INDEL", "9a986b98ed014576ce923e07452447f4");
|
||||
}
|
||||
|
||||
|
||||
private void testReducedCalling(final String model, final String md5) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.reduced.bam -o %s -L 20:10,000,000-10,500,000 -glm " + model, 1,
|
||||
"-T UnifiedGenotyper --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.reduced.bam -o %s -L 20:10,000,000-10,500,000 -glm " + model, 1,
|
||||
Arrays.asList(md5));
|
||||
executeTest("test calling on a ReducedRead BAM with " + model, spec);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -57,7 +57,7 @@ import static org.broadinstitute.sting.gatk.walkers.haplotypecaller.HaplotypeCal
|
|||
public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends WalkerTest {
|
||||
|
||||
private void HCTestComplexVariants(String bam, String args, String md5) {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, bam) + " -L 20:10028767-10028967 -L 20:10431524-10431924 -L 20:10723661-10724061 -L 20:10903555-10903955 --no_cmdline_in_header -o %s -minPruning 4";
|
||||
final String base = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, bam) + " -L 20:10028767-10028967 -L 20:10431524-10431924 -L 20:10723661-10724061 -L 20:10903555-10903955 --no_cmdline_in_header -o %s -minPruning 4";
|
||||
final WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(base + " " + args, Arrays.asList(md5));
|
||||
executeTest("testHaplotypeCallerComplexVariants: args=" + args, spec);
|
||||
}
|
||||
|
|
@ -68,7 +68,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
|
|||
}
|
||||
|
||||
private void HCTestSymbolicVariants(String bam, String args, String md5) {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, bam) + " -L 20:5947969-5948369 -L 20:61091236-61091636 --no_cmdline_in_header -o %s -minPruning 1";
|
||||
final String base = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, bam) + " -L 20:5947969-5948369 -L 20:61091236-61091636 --no_cmdline_in_header -o %s -minPruning 1";
|
||||
final WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(base + " " + args, Arrays.asList(md5));
|
||||
executeTest("testHaplotypeCallerSymbolicVariants: args=" + args, spec);
|
||||
}
|
||||
|
|
@ -80,7 +80,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
|
|||
}
|
||||
|
||||
private void HCTestComplexGGA(String bam, String args, String md5) {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, bam) + " --no_cmdline_in_header -o %s -minPruning 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf";
|
||||
final String base = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, bam) + " --no_cmdline_in_header -o %s -minPruning 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
|
||||
executeTest("testHaplotypeCallerComplexGGA: args=" + args, spec);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -73,7 +73,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
final static String INTERVALS_FILE = validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.test.intervals";
|
||||
|
||||
private void HCTest(String bam, String args, String md5) {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s -L %s", REF, bam, INTERVALS_FILE) + " --no_cmdline_in_header -o %s -minPruning 3";
|
||||
final String base = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s -L %s", REF, bam, INTERVALS_FILE) + " --no_cmdline_in_header -o %s -minPruning 3";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
|
||||
executeTest("testHaplotypeCaller: args=" + args, spec);
|
||||
}
|
||||
|
|
@ -105,7 +105,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
}
|
||||
|
||||
private void HCTestIndelQualityScores(String bam, String args, String md5) {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, bam) + " -L 20:10,005,000-10,025,000 --no_cmdline_in_header -o %s -minPruning 2";
|
||||
final String base = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, bam) + " -L 20:10,005,000-10,025,000 --no_cmdline_in_header -o %s -minPruning 2";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
|
||||
executeTest("testHaplotypeCallerIndelQualityScores: args=" + args, spec);
|
||||
}
|
||||
|
|
@ -120,7 +120,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
final IndexedFastaSequenceFile fasta = new IndexedFastaSequenceFile(new File(b37KGReference));
|
||||
final GenomeLocParser parser = new GenomeLocParser(fasta.getSequenceDictionary());
|
||||
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, bam) + " -L 20:10,001,603-10,001,642 -L 20:10,001,653-10,001,742 --no_cmdline_in_header -o %s";
|
||||
final String base = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, bam) + " -L 20:10,001,603-10,001,642 -L 20:10,001,653-10,001,742 --no_cmdline_in_header -o %s";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
|
||||
for( final File vcf : executeTest("testHaplotypeCallerNearbySmallIntervals: args=" + args, spec).getFirst() ) {
|
||||
if( containsDuplicateRecord(vcf, parser) ) {
|
||||
|
|
@ -158,14 +158,14 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
// any of the calls in that region because it is so messy.
|
||||
@Test
|
||||
public void HCTestProblematicReadsModifiedInActiveRegions() {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965";
|
||||
final String base = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("0689d2c202849fd05617648eaf429b9a"));
|
||||
executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void HCTestStructuralIndels() {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
|
||||
final String base = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("cb190c935541ebb9f660f713a882b922"));
|
||||
executeTest("HCTestStructuralIndels: ", spec);
|
||||
}
|
||||
|
|
@ -173,7 +173,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void HCTestDoesNotFailOnBadRefBase() {
|
||||
// don't care about the output - just want to make sure it doesn't fail
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "NA12878.readsOverBadBase.chr3.bam") + " --no_cmdline_in_header -o /dev/null -L 3:60830000-60840000 --minPruning 3 -stand_call_conf 2 -stand_emit_conf 2";
|
||||
final String base = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, privateTestDir + "NA12878.readsOverBadBase.chr3.bam") + " --no_cmdline_in_header -o /dev/null -L 3:60830000-60840000 --minPruning 3 -stand_call_conf 2 -stand_emit_conf 2";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Collections.<String>emptyList());
|
||||
executeTest("HCTestDoesNotFailOnBadRefBase: ", spec);
|
||||
}
|
||||
|
|
@ -187,7 +187,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void HCTestReducedBam() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
|
||||
"-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
|
||||
Arrays.asList("3c87eb93ffe3a0166aca753050b981e1"));
|
||||
executeTest("HC calling on a ReducedRead BAM", spec);
|
||||
}
|
||||
|
|
@ -195,7 +195,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testReducedBamWithReadsNotFullySpanningDeletion() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "reduced.readNotFullySpanningDeletion.bam -o %s -L 1:167871297", 1,
|
||||
"-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "reduced.readNotFullySpanningDeletion.bam -o %s -L 1:167871297", 1,
|
||||
Arrays.asList("8adfa8a27a312760dab50787da595c57"));
|
||||
executeTest("test calling on a ReducedRead BAM where the reads do not fully span a deletion", spec);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -211,7 +211,7 @@ public class PerReadAlleleLikelihoodMapUnitTest extends BaseTest {
|
|||
|
||||
Assert.assertEquals(perReadAlleleLikelihoodMap.size(),pileup.depthOfCoverage()+10);
|
||||
Assert.assertEquals(perReadAlleleLikelihoodMap.getAlleleStratifiedReadMap().get(base_A).size(),60);
|
||||
perReadAlleleLikelihoodMap.performPerAlleleDownsampling(0.1,null);
|
||||
perReadAlleleLikelihoodMap.performPerAlleleDownsampling(0.1);
|
||||
Assert.assertEquals(perReadAlleleLikelihoodMap.size(),(int) (0.9*(pileup.depthOfCoverage()+10)));
|
||||
|
||||
Map<Allele,List<GATKSAMRecord>> downsampledStrat = perReadAlleleLikelihoodMap.getAlleleStratifiedReadMap();
|
||||
|
|
|
|||
|
|
@ -104,8 +104,9 @@ public class GATKArgumentCollection {
|
|||
@Argument(fullName = "nonDeterministicRandomSeed", shortName = "ndrs", doc = "Makes the GATK behave non deterministically, that is, the random numbers generated will be different in every run", required = false)
|
||||
public boolean nonDeterministicRandomSeed = false;
|
||||
|
||||
@Argument(fullName = "disableRandomization",doc="Completely eliminates randomization from nondeterministic methods. To be used mostly in the testing framework where dynamic parallelism can result in differing numbers of calls to the generator.")
|
||||
public boolean disableRandomization = false;
|
||||
@Hidden
|
||||
@Argument(fullName = "disableDithering",doc="Completely eliminates randomized dithering from rank sum tests. To be used in the testing framework where dynamic parallelism can result in differing numbers of calls to the random generator.")
|
||||
public boolean disableDithering = false;
|
||||
|
||||
@Argument(fullName = "maxRuntime", shortName = "maxRuntime", doc="If provided, that GATK will stop execution cleanly as soon after maxRuntime has been exceeded, truncating the run but not exiting with a failure. By default the value is interpreted in minutes, but this can be changed by maxRuntimeUnits", required = false)
|
||||
public long maxRuntime = GenomeAnalysisEngine.NO_RUNTIME_LIMIT;
|
||||
|
|
|
|||
|
|
@ -25,7 +25,6 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.downsampling;
|
||||
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.collections.DefaultHashMap;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
|
|
@ -38,65 +37,62 @@ import org.broadinstitute.variant.variantcontext.Allele;
|
|||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
public class AlleleBiasedDownsamplingUtils {
|
||||
|
||||
// define this class so that we can use Java generics below
|
||||
private final static class PileupElementList extends ArrayList<PileupElement> {}
|
||||
|
||||
/**
|
||||
* Computes an allele biased version of the given pileup
|
||||
*
|
||||
* @param pileup the original pileup
|
||||
* @param downsamplingFraction the fraction of total reads to remove per allele
|
||||
* @param log logging output
|
||||
* @return allele biased pileup
|
||||
*/
|
||||
public static ReadBackedPileup createAlleleBiasedBasePileup(final ReadBackedPileup pileup, final double downsamplingFraction, final PrintStream log) {
|
||||
public static ReadBackedPileup createAlleleBiasedBasePileup(final ReadBackedPileup pileup, final double downsamplingFraction) {
|
||||
// special case removal of all or no reads
|
||||
if ( downsamplingFraction <= 0.0 )
|
||||
return pileup;
|
||||
if ( downsamplingFraction >= 1.0 )
|
||||
return new ReadBackedPileupImpl(pileup.getLocation(), new ArrayList<PileupElement>());
|
||||
|
||||
final ArrayList<PileupElement>[] alleleStratifiedElements = new ArrayList[4];
|
||||
final PileupElementList[] alleleStratifiedElements = new PileupElementList[4];
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
alleleStratifiedElements[i] = new ArrayList<PileupElement>();
|
||||
alleleStratifiedElements[i] = new PileupElementList();
|
||||
|
||||
// start by stratifying the reads by the alleles they represent at this position
|
||||
boolean sawReducedRead = false;
|
||||
for ( final PileupElement pe : pileup ) {
|
||||
// we do not want to remove a reduced read
|
||||
if ( !pe.getRead().isReducedRead() ) {
|
||||
final int baseIndex = BaseUtils.simpleBaseToBaseIndex(pe.getBase());
|
||||
if ( baseIndex != -1 )
|
||||
alleleStratifiedElements[baseIndex].add(pe);
|
||||
}
|
||||
if ( pe.getRead().isReducedRead() )
|
||||
sawReducedRead = true;
|
||||
|
||||
final int baseIndex = BaseUtils.simpleBaseToBaseIndex(pe.getBase());
|
||||
if ( baseIndex != -1 )
|
||||
alleleStratifiedElements[baseIndex].add(pe);
|
||||
}
|
||||
|
||||
// make a listing of allele counts
|
||||
final int[] alleleCounts = new int[4];
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
alleleCounts[i] = alleleStratifiedElements[i].size();
|
||||
// make a listing of allele counts and calculate the total count
|
||||
final int[] alleleCounts = calculateAlleleCounts(alleleStratifiedElements, sawReducedRead);
|
||||
final int totalAlleleCount = (int)MathUtils.sum(alleleCounts);
|
||||
|
||||
// do smart down-sampling
|
||||
int numReadsToRemove = (int)(pileup.getNumberOfElements() * downsamplingFraction); // floor
|
||||
final int numReadsToRemove = (int)(totalAlleleCount * downsamplingFraction); // floor
|
||||
final int[] targetAlleleCounts = runSmartDownsampling(alleleCounts, numReadsToRemove);
|
||||
|
||||
final HashSet<PileupElement> readsToRemove = new HashSet<PileupElement>(numReadsToRemove);
|
||||
for ( int i = 0; i < 4; i++ ) {
|
||||
final ArrayList<PileupElement> alleleList = alleleStratifiedElements[i];
|
||||
final PileupElementList alleleList = alleleStratifiedElements[i];
|
||||
// if we don't need to remove any reads, then don't
|
||||
if ( alleleList.size() > targetAlleleCounts[i] )
|
||||
readsToRemove.addAll(downsampleElements(alleleList, alleleList.size() - targetAlleleCounts[i], log));
|
||||
if ( alleleCounts[i] > targetAlleleCounts[i] )
|
||||
readsToRemove.addAll(downsampleElements(alleleList, alleleCounts[i], alleleCounts[i] - targetAlleleCounts[i]));
|
||||
}
|
||||
|
||||
// clean up pointers so memory can be garbage collected if needed
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
alleleStratifiedElements[i].clear();
|
||||
|
||||
// we need to keep the reads sorted because the FragmentUtils code will expect them in coordinate order and will fail otherwise
|
||||
final List<PileupElement> readsToKeep = new ArrayList<PileupElement>(pileup.getNumberOfElements() - numReadsToRemove);
|
||||
final List<PileupElement> readsToKeep = new ArrayList<PileupElement>(totalAlleleCount - numReadsToRemove);
|
||||
for ( final PileupElement pe : pileup ) {
|
||||
if ( !readsToRemove.contains(pe) ) {
|
||||
readsToKeep.add(pe);
|
||||
|
|
@ -106,6 +102,26 @@ public class AlleleBiasedDownsamplingUtils {
|
|||
return new ReadBackedPileupImpl(pileup.getLocation(), new ArrayList<PileupElement>(readsToKeep));
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates actual allele counts for each allele (which can be different than the list size when reduced reads are present)
|
||||
*
|
||||
* @param alleleStratifiedElements pileup elements stratified by allele
|
||||
* @param sawReducedRead is at least one read a reduced read?
|
||||
* @return non-null int array representing allele counts
|
||||
*/
|
||||
private static int[] calculateAlleleCounts(final PileupElementList[] alleleStratifiedElements, final boolean sawReducedRead) {
|
||||
final int[] alleleCounts = new int[alleleStratifiedElements.length];
|
||||
for ( int i = 0; i < alleleStratifiedElements.length; i++ ) {
|
||||
if ( !sawReducedRead ) {
|
||||
alleleCounts[i] = alleleStratifiedElements[i].size();
|
||||
} else {
|
||||
for ( final PileupElement pe : alleleStratifiedElements[i] )
|
||||
alleleCounts[i] += pe.getRepresentativeCount();
|
||||
}
|
||||
}
|
||||
return alleleCounts;
|
||||
}
|
||||
|
||||
private static int scoreAlleleCounts(final int[] alleleCounts) {
|
||||
if ( alleleCounts.length < 2 )
|
||||
return 0;
|
||||
|
|
@ -128,11 +144,11 @@ public class AlleleBiasedDownsamplingUtils {
|
|||
}
|
||||
|
||||
/**
|
||||
* Computes an allele biased version of the given pileup
|
||||
* Computes an allele biased version of the allele counts for a given pileup
|
||||
*
|
||||
* @param alleleCounts the original pileup
|
||||
* @param numReadsToRemove fraction of total reads to remove per allele
|
||||
* @return allele biased pileup
|
||||
* @param alleleCounts the allele counts for the original pileup
|
||||
* @param numReadsToRemove number of total reads to remove per allele
|
||||
* @return non-null array of new counts needed per allele
|
||||
*/
|
||||
protected static int[] runSmartDownsampling(final int[] alleleCounts, final int numReadsToRemove) {
|
||||
final int numAlleles = alleleCounts.length;
|
||||
|
|
@ -169,36 +185,50 @@ public class AlleleBiasedDownsamplingUtils {
|
|||
/**
|
||||
* Performs allele biased down-sampling on a pileup and computes the list of elements to remove
|
||||
*
|
||||
* @param elements original list of records
|
||||
* @param elements original list of pileup elements
|
||||
* @param originalElementCount original count of elements (taking reduced reads into account)
|
||||
* @param numElementsToRemove the number of records to remove
|
||||
* @param log logging output
|
||||
* @return the list of pileup elements TO REMOVE
|
||||
*/
|
||||
private static <T> List<T> downsampleElements(final List<T> elements, final int numElementsToRemove, final PrintStream log) {
|
||||
ArrayList<T> elementsToRemove = new ArrayList<T>(numElementsToRemove);
|
||||
|
||||
protected static List<PileupElement> downsampleElements(final List<PileupElement> elements, final int originalElementCount, final int numElementsToRemove) {
|
||||
// are there no elements to remove?
|
||||
if ( numElementsToRemove == 0 )
|
||||
return elementsToRemove;
|
||||
return Collections.<PileupElement>emptyList();
|
||||
|
||||
final ArrayList<PileupElement> elementsToRemove = new ArrayList<PileupElement>(numElementsToRemove);
|
||||
|
||||
// should we remove all of the elements?
|
||||
final int pileupSize = elements.size();
|
||||
if ( numElementsToRemove == pileupSize ) {
|
||||
logAllElements(elements, log);
|
||||
if ( numElementsToRemove >= originalElementCount ) {
|
||||
elementsToRemove.addAll(elements);
|
||||
return elementsToRemove;
|
||||
}
|
||||
|
||||
// create a bitset describing which elements to remove
|
||||
final BitSet itemsToRemove = new BitSet(pileupSize);
|
||||
for ( Integer selectedIndex : MathUtils.sampleIndicesWithoutReplacement(pileupSize, numElementsToRemove) ) {
|
||||
final BitSet itemsToRemove = new BitSet(originalElementCount);
|
||||
for ( final Integer selectedIndex : MathUtils.sampleIndicesWithoutReplacement(originalElementCount, numElementsToRemove) ) {
|
||||
itemsToRemove.set(selectedIndex);
|
||||
}
|
||||
|
||||
for ( int i = 0; i < pileupSize; i++ ) {
|
||||
if ( itemsToRemove.get(i) ) {
|
||||
final T element = elements.get(i);
|
||||
logElement(element, log);
|
||||
int currentBitSetIndex = 0;
|
||||
for ( final PileupElement element : elements ) {
|
||||
|
||||
final int representativeCount = element.getRepresentativeCount();
|
||||
|
||||
// if it's a reduced read, we need to be smart about how we down-sample
|
||||
if ( representativeCount > 1 ) {
|
||||
// count how many bits are set over the span represented by this read
|
||||
int setBits = 0;
|
||||
for ( int i = 0; i < representativeCount; i++ )
|
||||
setBits += itemsToRemove.get(currentBitSetIndex++) ? 1 : 0;
|
||||
|
||||
// remove that count from the count of the reduced read
|
||||
if ( setBits == representativeCount )
|
||||
elementsToRemove.add(element);
|
||||
else
|
||||
element.adjustRepresentativeCount(-1 * setBits);
|
||||
}
|
||||
// otherwise it's trivial: remove if the corresponding bit is set
|
||||
else if ( itemsToRemove.get(currentBitSetIndex++) ) {
|
||||
elementsToRemove.add(element);
|
||||
}
|
||||
}
|
||||
|
|
@ -211,10 +241,9 @@ public class AlleleBiasedDownsamplingUtils {
|
|||
*
|
||||
* @param alleleReadMap original list of records per allele
|
||||
* @param downsamplingFraction the fraction of total reads to remove per allele
|
||||
* @param log logging output
|
||||
* @return list of reads TO REMOVE from allele biased down-sampling
|
||||
*/
|
||||
public static List<GATKSAMRecord> selectAlleleBiasedReads(final Map<Allele, List<GATKSAMRecord>> alleleReadMap, final double downsamplingFraction, final PrintStream log) {
|
||||
public static List<GATKSAMRecord> selectAlleleBiasedReads(final Map<Allele, List<GATKSAMRecord>> alleleReadMap, final double downsamplingFraction) {
|
||||
int totalReads = 0;
|
||||
for ( final List<GATKSAMRecord> reads : alleleReadMap.values() )
|
||||
totalReads += reads.size();
|
||||
|
|
@ -225,6 +254,8 @@ public class AlleleBiasedDownsamplingUtils {
|
|||
final List<Allele> alleles = new ArrayList<Allele>(alleleReadMap.keySet());
|
||||
alleles.remove(Allele.NO_CALL); // ignore the no-call bin
|
||||
final int numAlleles = alleles.size();
|
||||
|
||||
// TODO -- if we ever decide to make this work for reduced reads, this will need to use the representative counts instead
|
||||
final int[] alleleCounts = new int[numAlleles];
|
||||
for ( int i = 0; i < numAlleles; i++ )
|
||||
alleleCounts[i] = alleleReadMap.get(alleles.get(i)).size();
|
||||
|
|
@ -234,38 +265,52 @@ public class AlleleBiasedDownsamplingUtils {
|
|||
|
||||
final List<GATKSAMRecord> readsToRemove = new ArrayList<GATKSAMRecord>(numReadsToRemove);
|
||||
for ( int i = 0; i < numAlleles; i++ ) {
|
||||
final List<GATKSAMRecord> alleleBin = alleleReadMap.get(alleles.get(i));
|
||||
|
||||
if ( alleleBin.size() > targetAlleleCounts[i] ) {
|
||||
readsToRemove.addAll(downsampleElements(alleleBin, alleleBin.size() - targetAlleleCounts[i], log));
|
||||
if ( alleleCounts[i] > targetAlleleCounts[i] ) {
|
||||
readsToRemove.addAll(downsampleElements(alleleReadMap.get(alleles.get(i)), alleleCounts[i] - targetAlleleCounts[i]));
|
||||
}
|
||||
}
|
||||
|
||||
return readsToRemove;
|
||||
}
|
||||
|
||||
private static <T> void logAllElements(final List<T> elements, final PrintStream log) {
|
||||
if ( log != null ) {
|
||||
for ( final T obj : elements ) {
|
||||
logElement(obj, log);
|
||||
}
|
||||
/**
|
||||
* Performs allele biased down-sampling on a pileup and computes the list of elements to remove
|
||||
*
|
||||
* @param reads original list of records
|
||||
* @param numElementsToRemove the number of records to remove
|
||||
* @return the list of pileup elements TO REMOVE
|
||||
*/
|
||||
protected static List<GATKSAMRecord> downsampleElements(final List<GATKSAMRecord> reads, final int numElementsToRemove) {
|
||||
// are there no elements to remove?
|
||||
if ( numElementsToRemove == 0 )
|
||||
return Collections.<GATKSAMRecord>emptyList();
|
||||
|
||||
final ArrayList<GATKSAMRecord> elementsToRemove = new ArrayList<GATKSAMRecord>(numElementsToRemove);
|
||||
final int originalElementCount = reads.size();
|
||||
|
||||
// should we remove all of the elements?
|
||||
if ( numElementsToRemove >= originalElementCount ) {
|
||||
elementsToRemove.addAll(reads);
|
||||
return elementsToRemove;
|
||||
}
|
||||
}
|
||||
|
||||
private static <T> void logElement(final T obj, final PrintStream log) {
|
||||
if ( log != null ) {
|
||||
|
||||
final GATKSAMRecord read;
|
||||
if ( obj instanceof PileupElement )
|
||||
read = ((PileupElement)obj).getRead();
|
||||
else
|
||||
read = (GATKSAMRecord)obj;
|
||||
|
||||
final SAMReadGroupRecord readGroup = read.getReadGroup();
|
||||
log.println(String.format("%s\t%s\t%s\t%s", read.getReadName(), readGroup.getSample(), readGroup.getLibrary(), readGroup.getPlatformUnit()));
|
||||
// create a bitset describing which elements to remove
|
||||
final BitSet itemsToRemove = new BitSet(originalElementCount);
|
||||
for ( final Integer selectedIndex : MathUtils.sampleIndicesWithoutReplacement(originalElementCount, numElementsToRemove) ) {
|
||||
itemsToRemove.set(selectedIndex);
|
||||
}
|
||||
}
|
||||
|
||||
int currentBitSetIndex = 0;
|
||||
for ( final GATKSAMRecord read : reads ) {
|
||||
if ( read.isReducedRead() )
|
||||
throw new IllegalStateException("Allele-biased downsampling of reduced reads has not been implemented for a list of GATKSAMRecords");
|
||||
|
||||
if ( itemsToRemove.get(currentBitSetIndex++) )
|
||||
elementsToRemove.add(read);
|
||||
}
|
||||
|
||||
return elementsToRemove;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create sample-contamination maps from file
|
||||
|
|
|
|||
|
|
@ -27,7 +27,6 @@ package org.broadinstitute.sting.utils.genotyper;
|
|||
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.downsampling.AlleleBiasedDownsamplingUtils;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
|
|
@ -36,7 +35,6 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
|||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
import org.broadinstitute.variant.variantcontext.Allele;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
|
|
@ -44,9 +42,8 @@ import java.util.*;
|
|||
* For each read, this holds underlying alleles represented by an aligned read, and corresponding relative likelihood.
|
||||
*/
|
||||
public class PerReadAlleleLikelihoodMap {
|
||||
private final static Logger logger = Logger.getLogger(PerReadAlleleLikelihoodMap.class);
|
||||
protected List<Allele> alleles;
|
||||
protected Map<GATKSAMRecord, Map<Allele, Double>> likelihoodReadMap;
|
||||
protected final List<Allele> alleles;
|
||||
protected final Map<GATKSAMRecord, Map<Allele, Double>> likelihoodReadMap;
|
||||
|
||||
public PerReadAlleleLikelihoodMap() {
|
||||
likelihoodReadMap = new LinkedHashMap<GATKSAMRecord,Map<Allele,Double>>();
|
||||
|
|
@ -78,17 +75,16 @@ public class PerReadAlleleLikelihoodMap {
|
|||
|
||||
}
|
||||
|
||||
public ReadBackedPileup createPerAlleleDownsampledBasePileup(final ReadBackedPileup pileup, final double downsamplingFraction, final PrintStream log) {
|
||||
return AlleleBiasedDownsamplingUtils.createAlleleBiasedBasePileup(pileup, downsamplingFraction, log);
|
||||
public ReadBackedPileup createPerAlleleDownsampledBasePileup(final ReadBackedPileup pileup, final double downsamplingFraction) {
|
||||
return AlleleBiasedDownsamplingUtils.createAlleleBiasedBasePileup(pileup, downsamplingFraction);
|
||||
}
|
||||
|
||||
/**
|
||||
* For each allele "a" , identify those reads whose most likely allele is "a", and remove a "downsamplingFraction" proportion
|
||||
* of those reads from the "likelihoodReadMap". This is used for e.g. sample contamination
|
||||
* @param downsamplingFraction - the fraction of supporting reads to remove from each allele. If <=0 all reads kept, if >=1 all reads tossed.
|
||||
* @param log - a PrintStream to log the removed reads to (passed through to the utility function)
|
||||
*/
|
||||
public void performPerAlleleDownsampling(final double downsamplingFraction, final PrintStream log) {
|
||||
public void performPerAlleleDownsampling(final double downsamplingFraction) {
|
||||
// special case removal of all or no reads
|
||||
if ( downsamplingFraction <= 0.0 )
|
||||
return;
|
||||
|
|
@ -101,7 +97,7 @@ public class PerReadAlleleLikelihoodMap {
|
|||
final Map<Allele, List<GATKSAMRecord>> alleleReadMap = getAlleleStratifiedReadMap();
|
||||
|
||||
// compute the reads to remove and actually remove them
|
||||
final List<GATKSAMRecord> readsToRemove = AlleleBiasedDownsamplingUtils.selectAlleleBiasedReads(alleleReadMap, downsamplingFraction, log);
|
||||
final List<GATKSAMRecord> readsToRemove = AlleleBiasedDownsamplingUtils.selectAlleleBiasedReads(alleleReadMap, downsamplingFraction);
|
||||
for ( final GATKSAMRecord read : readsToRemove )
|
||||
likelihoodReadMap.remove(read);
|
||||
}
|
||||
|
|
@ -117,7 +113,8 @@ public class PerReadAlleleLikelihoodMap {
|
|||
alleleReadMap.put(allele, new ArrayList<GATKSAMRecord>());
|
||||
|
||||
for ( final Map.Entry<GATKSAMRecord, Map<Allele, Double>> entry : likelihoodReadMap.entrySet() ) {
|
||||
// do not remove reduced reads!
|
||||
// TODO -- come up with a strategy for down-sampling reduced reads
|
||||
// Currently we are unable to remove reduced reads because their representative base count differs throughout the read
|
||||
if ( !entry.getKey().isReducedRead() ) {
|
||||
final MostLikelyAllele bestAllele = getMostLikelyAllele(entry.getValue());
|
||||
if ( bestAllele.isInformative() )
|
||||
|
|
|
|||
|
|
@ -303,7 +303,7 @@ public class PileupElement implements Comparable<PileupElement> {
|
|||
* this being a reduced read and a deletion, we return the average number of elements between the left
|
||||
* and right elements to the deletion. We assume the deletion to be left aligned.
|
||||
*
|
||||
* @return
|
||||
* @return the representative count
|
||||
*/
|
||||
public int getRepresentativeCount() {
|
||||
if (read.isReducedRead()) {
|
||||
|
|
@ -318,6 +318,19 @@ public class PileupElement implements Comparable<PileupElement> {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Adjusts the representative count of this pileup element.
|
||||
* Throws an exception if this element does not represent a reduced read.
|
||||
*
|
||||
* @param adjustmentFactor how much to adjust the representative count (can be positive or negative)
|
||||
*/
|
||||
public void adjustRepresentativeCount(final int adjustmentFactor) {
|
||||
if ( read.isReducedRead() )
|
||||
read.adjustReducedCount(offset, adjustmentFactor);
|
||||
else
|
||||
throw new IllegalArgumentException("Trying to adjust the representative count of a read that is not reduced");
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the cigar element aligning this element to the genome
|
||||
* @return a non-null CigarElement
|
||||
|
|
|
|||
|
|
@ -400,7 +400,40 @@ public class GATKSAMRecord extends BAMRecord {
|
|||
* @return the number of bases corresponding to the i'th base of the reduced read
|
||||
*/
|
||||
public final byte getReducedCount(final int i) {
|
||||
return getReducedReadCounts()[i];
|
||||
if ( !isReducedRead() )
|
||||
throw new IllegalArgumentException("error trying to retrieve the reduced count from a read that is not reduced");
|
||||
final byte[] reducedCounts = getReducedReadCounts();
|
||||
return reducedCounts[i];
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the number of bases corresponding the i'th base of the reduced read.
|
||||
*
|
||||
* @param i the read based coordinate inside the read
|
||||
* @param count the new count
|
||||
*/
|
||||
public final void setReducedCount(final int i, final byte count) {
|
||||
if ( count < 0 )
|
||||
throw new IllegalArgumentException("the reduced count cannot be set to a negative value");
|
||||
if ( !isReducedRead() )
|
||||
throw new IllegalArgumentException("error trying to set the reduced count for a read that is not reduced");
|
||||
final byte[] reducedCounts = getReducedReadCounts();
|
||||
reducedCounts[i] = count;
|
||||
setReducedReadCountsTag(reducedCounts);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the number of bases corresponding the i'th base of the reduced read.
|
||||
*
|
||||
* @param i the read based coordinate inside the read
|
||||
* @param adjustmentFactor how much to add/subtract to the current count
|
||||
*/
|
||||
public final void adjustReducedCount(final int i, final int adjustmentFactor) {
|
||||
if ( !isReducedRead() )
|
||||
throw new IllegalArgumentException("error trying to set the reduced count for a read that is not reduced");
|
||||
final byte[] reducedCounts = getReducedReadCounts();
|
||||
final byte newCount = (byte)(reducedCounts[i] + adjustmentFactor);
|
||||
setReducedCount(i, newCount);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -25,18 +25,21 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.downsampling;
|
||||
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.*;
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -115,6 +118,51 @@ public class AlleleBiasedDownsamplingUtilsUnitTest extends BaseTest {
|
|||
return true;
|
||||
}
|
||||
|
||||
@DataProvider(name = "BiasedDownsamplingTest")
|
||||
public Object[][] makeBiasedDownsamplingTest() {
|
||||
final List<Object[]> tests = new LinkedList<Object[]>();
|
||||
|
||||
final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
|
||||
|
||||
for ( final int originalNormalCount : Arrays.asList(0, 1, 2, 10, 1000) ) {
|
||||
for ( final int originalReducedCount : Arrays.asList(0, 1, 2, 10, 100) ) {
|
||||
for ( final int indexToPutReducedRead : Arrays.asList(0, 2, originalNormalCount) ) {
|
||||
if ( originalReducedCount == 0 || indexToPutReducedRead > originalNormalCount )
|
||||
continue;
|
||||
for ( final int toRemove : Arrays.asList(0, 1, 2, 10, 1000) ) {
|
||||
if ( toRemove <= originalNormalCount + originalReducedCount )
|
||||
tests.add(new Object[]{header, originalNormalCount, originalReducedCount, indexToPutReducedRead, toRemove});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "BiasedDownsamplingTest")
|
||||
public void testBiasedDownsampling(final SAMFileHeader header, final int originalNormalCount, final int originalReducedCount, final int indexToPutReducedRead, final int toRemove) {
|
||||
|
||||
final LinkedList<PileupElement> elements = new LinkedList<PileupElement>();
|
||||
for ( int i = 0; i < originalNormalCount; i++ ) {
|
||||
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read", 0, 1, 1);
|
||||
elements.add(new PileupElement(read, 0, new CigarElement(1, CigarOperator.M), 0, 0));
|
||||
}
|
||||
if ( originalReducedCount > 0 ) {
|
||||
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read", 0, 1, 1);
|
||||
read.setReducedReadCountsTag(new byte[]{(byte)originalReducedCount});
|
||||
elements.add(indexToPutReducedRead, new PileupElement(read, 0, new CigarElement(1, CigarOperator.M), 0, 0));
|
||||
}
|
||||
|
||||
final List<PileupElement> result = AlleleBiasedDownsamplingUtils.downsampleElements(elements, originalNormalCount + originalReducedCount, toRemove);
|
||||
int pileupCount = 0;
|
||||
for ( final PileupElement pe : elements ) // reduced reads may have gotten modified
|
||||
pileupCount += pe.getRepresentativeCount();
|
||||
for ( final PileupElement pe : result )
|
||||
pileupCount -= pe.getRepresentativeCount();
|
||||
|
||||
Assert.assertEquals(pileupCount, originalNormalCount + originalReducedCount - toRemove);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLoadContaminationFileDetails(){
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@ public class GATKSAMRecordUnitTest extends BaseTest {
|
|||
final static String BASES = "ACTG";
|
||||
final static String QUALS = "!+5?";
|
||||
final private static byte[] REDUCED_READ_COUNTS = new byte[]{10, 20, 30, 40, 1};
|
||||
final private static byte[] REDUCED_READ_COUNTS_TAG = new byte[]{10, 10, 20, 30, -9}; // just the offsets
|
||||
final private static byte[] REDUCED_READ_COUNTS_OFFSETS = new byte[]{10, 10, 20, 30, -9}; // just the offsets
|
||||
|
||||
@BeforeClass
|
||||
public void init() {
|
||||
|
|
@ -52,7 +52,7 @@ public class GATKSAMRecordUnitTest extends BaseTest {
|
|||
reducedRead = ArtificialSAMUtils.createArtificialRead(header, "reducedRead", 0, 1, BASES.length());
|
||||
reducedRead.setReadBases(BASES.getBytes());
|
||||
reducedRead.setBaseQualityString(QUALS);
|
||||
reducedRead.setAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, REDUCED_READ_COUNTS_TAG);
|
||||
reducedRead.setReducedReadCountsTag(REDUCED_READ_COUNTS_OFFSETS);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -66,6 +66,36 @@ public class GATKSAMRecordUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = IllegalArgumentException.class)
|
||||
public void testGetReducedCountOnNormalRead() {
|
||||
read.getReducedCount(0);
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = IllegalArgumentException.class)
|
||||
public void testSetReducedTagOnNormalRead() {
|
||||
read.setReducedCount(0, (byte)2);
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = IllegalArgumentException.class)
|
||||
public void testSetReducedCountToNegativeNumber() {
|
||||
reducedRead.setReducedCount(0, (byte)1000);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSetReducedTagOnReducedRead() {
|
||||
for (int i = 0; i < reducedRead.getReadLength(); i++) {
|
||||
final byte newCount = (byte)i;
|
||||
reducedRead.setReducedCount(i, newCount);
|
||||
Assert.assertEquals(reducedRead.getReducedCount(i), newCount, "Reduced read count not set to the expected value at " + i);
|
||||
}
|
||||
|
||||
for (int i = 0; i < reducedRead.getReadLength(); i++) {
|
||||
final int newCount = reducedRead.getReducedCount(i) + i;
|
||||
reducedRead.adjustReducedCount(i, i);
|
||||
Assert.assertEquals(reducedRead.getReducedCount(i), newCount, "Reduced read count not set to the expected value at " + i);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReducedReadPileupElement() {
|
||||
PileupElement readp = LocusIteratorByState.createPileupForReadAndOffset(read, 0);
|
||||
|
|
|
|||
Loading…
Reference in New Issue