Merge fix
This commit is contained in:
commit
02b790c8db
|
|
@ -37,12 +37,6 @@ import java.util.*;
|
|||
|
||||
public class AlleleBiasedDownsamplingUtils {
|
||||
|
||||
private static final ArrayList<PileupElement>[] alleleStratifiedElements = new ArrayList[4];
|
||||
static {
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
alleleStratifiedElements[i] = new ArrayList<PileupElement>();
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes an allele biased version of the given pileup
|
||||
*
|
||||
|
|
@ -51,15 +45,23 @@ public class AlleleBiasedDownsamplingUtils {
|
|||
* @param log logging output
|
||||
* @return allele biased pileup
|
||||
*/
|
||||
public synchronized static ReadBackedPileup createAlleleBiasedBasePileup(final ReadBackedPileup pileup, final double downsamplingFraction, final PrintStream log) {
|
||||
public static ReadBackedPileup createAlleleBiasedBasePileup(final ReadBackedPileup pileup, final double downsamplingFraction, final PrintStream log) {
|
||||
// special case removal of all or no reads
|
||||
if ( downsamplingFraction <= 0.0 )
|
||||
return pileup;
|
||||
if ( downsamplingFraction >= 1.0 )
|
||||
return new ReadBackedPileupImpl(pileup.getLocation(), new ArrayList<PileupElement>());
|
||||
|
||||
final ArrayList<PileupElement>[] alleleStratifiedElements = new ArrayList[4];
|
||||
for ( int i = 0; i < 4; i++ )
|
||||
alleleStratifiedElements[i] = new ArrayList<PileupElement>();
|
||||
|
||||
// start by stratifying the reads by the alleles they represent at this position
|
||||
for( final PileupElement pe : pileup ) {
|
||||
// abort if we have a reduced read - we do not want to remove it!
|
||||
if ( pe.getRead().isReducedRead() )
|
||||
return pileup;
|
||||
|
||||
final int baseIndex = BaseUtils.simpleBaseToBaseIndex(pe.getBase());
|
||||
if ( baseIndex != -1 )
|
||||
alleleStratifiedElements[baseIndex].add(pe);
|
||||
|
|
|
|||
|
|
@ -39,57 +39,12 @@ public class AdvancedRecalibrationEngine extends StandardRecalibrationEngine imp
|
|||
|
||||
// optimization: only allocate temp arrays once per thread
|
||||
private final ThreadLocal<byte[]> threadLocalTempQualArray = new ThreadLocalArray<byte[]>(EventType.values().length, byte.class);
|
||||
private final ThreadLocal<boolean[]> threadLocalTempErrorArray = new ThreadLocalArray<boolean[]>(EventType.values().length, boolean.class);
|
||||
private final ThreadLocal<double[]> threadLocalTempFractionalErrorArray = new ThreadLocalArray<double[]>(EventType.values().length, double.class);
|
||||
|
||||
public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables) {
|
||||
super.initialize(covariates, recalibrationTables);
|
||||
}
|
||||
|
||||
/**
|
||||
* Loop through the list of requested covariates and pick out the value from the read, offset, and reference
|
||||
* Using the list of covariate values as a key, pick out the RecalDatum and increment,
|
||||
* adding one to the number of observations and potentially one to the number of mismatches for all three
|
||||
* categories (mismatches, insertions and deletions).
|
||||
*
|
||||
* @param pileupElement The pileup element to update
|
||||
* @param refBase The reference base at this locus
|
||||
*/
|
||||
@Override
|
||||
public void updateDataForPileupElement(final PileupElement pileupElement, final byte refBase) {
|
||||
final int offset = pileupElement.getOffset();
|
||||
final ReadCovariates readCovariates = covariateKeySetFrom(pileupElement.getRead());
|
||||
|
||||
byte[] tempQualArray = threadLocalTempQualArray.get();
|
||||
boolean[] tempErrorArray = threadLocalTempErrorArray.get();
|
||||
|
||||
tempQualArray[EventType.BASE_SUBSTITUTION.index] = pileupElement.getQual();
|
||||
tempErrorArray[EventType.BASE_SUBSTITUTION.index] = !BaseUtils.basesAreEqual(pileupElement.getBase(), refBase);
|
||||
tempQualArray[EventType.BASE_INSERTION.index] = pileupElement.getBaseInsertionQual();
|
||||
tempErrorArray[EventType.BASE_INSERTION.index] = (pileupElement.getRead().getReadNegativeStrandFlag()) ? pileupElement.isAfterInsertion() : pileupElement.isBeforeInsertion();
|
||||
tempQualArray[EventType.BASE_DELETION.index] = pileupElement.getBaseDeletionQual();
|
||||
tempErrorArray[EventType.BASE_DELETION.index] = (pileupElement.getRead().getReadNegativeStrandFlag()) ? pileupElement.isAfterDeletedBase() : pileupElement.isBeforeDeletedBase();
|
||||
|
||||
for (final EventType eventType : EventType.values()) {
|
||||
final int[] keys = readCovariates.getKeySet(offset, eventType);
|
||||
final int eventIndex = eventType.index;
|
||||
final byte qual = tempQualArray[eventIndex];
|
||||
final boolean isError = tempErrorArray[eventIndex];
|
||||
|
||||
// TODO: should this really be combine rather than increment?
|
||||
combineDatumOrPutIfNecessary(recalibrationTables.getReadGroupTable(), qual, isError, keys[0], eventIndex);
|
||||
|
||||
incrementDatumOrPutIfNecessary(recalibrationTables.getQualityScoreTable(), qual, isError, keys[0], keys[1], eventIndex);
|
||||
|
||||
for (int i = 2; i < covariates.length; i++) {
|
||||
if (keys[i] < 0)
|
||||
continue;
|
||||
|
||||
incrementDatumOrPutIfNecessary(recalibrationTables.getTable(i), qual, isError, keys[0], keys[1], keys[i], eventIndex);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void updateDataForRead(final GATKSAMRecord read, final boolean[] skip, final double[] snpErrors, final double[] insertionErrors, final double[] deletionErrors ) {
|
||||
for( int offset = 0; offset < read.getReadBases().length; offset++ ) {
|
||||
|
|
|
|||
|
|
@ -55,9 +55,12 @@ public class AdvancedPerReadAlleleLikelihoodMap extends StandardPerReadAlleleLik
|
|||
alleleReadMap.put(allele, new ArrayList<GATKSAMRecord>());
|
||||
|
||||
for ( Map.Entry<GATKSAMRecord, Map<Allele, Double>> entry : likelihoodReadMap.entrySet() ) {
|
||||
final Allele bestAllele = getMostLikelyAllele(entry.getValue());
|
||||
if ( bestAllele != Allele.NO_CALL )
|
||||
alleleReadMap.get(bestAllele).add(entry.getKey());
|
||||
// do not remove reduced reads!
|
||||
if ( !entry.getKey().isReducedRead() ) {
|
||||
final Allele bestAllele = getMostLikelyAllele(entry.getValue());
|
||||
if ( bestAllele != Allele.NO_CALL )
|
||||
alleleReadMap.get(bestAllele).add(entry.getKey());
|
||||
}
|
||||
}
|
||||
|
||||
// compute the reads to remove and actually remove them
|
||||
|
|
|
|||
|
|
@ -51,21 +51,21 @@ public class BQSRIntegrationTest extends WalkerTest {
|
|||
String HiSeqBam = privateTestDir + "HiSeq.1mb.1RG.bam";
|
||||
String HiSeqInterval = "chr1:10,000,000-10,100,000";
|
||||
return new Object[][]{
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "55a46d8f5d2f9acfa2d7659e18b6df43")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "8e930f56a8905a5999af7d6ba8a92f91")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "8e87bee4bd6531b405082c4da785f1f5")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "b309a5f57b861d7f31cb76cdac4ff8a7")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "4c75d47ed2cf93b499be8fbb29b24dfd")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "43b06e5568a89e4ce1dd9146ce580c89")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "25f4f48dba27475b0cd7c06ef0239aba")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "dfcba9acc32b4a1dfeceea135b48615a")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "e8077b721f2e6f51c1945b6f6236835c")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "fbdc8d0fd312e3a7f49063c580cf5d92")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "4f47415628201a4f3c33e48ec066677b")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "1e89d2b88f4218363b9322b38e9536f2")},
|
||||
{new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "a7beb0b16756257a274eecf73474ed90")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "dfcba9acc32b4a1dfeceea135b48615a")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "2082c70e08f1c14290c3812021832f83")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "387b41dc2221a1a4a782958944662b25")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "b5e26902e76abbd59f94f65c70d18165")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "a8a9c3f83269911cb61c5fe8fb98dc4a")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "f43a0473101c63ae93444c300d843e81")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "9e05e63339d4716584bfc717cab6bd0f")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "1cf9b9c9c64617dc0f3d2f203f918dbe")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "aa1949a77bc3066fee551a217c970c0d")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "f70d8b5358bc2f76696f14b7a807ede0")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "4c0f63e06830681560a1e9f9aad9fe98")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "be2812cd3dae3c326cf35ae3f1c8ad9e")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "03c29a0c1d21f72b12daf51cec111599")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "7080b2cad02ec6e67ebc766b2dccebf8")},
|
||||
{new BQSRTest(b36KGReference, privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam", "1:50,000-80,000", " --solid_nocall_strategy LEAVE_READ_UNRECALIBRATED", "30e76055c16843b6e33e5b9bd8ced57c")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "f70d8b5358bc2f76696f14b7a807ede0")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "5e657fd6a44dcdc7674b6e5a2de5dc83")},
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -122,12 +122,12 @@ public class BQSRIntegrationTest extends WalkerTest {
|
|||
public Object[][] createPRTestData() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
tests.add(new Object[]{1, new PRTest(" -qq -1", "a1d87da5dcbde35170d6ba6bc3ee2812")});
|
||||
tests.add(new Object[]{1, new PRTest(" -qq 6", "a0fecae6d0e5ab9917862fa306186d10")});
|
||||
tests.add(new Object[]{1, new PRTest(" -qq -1", "5226c06237b213b9e9b25a32ed92d09a")});
|
||||
tests.add(new Object[]{1, new PRTest(" -qq 6", "b592a5c62b952a012e18adb898ea9c33")});
|
||||
tests.add(new Object[]{1, new PRTest(" -DIQ", "8977bea0c57b808e65e9505eb648cdf7")});
|
||||
|
||||
for ( final int nct : Arrays.asList(1, 2, 4) ) {
|
||||
tests.add(new Object[]{nct, new PRTest("", "d1bbb4ce6aa93e866f106f8b11d888ed")});
|
||||
tests.add(new Object[]{nct, new PRTest("", "ab2f209ab98ad3432e208cbd524a4c4a")});
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
|
|
|
|||
|
|
@ -55,36 +55,36 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test(enabled = true)
|
||||
public void testSNP_ACS_Pools() {
|
||||
PC_LSV_Test_short(" -maxAltAlleles 1 -ploidy 6 -out_mode EMIT_ALL_CONFIDENT_SITES","LSV_SNP_ACS","SNP","ec19f0b7c7d57493cecfff988a4815c8");
|
||||
PC_LSV_Test_short(" -maxAltAlleles 1 -ploidy 6 -out_mode EMIT_ALL_CONFIDENT_SITES","LSV_SNP_ACS","SNP","df0e67c975ef74d593f1c704daab1705");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testBOTH_GGA_Pools() {
|
||||
PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","9ce24f4ff787aed9d3754519a60ef49f");
|
||||
PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","BOTH","7e5b28c9e21cc7e45c58c41177d8a0fc");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testINDEL_GGA_Pools() {
|
||||
PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","492c8ba9a80a902097ff15bbeb031592");
|
||||
PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","INDEL","ae6c276cc46785a794acff6f7d10ecf7");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() {
|
||||
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","848e1092b5cd57b0da5f1187e67134e7");
|
||||
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","481452ad7d6378cffb5cd834cc621d55");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() {
|
||||
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","51a7b51d82a341adec0e6510f5dfadd8");
|
||||
PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","INDEL","812957e51277aca9925c1a7bb4d9a118");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testMT_SNP_DISCOVERY_sp4() {
|
||||
PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","0a8c3b06243040b743dd90d497bb3f83");
|
||||
PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","dd568dc30be90135a3a8957a45a7321c");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testMT_SNP_GGA_sp10() {
|
||||
PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "f8ea18ec6a717a77fdf8c5f2482d8d8d");
|
||||
PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "bf793c43b635a931207170be8035b288");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -21,17 +21,17 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSample() {
|
||||
HCTest(CEUTRIO_BAM, "", "1d826f852ec1457efbfa7ee828c5783a");
|
||||
HCTest(CEUTRIO_BAM, "", "aa1df35d6e64d7ca93feb4d2dd15dd0e");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSample() {
|
||||
HCTest(NA12878_BAM, "", "01367428c26d3eaf9297c58bf8677dd3");
|
||||
HCTest(NA12878_BAM, "", "186c7f322978283c01249c6de2829215");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGA() {
|
||||
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "53caa950535749f99d3c5b9bb61c7b60");
|
||||
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "de9e78a52207fe62144dba5337965469");
|
||||
}
|
||||
|
||||
private void HCTestComplexVariants(String bam, String args, String md5) {
|
||||
|
|
@ -42,7 +42,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleComplex() {
|
||||
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "966d338f423c86a390d685aa6336ec69");
|
||||
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "000dbb1b48f94d017cfec127c6cabe8f");
|
||||
}
|
||||
|
||||
private void HCTestSymbolicVariants(String bam, String args, String md5) {
|
||||
|
|
@ -53,7 +53,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSampleSymbolic() {
|
||||
HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "b4ea70a446e4782bd3700ca14dd726ff");
|
||||
HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "16013a9203367c3d1c4ce1dcdc81ef4a");
|
||||
}
|
||||
|
||||
private void HCTestIndelQualityScores(String bam, String args, String md5) {
|
||||
|
|
@ -64,20 +64,20 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSampleIndelQualityScores() {
|
||||
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "2581e760279291a3901a506d060bfac8");
|
||||
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "b369c2a6cb5c99a424551b33bae16f3b");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void HCTestProblematicReadsModifiedInActiveRegions() {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("c54c0c9411054bf629bfd98b616e53fc"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("c306140ad28515ee06c603c225217939"));
|
||||
executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void HCTestStructuralIndels() {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("c642dcd93771f6f084d55de31f180d1b"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("b6c67ee8e99cc8f53a6587bb26028047"));
|
||||
executeTest("HCTestStructuralIndels: ", spec);
|
||||
}
|
||||
|
||||
|
|
@ -91,7 +91,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void HCTestReducedBam() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
|
||||
Arrays.asList("79af83432dc4a1768b3ebffffc4d2b8f"));
|
||||
Arrays.asList("4beb9f87ab3f316a9384c3d0dca6ebe9"));
|
||||
executeTest("HC calling on a ReducedRead BAM", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -406,7 +406,7 @@ public abstract class ArgumentTypeDescriptor {
|
|||
else
|
||||
throw new UserException.CommandLineException(
|
||||
String.format("Failed to parse value %s for argument %s. Message: %s",
|
||||
value.asString(), fieldName, e.getMessage()));
|
||||
value, fieldName, e.getMessage()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -64,6 +64,7 @@ import org.broadinstitute.sting.utils.threading.ThreadEfficiencyMonitor;
|
|||
|
||||
import java.io.File;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
/**
|
||||
* A GenomeAnalysisEngine that runs a specified walker.
|
||||
|
|
@ -73,6 +74,7 @@ public class GenomeAnalysisEngine {
|
|||
* our log, which we want to capture anything from this class
|
||||
*/
|
||||
private static Logger logger = Logger.getLogger(GenomeAnalysisEngine.class);
|
||||
public static final long NO_RUNTIME_LIMIT = -1;
|
||||
|
||||
/**
|
||||
* The GATK command-line argument parsing code.
|
||||
|
|
@ -1090,6 +1092,33 @@ public class GenomeAnalysisEngine {
|
|||
public String createApproximateCommandLineArgumentString(Object... argumentProviders) {
|
||||
return CommandLineUtils.createApproximateCommandLineArgumentString(parsingEngine,argumentProviders);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Does the current runtime in unit exceed the runtime limit, if one has been provided?
|
||||
*
|
||||
* @param runtime the runtime of this GATK instance in minutes
|
||||
* @param unit the time unit of runtime
|
||||
* @return false if not limit was requested or if runtime <= the limit, true otherwise
|
||||
*/
|
||||
public boolean exceedsRuntimeLimit(final long runtime, final TimeUnit unit) {
|
||||
if ( runtime < 0 ) throw new IllegalArgumentException("runtime must be >= 0 but got " + runtime);
|
||||
|
||||
if ( getArguments().maxRuntime == NO_RUNTIME_LIMIT )
|
||||
return false;
|
||||
else {
|
||||
final long actualRuntimeNano = TimeUnit.NANOSECONDS.convert(runtime, unit);
|
||||
final long maxRuntimeNano = getRuntimeLimitInNanoseconds();
|
||||
return actualRuntimeNano > maxRuntimeNano;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the runtime limit in nanoseconds, or -1 if no limit was specified
|
||||
*/
|
||||
public long getRuntimeLimitInNanoseconds() {
|
||||
if ( getArguments().maxRuntime == NO_RUNTIME_LIMIT )
|
||||
return -1;
|
||||
else
|
||||
return TimeUnit.NANOSECONDS.convert(getArguments().maxRuntime, getArguments().maxRuntimeUnits);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ import org.broadinstitute.sting.commandline.Argument;
|
|||
import org.broadinstitute.sting.commandline.Hidden;
|
||||
import org.broadinstitute.sting.commandline.Input;
|
||||
import org.broadinstitute.sting.commandline.IntervalBinding;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
|
||||
import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
|
||||
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
|
||||
|
|
@ -44,6 +45,7 @@ import java.io.File;
|
|||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
|
|
@ -91,7 +93,7 @@ public class GATKArgumentCollection {
|
|||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// XXX
|
||||
// General features
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
|
|
@ -143,6 +145,12 @@ public class GATKArgumentCollection {
|
|||
@Argument(fullName = "disableRandomization",doc="Completely eliminates randomization from nondeterministic methods. To be used mostly in the testing framework where dynamic parallelism can result in differing numbers of calls to the generator.")
|
||||
public boolean disableRandomization = false;
|
||||
|
||||
@Argument(fullName = "maxRuntime", shortName = "maxRuntime", doc="If provided, that GATK will stop execution cleanly as soon after maxRuntime has been exceeded, truncating the run but not exiting with a failure. By default the value is interpreted in minutes, but this can be changed by maxRuntimeUnits", required = false)
|
||||
public long maxRuntime = GenomeAnalysisEngine.NO_RUNTIME_LIMIT;
|
||||
|
||||
@Argument(fullName = "maxRuntimeUnits", shortName = "maxRuntimeUnits", doc="The TimeUnit for maxRuntime", required = false)
|
||||
public TimeUnit maxRuntimeUnits = TimeUnit.MINUTES;
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Downsampling Arguments
|
||||
|
|
|
|||
|
|
@ -75,8 +75,9 @@ public class StandardCallerArgumentCollection {
|
|||
* Basically, it will ignore the contamination fraction of reads for each alternate allele. So if the pileup contains N total bases, then we
|
||||
* will try to remove (N * contamination fraction) bases for each alternate allele.
|
||||
*/
|
||||
@Argument(fullName = "contamination_percentage_to_filter", shortName = "contamination", doc = "Fraction of contamination in sequencing data (for all samples) to aggressively remove", required = false)
|
||||
public double CONTAMINATION_FRACTION = 0.0;
|
||||
@Argument(fullName = "contamination_fraction_to_filter", shortName = "contamination", doc = "Fraction of contamination in sequencing data (for all samples) to aggressively remove", required = false)
|
||||
public double CONTAMINATION_FRACTION = DEFAULT_CONTAMINATION_FRACTION;
|
||||
public static final double DEFAULT_CONTAMINATION_FRACTION = 0.05;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName = "logRemovedReadsFromContaminationFiltering", shortName="contaminationLog", required=false)
|
||||
|
|
|
|||
|
|
@ -123,7 +123,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
|
|||
final ReduceTree reduceTree = new ReduceTree(this);
|
||||
initializeWalker(walker);
|
||||
|
||||
while (isShardTraversePending() || isTreeReducePending()) {
|
||||
while (! abortExecution() && (isShardTraversePending() || isTreeReducePending())) {
|
||||
// Check for errors during execution.
|
||||
errorTracker.throwErrorIfPending();
|
||||
|
||||
|
|
|
|||
|
|
@ -63,7 +63,7 @@ public class LinearMicroScheduler extends MicroScheduler {
|
|||
|
||||
final TraversalEngine traversalEngine = borrowTraversalEngine(this);
|
||||
for (Shard shard : shardStrategy ) {
|
||||
if ( done || shard == null ) // we ran out of shards that aren't owned
|
||||
if ( abortExecution() || done || shard == null ) // we ran out of shards that aren't owned
|
||||
break;
|
||||
|
||||
if(shard.getShardType() == Shard.ShardType.LOCUS) {
|
||||
|
|
|
|||
|
|
@ -39,6 +39,7 @@ import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
|||
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
|
||||
import org.broadinstitute.sting.gatk.traversals.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.AutoFormattingTime;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
@ -52,6 +53,7 @@ import javax.management.ObjectName;
|
|||
import java.io.File;
|
||||
import java.lang.management.ManagementFactory;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -269,6 +271,26 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
|
|||
this.threadEfficiencyMonitor = threadEfficiencyMonitor;
|
||||
}
|
||||
|
||||
/**
|
||||
* Should we stop all execution work and exit gracefully?
|
||||
*
|
||||
* Returns true in the case where some external signal or time limit has been received, indicating
|
||||
* that this GATK shouldn't continue executing. This isn't a kill signal, it is really a "shutdown
|
||||
* gracefully at the next opportunity" signal. Concrete implementations of the MicroScheduler
|
||||
* examine this value as often as reasonable and, if it returns true, stop what they are doing
|
||||
* at the next available opportunity, shutdown their resources, call notify done, and return.
|
||||
*
|
||||
* @return true if we should abort execution, or false otherwise
|
||||
*/
|
||||
protected boolean abortExecution() {
|
||||
final boolean abort = engine.exceedsRuntimeLimit(progressMeter.getRuntimeInNanoseconds(), TimeUnit.NANOSECONDS);
|
||||
if ( abort ) {
|
||||
final AutoFormattingTime aft = new AutoFormattingTime(TimeUnit.SECONDS.convert(engine.getRuntimeLimitInNanoseconds(), TimeUnit.NANOSECONDS), 1, 4);
|
||||
logger.info("Aborting execution (cleanly) because the runtime has exceeded the requested maximum " + aft);
|
||||
}
|
||||
return abort;
|
||||
}
|
||||
|
||||
/**
|
||||
* Walks a walker over the given list of intervals.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -364,7 +364,7 @@ public class VariantContextAdaptors {
|
|||
|
||||
long end = hapmap.getEnd();
|
||||
if ( deletionLength > 0 )
|
||||
end += deletionLength;
|
||||
end += (deletionLength - 1);
|
||||
VariantContext vc = new VariantContextBuilder(name, hapmap.getChr(), hapmap.getStart(), end, alleles).id(hapmap.getName()).genotypes(genotypes).make();
|
||||
return vc;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,35 +25,41 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.bqsr;
|
||||
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.commandline.Advanced;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.ArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableFilter;
|
||||
import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter;
|
||||
import org.broadinstitute.sting.gatk.filters.*;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.classloader.GATKLiteUtils;
|
||||
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.recalibration.QuantizationInfo;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalUtils;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalibrationReport;
|
||||
import org.broadinstitute.sting.utils.recalibration.RecalibrationTables;
|
||||
import org.broadinstitute.sting.utils.recalibration.*;
|
||||
import org.broadinstitute.sting.utils.recalibration.covariates.Covariate;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.io.PrintStream;
|
||||
import java.lang.reflect.Constructor;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* First pass of the base quality score recalibration -- Generates recalibration table based on various user-specified covariates (such as read group, reported quality score, machine cycle, and nucleotide context).
|
||||
|
|
@ -103,19 +109,20 @@ import java.util.ArrayList;
|
|||
* </pre>
|
||||
*/
|
||||
|
||||
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@DocumentedGATKFeature(groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class})
|
||||
@BAQMode(ApplicationTime = ReadTransformer.ApplicationTime.FORBIDDEN)
|
||||
@By(DataSource.READS)
|
||||
@ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class}) // only look at covered loci, not every loci of the reference file
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE}) // filter out all reads with zero or unavailable mapping quality
|
||||
@PartitionBy(PartitionType.LOCUS) // this walker requires both -I input.bam and -R reference.fasta
|
||||
public class BaseRecalibrator extends LocusWalker<Long, Long> {
|
||||
|
||||
@ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class, UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class})
|
||||
@PartitionBy(PartitionType.READ)
|
||||
public class BaseRecalibrator extends ReadWalker<Long, Long> implements NanoSchedulable {
|
||||
@ArgumentCollection
|
||||
private final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); // all the command line arguments for BQSR and it's covariates
|
||||
|
||||
@Advanced
|
||||
@Argument(fullName = "bqsrBAQGapOpenPenalty", shortName="bqsrBAQGOP", doc="BQSR BAQ gap open penalty (Phred Scaled). Default value is 40. 30 is perhaps better for whole genome call sets", required = false)
|
||||
public double BAQGOP = BAQ.DEFAULT_GOP;
|
||||
|
||||
private QuantizationInfo quantizationInfo; // an object that keeps track of the information necessary for quality score quantization
|
||||
|
||||
|
||||
private RecalibrationTables recalibrationTables;
|
||||
|
||||
private Covariate[] requestedCovariates; // list to hold the all the covariate objects that were requested (required + standard + experimental)
|
||||
|
|
@ -124,12 +131,12 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> {
|
|||
|
||||
private int minimumQToUse;
|
||||
|
||||
protected static final String SKIP_RECORD_ATTRIBUTE = "SKIP"; // used to label reads that should be skipped.
|
||||
protected static final String SEEN_ATTRIBUTE = "SEEN"; // used to label reads as processed.
|
||||
protected static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\
|
||||
|
||||
private static final String NO_DBSNP_EXCEPTION = "This calculation is critically dependent on being able to skip over known variant sites. Please provide a VCF file containing known sites of genetic variation.";
|
||||
|
||||
private BAQ baq; // BAQ the reads on the fly to generate the alignment uncertainty vector
|
||||
private IndexedFastaSequenceFile referenceReader; // fasta reference reader for use with BAQ calculation
|
||||
|
||||
/**
|
||||
* Parse the -cov arguments and create a list of covariates to be used here
|
||||
|
|
@ -137,6 +144,8 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> {
|
|||
*/
|
||||
public void initialize() {
|
||||
|
||||
baq = new BAQ(BAQGOP); // setup the BAQ object with the provided gap open penalty
|
||||
|
||||
// check for unsupported access
|
||||
if (getToolkit().isGATKLite() && !getToolkit().getArguments().disableIndelQuals)
|
||||
throw new UserException.NotSupportedInGATKLite("base insertion/deletion recalibration is not supported, please use the --disable_indel_quals argument");
|
||||
|
|
@ -185,13 +194,21 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> {
|
|||
recalibrationEngine.initialize(requestedCovariates, recalibrationTables);
|
||||
|
||||
minimumQToUse = getToolkit().getArguments().PRESERVE_QSCORES_LESS_THAN;
|
||||
|
||||
try {
|
||||
// fasta reference reader for use with BAQ calculation
|
||||
referenceReader = new CachingIndexedFastaSequenceFile(getToolkit().getArguments().referenceFile);
|
||||
} catch( FileNotFoundException e ) {
|
||||
throw new UserException.CouldNotReadInputFile(getToolkit().getArguments().referenceFile, e);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private RecalibrationEngine initializeRecalibrationEngine() {
|
||||
|
||||
final Class recalibrationEngineClass = GATKLiteUtils.getProtectedClassIfAvailable(RecalibrationEngine.class);
|
||||
try {
|
||||
Constructor constructor = recalibrationEngineClass.getDeclaredConstructor((Class[])null);
|
||||
final Constructor constructor = recalibrationEngineClass.getDeclaredConstructor((Class[])null);
|
||||
constructor.setAccessible(true);
|
||||
return (RecalibrationEngine)constructor.newInstance();
|
||||
}
|
||||
|
|
@ -200,60 +217,207 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> {
|
|||
}
|
||||
}
|
||||
|
||||
private boolean readHasBeenSkipped( final GATKSAMRecord read ) {
|
||||
return read.containsTemporaryAttribute(SKIP_RECORD_ATTRIBUTE);
|
||||
}
|
||||
|
||||
private boolean isLowQualityBase( final PileupElement p ) {
|
||||
return p.getQual() < minimumQToUse;
|
||||
}
|
||||
|
||||
private boolean readNotSeen( final GATKSAMRecord read ) {
|
||||
return !read.containsTemporaryAttribute(SEEN_ATTRIBUTE);
|
||||
private boolean isLowQualityBase( final GATKSAMRecord read, final int offset ) {
|
||||
return read.getBaseQualities()[offset] < minimumQToUse;
|
||||
}
|
||||
|
||||
/**
|
||||
* For each read at this locus get the various covariate values and increment that location in the map based on
|
||||
* whether or not the base matches the reference at this particular location
|
||||
*
|
||||
* @param tracker the reference metadata tracker
|
||||
* @param ref the reference context
|
||||
* @param context the alignment context
|
||||
* @return returns 1, but this value isn't used in the reduce step
|
||||
*/
|
||||
public Long map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
long countedSites = 0L;
|
||||
// Only analyze sites not present in the provided known sites
|
||||
if (tracker.getValues(RAC.knownSites).size() == 0) {
|
||||
for (final PileupElement p : context.getBasePileup()) {
|
||||
final GATKSAMRecord read = p.getRead();
|
||||
final int offset = p.getOffset();
|
||||
public Long map( final ReferenceContext ref, final GATKSAMRecord originalRead, final RefMetaDataTracker metaDataTracker ) {
|
||||
|
||||
// This read has been marked to be skipped or base is low quality (we don't recalibrate low quality bases)
|
||||
if (readHasBeenSkipped(read) || p.isInsertionAtBeginningOfRead() || isLowQualityBase(p) )
|
||||
continue;
|
||||
final GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(originalRead);
|
||||
if( read.isEmpty() ) { return 0L; } // the whole read was inside the adaptor so skip it
|
||||
|
||||
if (readNotSeen(read)) {
|
||||
read.setTemporaryAttribute(SEEN_ATTRIBUTE, true);
|
||||
RecalUtils.parsePlatformForRead(read, RAC);
|
||||
if (!RecalUtils.isColorSpaceConsistent(RAC.SOLID_NOCALL_STRATEGY, read)) {
|
||||
read.setTemporaryAttribute(SKIP_RECORD_ATTRIBUTE, true);
|
||||
continue;
|
||||
RecalUtils.parsePlatformForRead(read, RAC);
|
||||
if (!RecalUtils.isColorSpaceConsistent(RAC.SOLID_NOCALL_STRATEGY, read)) { // parse the solid color space and check for color no-calls
|
||||
return 0L; // skip this read completely
|
||||
}
|
||||
read.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalUtils.computeCovariates(read, requestedCovariates));
|
||||
|
||||
final boolean[] skip = calculateSkipArray(read, metaDataTracker); // skip known sites of variation as well as low quality and non-regular bases
|
||||
final int[] isSNP = calculateIsSNP(read, ref, originalRead);
|
||||
final int[] isInsertion = calculateIsIndel(read, EventType.BASE_INSERTION);
|
||||
final int[] isDeletion = calculateIsIndel(read, EventType.BASE_DELETION);
|
||||
final byte[] baqArray = calculateBAQArray(read);
|
||||
|
||||
if( baqArray != null ) { // some reads just can't be BAQ'ed
|
||||
final double[] snpErrors = calculateFractionalErrorArray(isSNP, baqArray);
|
||||
final double[] insertionErrors = calculateFractionalErrorArray(isInsertion, baqArray);
|
||||
final double[] deletionErrors = calculateFractionalErrorArray(isDeletion, baqArray);
|
||||
recalibrationEngine.updateDataForRead(read, skip, snpErrors, insertionErrors, deletionErrors);
|
||||
return 1L;
|
||||
} else {
|
||||
return 0L;
|
||||
}
|
||||
}
|
||||
|
||||
protected boolean[] calculateSkipArray( final GATKSAMRecord read, final RefMetaDataTracker metaDataTracker ) {
|
||||
final byte[] bases = read.getReadBases();
|
||||
final boolean[] skip = new boolean[bases.length];
|
||||
final boolean[] knownSites = calculateKnownSites(read, metaDataTracker.getValues(RAC.knownSites));
|
||||
for( int iii = 0; iii < bases.length; iii++ ) {
|
||||
skip[iii] = !BaseUtils.isRegularBase(bases[iii]) || isLowQualityBase(read, iii) || knownSites[iii] || badSolidOffset(read, iii);
|
||||
}
|
||||
return skip;
|
||||
}
|
||||
|
||||
protected boolean badSolidOffset( final GATKSAMRecord read, final int offset ) {
|
||||
return ReadUtils.isSOLiDRead(read) && RAC.SOLID_RECAL_MODE != RecalUtils.SOLID_RECAL_MODE.DO_NOTHING && !RecalUtils.isColorSpaceConsistent(read, offset);
|
||||
}
|
||||
|
||||
protected boolean[] calculateKnownSites( final GATKSAMRecord read, final List<Feature> features ) {
|
||||
final int BUFFER_SIZE = 0;
|
||||
final int readLength = read.getReadBases().length;
|
||||
final boolean[] knownSites = new boolean[readLength];
|
||||
Arrays.fill(knownSites, false);
|
||||
for( final Feature f : features ) {
|
||||
int featureStartOnRead = ReadUtils.getReadCoordinateForReferenceCoordinate(read.getSoftStart(), read.getCigar(), f.getStart(), ReadUtils.ClippingTail.LEFT_TAIL, true); // BUGBUG: should I use LEFT_TAIL here?
|
||||
if( featureStartOnRead == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) { featureStartOnRead = 0; }
|
||||
int featureEndOnRead = ReadUtils.getReadCoordinateForReferenceCoordinate(read.getSoftStart(), read.getCigar(), f.getEnd(), ReadUtils.ClippingTail.LEFT_TAIL, true);
|
||||
if( featureEndOnRead == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) { featureEndOnRead = readLength; }
|
||||
Arrays.fill(knownSites, Math.max(0, featureStartOnRead - BUFFER_SIZE), Math.min(readLength, featureEndOnRead + 1 + BUFFER_SIZE), true);
|
||||
}
|
||||
return knownSites;
|
||||
}
|
||||
|
||||
// BUGBUG: can be merged with calculateIsIndel
|
||||
protected static int[] calculateIsSNP( final GATKSAMRecord read, final ReferenceContext ref, final GATKSAMRecord originalRead ) {
|
||||
final byte[] readBases = read.getReadBases();
|
||||
final byte[] refBases = Arrays.copyOfRange(ref.getBases(), read.getAlignmentStart() - originalRead.getAlignmentStart(), ref.getBases().length + read.getAlignmentEnd() - originalRead.getAlignmentEnd());
|
||||
final int[] snp = new int[readBases.length];
|
||||
int readPos = 0;
|
||||
int refPos = 0;
|
||||
for ( final CigarElement ce : read.getCigar().getCigarElements() ) {
|
||||
final int elementLength = ce.getLength();
|
||||
switch (ce.getOperator()) {
|
||||
case M:
|
||||
case EQ:
|
||||
case X:
|
||||
for( int iii = 0; iii < elementLength; iii++ ) {
|
||||
snp[readPos] = ( BaseUtils.basesAreEqual(readBases[readPos], refBases[refPos]) ? 0 : 1 );
|
||||
readPos++;
|
||||
refPos++;
|
||||
}
|
||||
read.setTemporaryAttribute(COVARS_ATTRIBUTE, RecalUtils.computeCovariates(read, requestedCovariates));
|
||||
}
|
||||
|
||||
// SOLID bams have inserted the reference base into the read if the color space in inconsistent with the read base so skip it
|
||||
if (!ReadUtils.isSOLiDRead(read) ||
|
||||
RAC.SOLID_RECAL_MODE == RecalUtils.SOLID_RECAL_MODE.DO_NOTHING ||
|
||||
RecalUtils.isColorSpaceConsistent(read, offset))
|
||||
// This base finally passed all the checks for a good base, so add it to the big data hashmap
|
||||
recalibrationEngine.updateDataForPileupElement(p, ref.getBase());
|
||||
break;
|
||||
case D:
|
||||
case N:
|
||||
refPos += elementLength;
|
||||
break;
|
||||
case I:
|
||||
case S: // ReferenceContext doesn't have the soft clipped bases!
|
||||
readPos += elementLength;
|
||||
break;
|
||||
case H:
|
||||
case P:
|
||||
break;
|
||||
default:
|
||||
throw new ReviewedStingException("Unsupported cigar operator: " + ce.getOperator());
|
||||
}
|
||||
countedSites++;
|
||||
}
|
||||
return snp;
|
||||
}
|
||||
|
||||
protected static int[] calculateIsIndel( final GATKSAMRecord read, final EventType mode ) {
|
||||
final byte[] readBases = read.getReadBases();
|
||||
final int[] indel = new int[readBases.length];
|
||||
Arrays.fill(indel, 0);
|
||||
int readPos = 0;
|
||||
for ( final CigarElement ce : read.getCigar().getCigarElements() ) {
|
||||
final int elementLength = ce.getLength();
|
||||
switch (ce.getOperator()) {
|
||||
case M:
|
||||
case EQ:
|
||||
case X:
|
||||
case S:
|
||||
{
|
||||
readPos += elementLength;
|
||||
break;
|
||||
}
|
||||
case D:
|
||||
{
|
||||
final int index = ( read.getReadNegativeStrandFlag() ? readPos : ( readPos > 0 ? readPos - 1 : readPos ) );
|
||||
indel[index] = ( mode.equals(EventType.BASE_DELETION) ? 1 : 0 );
|
||||
break;
|
||||
}
|
||||
case I:
|
||||
{
|
||||
final boolean forwardStrandRead = !read.getReadNegativeStrandFlag();
|
||||
if( forwardStrandRead ) {
|
||||
indel[(readPos > 0 ? readPos - 1 : readPos)] = ( mode.equals(EventType.BASE_INSERTION) ? 1 : 0 );
|
||||
}
|
||||
for (int iii = 0; iii < elementLength; iii++) {
|
||||
readPos++;
|
||||
}
|
||||
if( !forwardStrandRead ) {
|
||||
indel[(readPos < indel.length ? readPos : readPos - 1)] = ( mode.equals(EventType.BASE_INSERTION) ? 1 : 0 );
|
||||
}
|
||||
break;
|
||||
}
|
||||
case N:
|
||||
case H:
|
||||
case P:
|
||||
break;
|
||||
default:
|
||||
throw new ReviewedStingException("Unsupported cigar operator: " + ce.getOperator());
|
||||
}
|
||||
}
|
||||
return indel;
|
||||
}
|
||||
|
||||
protected static double[] calculateFractionalErrorArray( final int[] errorArray, final byte[] baqArray ) {
|
||||
if(errorArray.length != baqArray.length ) {
|
||||
throw new ReviewedStingException("Array length mismatch detected. Malformed read?");
|
||||
}
|
||||
|
||||
return countedSites;
|
||||
final byte NO_BAQ_UNCERTAINTY = (byte)'@';
|
||||
final int BLOCK_START_UNSET = -1;
|
||||
|
||||
final double[] fractionalErrors = new double[baqArray.length];
|
||||
Arrays.fill(fractionalErrors, 0.0);
|
||||
boolean inBlock = false;
|
||||
int blockStartIndex = BLOCK_START_UNSET;
|
||||
int iii;
|
||||
for( iii = 0; iii < fractionalErrors.length; iii++ ) {
|
||||
if( baqArray[iii] == NO_BAQ_UNCERTAINTY ) {
|
||||
if( !inBlock ) {
|
||||
fractionalErrors[iii] = (double) errorArray[iii];
|
||||
} else {
|
||||
calculateAndStoreErrorsInBlock(iii, blockStartIndex, errorArray, fractionalErrors);
|
||||
inBlock = false; // reset state variables
|
||||
blockStartIndex = BLOCK_START_UNSET; // reset state variables
|
||||
}
|
||||
} else {
|
||||
inBlock = true;
|
||||
if( blockStartIndex == BLOCK_START_UNSET ) { blockStartIndex = iii; }
|
||||
}
|
||||
}
|
||||
if( inBlock ) {
|
||||
calculateAndStoreErrorsInBlock(iii-1, blockStartIndex, errorArray, fractionalErrors);
|
||||
}
|
||||
if( fractionalErrors.length != errorArray.length ) {
|
||||
throw new ReviewedStingException("Output array length mismatch detected. Malformed read?");
|
||||
}
|
||||
return fractionalErrors;
|
||||
}
|
||||
|
||||
private static void calculateAndStoreErrorsInBlock( final int iii,
|
||||
final int blockStartIndex,
|
||||
final int[] errorArray,
|
||||
final double[] fractionalErrors ) {
|
||||
int totalErrors = 0;
|
||||
for( int jjj = Math.max(0,blockStartIndex-1); jjj <= iii; jjj++ ) {
|
||||
totalErrors += errorArray[jjj];
|
||||
}
|
||||
for( int jjj = Math.max(0, blockStartIndex-1); jjj <= iii; jjj++ ) {
|
||||
fractionalErrors[jjj] = ((double) totalErrors) / ((double)(iii - Math.max(0,blockStartIndex-1) + 1));
|
||||
}
|
||||
}
|
||||
|
||||
private byte[] calculateBAQArray( final GATKSAMRecord read ) {
|
||||
baq.baqRead(read, referenceReader, BAQ.CalculationMode.RECALCULATE, BAQ.QualityMode.ADD_TAG);
|
||||
return BAQ.getBAQTag(read);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -286,12 +450,12 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> {
|
|||
generateReport();
|
||||
logger.info("...done!");
|
||||
|
||||
if (RAC.RECAL_PDF_FILE != null) {
|
||||
if ( RAC.RECAL_PDF_FILE != null ) {
|
||||
logger.info("Generating recalibration plots...");
|
||||
generatePlots();
|
||||
}
|
||||
|
||||
logger.info("Processed: " + result + " sites");
|
||||
logger.info("Processed: " + result + " reads");
|
||||
}
|
||||
|
||||
private void generatePlots() {
|
||||
|
|
@ -304,7 +468,6 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> {
|
|||
RecalUtils.generateRecalibrationPlot(RAC, recalibrationTables, requestedCovariates);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* go through the quality score table and use the # observations and the empirical quality score
|
||||
* to build a quality score histogram for quantization. Then use the QuantizeQual algorithm to
|
||||
|
|
@ -317,5 +480,4 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> {
|
|||
private void generateReport() {
|
||||
RecalUtils.outputRecalibrationReport(RAC, quantizationInfo, recalibrationTables, requestedCovariates);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -33,7 +33,5 @@ public interface RecalibrationEngine {
|
|||
|
||||
public void initialize(final Covariate[] covariates, final RecalibrationTables recalibrationTables);
|
||||
|
||||
public void updateDataForPileupElement(final PileupElement pileupElement, final byte refBase);
|
||||
|
||||
public void updateDataForRead(final GATKSAMRecord read, final boolean[] skip, final double[] snpErrors, final double[] insertionErrors, final double[] deletionErrors);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -46,41 +46,30 @@ public class StandardRecalibrationEngine implements RecalibrationEngine, PublicP
|
|||
this.recalibrationTables = recalibrationTables;
|
||||
}
|
||||
|
||||
/**
|
||||
* Loop through the list of requested covariates and pick out the value from the read, offset, and reference
|
||||
* Using the list of covariate values as a key, pick out the RecalDatum and increment,
|
||||
* adding one to the number of observations and potentially one to the number of mismatches for mismatches only.
|
||||
*
|
||||
* @param pileupElement The pileup element to update
|
||||
* @param refBase The reference base at this locus
|
||||
*/
|
||||
@Override
|
||||
public void updateDataForPileupElement(final PileupElement pileupElement, final byte refBase) {
|
||||
final int offset = pileupElement.getOffset();
|
||||
final ReadCovariates readCovariates = covariateKeySetFrom(pileupElement.getRead());
|
||||
|
||||
final byte qual = pileupElement.getQual();
|
||||
final boolean isError = !BaseUtils.basesAreEqual(pileupElement.getBase(), refBase);
|
||||
|
||||
final int[] keys = readCovariates.getKeySet(offset, EventType.BASE_SUBSTITUTION);
|
||||
final int eventIndex = EventType.BASE_SUBSTITUTION.index;
|
||||
|
||||
// TODO: should this really be combine rather than increment?
|
||||
combineDatumOrPutIfNecessary(recalibrationTables.getReadGroupTable(), qual, isError, keys[0], eventIndex);
|
||||
|
||||
incrementDatumOrPutIfNecessary(recalibrationTables.getQualityScoreTable(), qual, isError, keys[0], keys[1], eventIndex);
|
||||
|
||||
for (int i = 2; i < covariates.length; i++) {
|
||||
if (keys[i] < 0)
|
||||
continue;
|
||||
|
||||
incrementDatumOrPutIfNecessary(recalibrationTables.getTable(i), qual, isError, keys[0], keys[1], keys[i], eventIndex);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void updateDataForRead( final GATKSAMRecord read, final boolean[] skip, final double[] snpErrors, final double[] insertionErrors, final double[] deletionErrors ) {
|
||||
throw new UnsupportedOperationException("Delocalized BQSR is not available in the GATK-lite version");
|
||||
for( int offset = 0; offset < read.getReadBases().length; offset++ ) {
|
||||
if( !skip[offset] ) {
|
||||
final ReadCovariates readCovariates = covariateKeySetFrom(read);
|
||||
|
||||
final byte qual = read.getBaseQualities()[offset];
|
||||
final double isError = snpErrors[offset];
|
||||
|
||||
final int[] keys = readCovariates.getKeySet(offset, EventType.BASE_SUBSTITUTION);
|
||||
final int eventIndex = EventType.BASE_SUBSTITUTION.index;
|
||||
|
||||
combineDatumOrPutIfNecessary(recalibrationTables.getReadGroupTable(), qual, isError, keys[0], eventIndex);
|
||||
|
||||
incrementDatumOrPutIfNecessary(recalibrationTables.getQualityScoreTable(), qual, isError, keys[0], keys[1], eventIndex);
|
||||
|
||||
for (int i = 2; i < covariates.length; i++) {
|
||||
if (keys[i] < 0)
|
||||
continue;
|
||||
|
||||
incrementDatumOrPutIfNecessary(recalibrationTables.getTable(i), qual, isError, keys[0], keys[1], keys[i], eventIndex);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -90,10 +79,6 @@ public class StandardRecalibrationEngine implements RecalibrationEngine, PublicP
|
|||
* @param isError whether or not the observation is an error
|
||||
* @return a new RecalDatum object with the observation and the error
|
||||
*/
|
||||
protected RecalDatum createDatumObject(final byte reportedQual, final boolean isError) {
|
||||
return new RecalDatum(1, isError ? 1:0, reportedQual);
|
||||
}
|
||||
|
||||
protected RecalDatum createDatumObject(final byte reportedQual, final double isError) {
|
||||
return new RecalDatum(1, isError, reportedQual);
|
||||
}
|
||||
|
|
@ -108,46 +93,6 @@ public class StandardRecalibrationEngine implements RecalibrationEngine, PublicP
|
|||
return (ReadCovariates) read.getTemporaryAttribute(BaseRecalibrator.COVARS_ATTRIBUTE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Increments the RecalDatum at the specified position in the specified table, or put a new item there
|
||||
* if there isn't already one.
|
||||
*
|
||||
* Does this in a thread-safe way WITHOUT being synchronized: relies on the behavior of NestedIntegerArray.put()
|
||||
* to return false if another thread inserts a new item at our position in the middle of our put operation.
|
||||
*
|
||||
* @param table the table that holds/will hold our item
|
||||
* @param qual qual for this event
|
||||
* @param isError error status for this event
|
||||
* @param keys location in table of our item
|
||||
*/
|
||||
protected void incrementDatumOrPutIfNecessary( final NestedIntegerArray<RecalDatum> table, final byte qual, final boolean isError, final int... keys ) {
|
||||
final RecalDatum existingDatum = table.get(keys);
|
||||
|
||||
// There are three cases here:
|
||||
//
|
||||
// 1. The original get succeeded (existingDatum != null) because there already was an item in this position.
|
||||
// In this case we can just increment the existing item.
|
||||
//
|
||||
// 2. The original get failed (existingDatum == null), and we successfully put a new item in this position
|
||||
// and are done.
|
||||
//
|
||||
// 3. The original get failed (existingDatum == null), AND the put fails because another thread comes along
|
||||
// in the interim and puts an item in this position. In this case we need to do another get after the
|
||||
// failed put to get the item the other thread put here and increment it.
|
||||
if ( existingDatum == null ) {
|
||||
// No existing item, try to put a new one
|
||||
if ( ! table.put(createDatumObject(qual, isError), keys) ) {
|
||||
// Failed to put a new item because another thread came along and put an item here first.
|
||||
// Get the newly-put item and increment it (item is guaranteed to exist at this point)
|
||||
table.get(keys).increment(isError);
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Easy case: already an item here, so increment it
|
||||
existingDatum.increment(isError);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Increments the RecalDatum at the specified position in the specified table, or put a new item there
|
||||
* if there isn't already one.
|
||||
|
|
@ -177,36 +122,6 @@ public class StandardRecalibrationEngine implements RecalibrationEngine, PublicP
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Combines the RecalDatum at the specified position in the specified table with a new RecalDatum, or put a
|
||||
* new item there if there isn't already one.
|
||||
*
|
||||
* Does this in a thread-safe way WITHOUT being synchronized: relies on the behavior of NestedIntegerArray.put()
|
||||
* to return false if another thread inserts a new item at our position in the middle of our put operation.
|
||||
*
|
||||
* @param table the table that holds/will hold our item
|
||||
* @param qual qual for this event
|
||||
* @param isError error status for this event
|
||||
* @param keys location in table of our item
|
||||
*/
|
||||
protected void combineDatumOrPutIfNecessary( final NestedIntegerArray<RecalDatum> table, final byte qual, final boolean isError, final int... keys ) {
|
||||
final RecalDatum existingDatum = table.get(keys);
|
||||
final RecalDatum newDatum = createDatumObject(qual, isError);
|
||||
|
||||
if ( existingDatum == null ) {
|
||||
// No existing item, try to put a new one
|
||||
if ( ! table.put(newDatum, keys) ) {
|
||||
// Failed to put a new item because another thread came along and put an item here first.
|
||||
// Get the newly-put item and combine it with our item (item is guaranteed to exist at this point)
|
||||
table.get(keys).combine(newDatum);
|
||||
}
|
||||
}
|
||||
else {
|
||||
// Easy case: already an item here, so combine it with our item
|
||||
existingDatum.combine(newDatum);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Combines the RecalDatum at the specified position in the specified table with a new RecalDatum, or put a
|
||||
* new item there if there isn't already one.
|
||||
|
|
|
|||
|
|
@ -47,8 +47,8 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection
|
|||
/**
|
||||
* Note that calculating the SLOD increases the runtime by an appreciable amount.
|
||||
*/
|
||||
@Argument(fullName = "noSLOD", shortName = "nosl", doc = "If provided, we will not calculate the SLOD", required = false)
|
||||
public boolean NO_SLOD = false;
|
||||
@Argument(fullName = "computeSLOD", shortName = "slod", doc = "If provided, we will calculate the SLOD (SB annotation)", required = false)
|
||||
public boolean COMPUTE_SLOD = false;
|
||||
|
||||
/**
|
||||
* Depending on the value of the --max_alternate_alleles argument, we may genotype only a fraction of the alleles being sent on for genotyping.
|
||||
|
|
@ -204,7 +204,7 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection
|
|||
this.GLmodel = uac.GLmodel;
|
||||
this.AFmodel = uac.AFmodel;
|
||||
this.PCR_error = uac.PCR_error;
|
||||
this.NO_SLOD = uac.NO_SLOD;
|
||||
this.COMPUTE_SLOD = uac.COMPUTE_SLOD;
|
||||
this.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED = uac.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED;
|
||||
this.MIN_BASE_QUALTY_SCORE = uac.MIN_BASE_QUALTY_SCORE;
|
||||
this.MAX_DELETION_FRACTION = uac.MAX_DELETION_FRACTION;
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
|||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
|
||||
|
|
@ -238,7 +239,12 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
|
|||
}
|
||||
|
||||
if ( UAC.CONTAMINATION_FRACTION > 0.0 ) {
|
||||
throw new UserException.NotSupportedInGATKLite("you cannot enable usage of contamination down-sampling");
|
||||
if ( UAC.CONTAMINATION_FRACTION == StandardCallerArgumentCollection.DEFAULT_CONTAMINATION_FRACTION ) {
|
||||
UAC.CONTAMINATION_FRACTION = 0.0;
|
||||
logger.warn("setting contamination down-sampling fraction to 0.0 because it is not enabled in GATK-lite");
|
||||
} else {
|
||||
throw new UserException.NotSupportedInGATKLite("you cannot enable usage of contamination down-sampling");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -290,7 +296,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
|
|||
headerInfo.addAll(annotationEngine.getVCFAnnotationDescriptions());
|
||||
|
||||
// annotation (INFO) fields from UnifiedGenotyper
|
||||
if ( !UAC.NO_SLOD )
|
||||
if ( UAC.COMPUTE_SLOD )
|
||||
VCFStandardHeaderLines.addStandardInfoLines(headerInfo, true, VCFConstants.STRAND_BIAS_KEY);
|
||||
|
||||
if ( UAC.ANNOTATE_NUMBER_OF_ALLELES_DISCOVERED )
|
||||
|
|
|
|||
|
|
@ -190,8 +190,8 @@ public class UnifiedGenotyperEngine {
|
|||
final VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model, perReadAlleleLikelihoodMap);
|
||||
if ( vc != null )
|
||||
results.add(calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model, true, perReadAlleleLikelihoodMap));
|
||||
else if (UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES)
|
||||
results.add(generateEmptyContext(tracker, refContext, null, rawContext));
|
||||
// else if (UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES)
|
||||
// results.add(generateEmptyContext(tracker, refContext, null, rawContext));
|
||||
|
||||
}
|
||||
}
|
||||
|
|
@ -454,7 +454,7 @@ public class UnifiedGenotyperEngine {
|
|||
attributes.put(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, MLEfrequencies);
|
||||
}
|
||||
|
||||
if ( !UAC.NO_SLOD && !limitedContext && !bestGuessIsRef ) {
|
||||
if ( UAC.COMPUTE_SLOD && !limitedContext && !bestGuessIsRef ) {
|
||||
//final boolean DEBUG_SLOD = false;
|
||||
|
||||
// the overall lod
|
||||
|
|
|
|||
|
|
@ -438,8 +438,6 @@ public class SomaticIndelDetector extends ReadWalker<Integer,Integer> {
|
|||
|
||||
location = getToolkit().getGenomeLocParser().createGenomeLoc(getToolkit().getSAMFileHeader().getSequence(0).getSequenceName(),1);
|
||||
|
||||
normalSamples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeaders().get(0));
|
||||
|
||||
try {
|
||||
// we already checked that bedOutput and output_file are not set simultaneously
|
||||
if ( bedOutput != null ) bedWriter = new FileWriter(bedOutput);
|
||||
|
|
@ -1152,8 +1150,9 @@ public class SomaticIndelDetector extends ReadWalker<Integer,Integer> {
|
|||
|
||||
GenotypesContext genotypes = GenotypesContext.create();
|
||||
for ( String sample : normalSamples ) {
|
||||
final GenotypeBuilder gb = new GenotypeBuilder(sample);
|
||||
gb.attributes(call.makeStatsAttributes(null));
|
||||
GenotypeBuilder gb = new GenotypeBuilder(sample);
|
||||
|
||||
gb=call.addStatsAttributes(gb);
|
||||
gb.alleles(! discard_event
|
||||
? alleles // we made a call - put actual het genotype here:
|
||||
: homref_alleles); // no call: genotype is ref/ref (but alleles still contain the alt if we observed anything at all)
|
||||
|
|
@ -1200,8 +1199,11 @@ public class SomaticIndelDetector extends ReadWalker<Integer,Integer> {
|
|||
if ( start == 0 )
|
||||
return;
|
||||
|
||||
Map<String,Object> attrsNormal = nCall.makeStatsAttributes(null);
|
||||
Map<String,Object> attrsTumor = tCall.makeStatsAttributes(null);
|
||||
GenotypeBuilder nGB = new GenotypeBuilder();
|
||||
GenotypeBuilder tGB = new GenotypeBuilder();
|
||||
|
||||
nCall.addStatsAttributes(nGB);
|
||||
tCall.addStatsAttributes(tGB);
|
||||
|
||||
Map<String,Object> attrs = new HashMap();
|
||||
|
||||
|
|
@ -1242,11 +1244,11 @@ public class SomaticIndelDetector extends ReadWalker<Integer,Integer> {
|
|||
GenotypesContext genotypes = GenotypesContext.create();
|
||||
|
||||
for ( String sample : normalSamples ) {
|
||||
genotypes.add(GenotypeBuilder.create(sample, homRefN ? homRefAlleles : alleles, attrsNormal));
|
||||
genotypes.add(nGB.name(sample).alleles(homRefN ? homRefAlleles : alleles).make());
|
||||
}
|
||||
|
||||
for ( String sample : tumorSamples ) {
|
||||
genotypes.add(GenotypeBuilder.create(sample, homRefT ? homRefAlleles : alleles, attrsTumor));
|
||||
genotypes.add(tGB.name(sample).alleles(homRefT ? homRefAlleles : alleles).make());
|
||||
}
|
||||
|
||||
Set<String> filters = null;
|
||||
|
|
@ -1254,14 +1256,6 @@ public class SomaticIndelDetector extends ReadWalker<Integer,Integer> {
|
|||
filters = new HashSet<String>();
|
||||
filters.add("NoCall");
|
||||
}
|
||||
if ( nCall.getCoverage() < minNormalCoverage ) {
|
||||
if ( filters == null ) filters = new HashSet<String>();
|
||||
filters.add("NCov");
|
||||
}
|
||||
if ( tCall.getCoverage() < minCoverage ) {
|
||||
if ( filters == null ) filters = new HashSet<String>();
|
||||
filters.add("TCov");
|
||||
}
|
||||
|
||||
VariantContext vc = new VariantContextBuilder("IGv2_Indel_call", refName, start, stop, alleles)
|
||||
.genotypes(genotypes).filters(filters).attributes(attrs).make();
|
||||
|
|
@ -1844,6 +1838,38 @@ public class SomaticIndelDetector extends ReadWalker<Integer,Integer> {
|
|||
VCFIndelAttributes.recordStrandCounts(strand_cons.first,strand_cons.second,strand_ref.first,strand_ref.second,attr);
|
||||
return attr;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Adds alignment statistics directly into the genotype builder object.
|
||||
*
|
||||
* @param gb
|
||||
* @return
|
||||
*/
|
||||
public GenotypeBuilder addStatsAttributes(GenotypeBuilder gb) {
|
||||
if ( gb == null ) gb = new GenotypeBuilder();
|
||||
|
||||
gb = VCFIndelAttributes.recordDepth(getConsensusVariantCount(),getAllVariantCount(),getCoverage(),gb);
|
||||
|
||||
gb = VCFIndelAttributes.recordAvMM(getAvConsensusMismatches(),getAvRefMismatches(),gb);
|
||||
|
||||
gb = VCFIndelAttributes.recordAvMapQ(getAvConsensusMapq(),getAvRefMapq(),gb);
|
||||
|
||||
gb = VCFIndelAttributes.recordNQSMMRate(getNQSConsensusMMRate(),getNQSRefMMRate(),gb);
|
||||
|
||||
gb = VCFIndelAttributes.recordNQSAvQ(getNQSConsensusAvQual(),getNQSRefAvQual(),gb);
|
||||
|
||||
gb = VCFIndelAttributes.recordOffsetFromStart(from_start_median,from_start_mad,gb);
|
||||
|
||||
gb = VCFIndelAttributes.recordOffsetFromEnd(from_end_median,from_end_mad,gb);
|
||||
|
||||
PrimitivePair.Int strand_cons = getConsensusStrandCounts();
|
||||
PrimitivePair.Int strand_ref = getRefStrandCounts();
|
||||
|
||||
gb = VCFIndelAttributes.recordStrandCounts(strand_cons.first,strand_cons.second,strand_ref.first,strand_ref.second,gb);
|
||||
return gb;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
interface IndelListener {
|
||||
|
|
@ -2170,18 +2196,18 @@ class VCFIndelAttributes {
|
|||
public static Set<? extends VCFHeaderLine> getAttributeHeaderLines() {
|
||||
Set<VCFHeaderLine> lines = new HashSet<VCFHeaderLine>();
|
||||
|
||||
lines.add(new VCFFormatHeaderLine(ALLELIC_DEPTH_KEY, 2, VCFHeaderLineType.Integer, "# of reads supporting consensus indel/reference at the site"));
|
||||
lines.add(new VCFFormatHeaderLine(ALLELIC_DEPTH_KEY, 2, VCFHeaderLineType.Integer, "# of reads supporting consensus reference/indel at the site"));
|
||||
lines.add(new VCFFormatHeaderLine(DEPTH_TOTAL_KEY, 1, VCFHeaderLineType.Integer, "Total coverage at the site"));
|
||||
|
||||
lines.add(new VCFFormatHeaderLine(MAPQ_KEY, 2, VCFHeaderLineType.Float, "Average mapping qualities of consensus indel-supporting reads/reference-supporting reads"));
|
||||
lines.add(new VCFFormatHeaderLine(MAPQ_KEY, 2, VCFHeaderLineType.Float, "Average mapping qualities of ref-/consensus indel-supporting reads"));
|
||||
|
||||
lines.add(new VCFFormatHeaderLine(MM_KEY, 2, VCFHeaderLineType.Float, "Average # of mismatches per consensus indel-supporting read/per reference-supporting read"));
|
||||
lines.add(new VCFFormatHeaderLine(MM_KEY, 2, VCFHeaderLineType.Float, "Average # of mismatches per ref-/consensus indel-supporting read"));
|
||||
|
||||
lines.add(new VCFFormatHeaderLine(NQS_MMRATE_KEY, 2, VCFHeaderLineType.Float, "Within NQS window: fraction of mismatching bases in consensus indel-supporting reads/in reference-supporting reads"));
|
||||
lines.add(new VCFFormatHeaderLine(NQS_MMRATE_KEY, 2, VCFHeaderLineType.Float, "Within NQS window: fraction of mismatching bases in ref/consensus indel-supporting reads"));
|
||||
|
||||
lines.add(new VCFFormatHeaderLine(NQS_AVQ_KEY, 2, VCFHeaderLineType.Float, "Within NQS window: average quality of bases from consensus indel-supporting reads/from reference-supporting reads"));
|
||||
lines.add(new VCFFormatHeaderLine(NQS_AVQ_KEY, 2, VCFHeaderLineType.Float, "Within NQS window: average quality of bases in ref-/consensus indel-supporting reads"));
|
||||
|
||||
lines.add(new VCFFormatHeaderLine(STRAND_COUNT_KEY, 4, VCFHeaderLineType.Integer, "Strandness: counts of forward-/reverse-aligned indel-supporting reads / forward-/reverse-aligned reference supporting reads"));
|
||||
lines.add(new VCFFormatHeaderLine(STRAND_COUNT_KEY, 4, VCFHeaderLineType.Integer, "Strandness: counts of forward-/reverse-aligned reference and indel-supporting reads (FwdRef,RevRef,FwdIndel,RevIndel)"));
|
||||
|
||||
lines.add(new VCFFormatHeaderLine(RSTART_OFFSET_KEY, 2, VCFHeaderLineType.Integer, "Median/mad of indel offsets from the starts of the reads"));
|
||||
lines.add(new VCFFormatHeaderLine(REND_OFFSET_KEY, 2, VCFHeaderLineType.Integer, "Median/mad of indel offsets from the ends of the reads"));
|
||||
|
|
@ -2194,39 +2220,72 @@ class VCFIndelAttributes {
|
|||
return attrs;
|
||||
}
|
||||
|
||||
public static GenotypeBuilder recordStrandCounts(int cnt_cons_fwd, int cnt_cons_rev, int cnt_ref_fwd, int cnt_ref_rev,
|
||||
GenotypeBuilder gb) {
|
||||
return gb.attribute(STRAND_COUNT_KEY, new Integer[] {cnt_ref_fwd, cnt_ref_rev,cnt_cons_fwd, cnt_cons_rev } );
|
||||
}
|
||||
|
||||
public static Map<String,Object> recordDepth(int cnt_cons, int cnt_indel, int cnt_total, Map<String,Object> attrs) {
|
||||
attrs.put(ALLELIC_DEPTH_KEY, new Integer[] {cnt_cons, cnt_indel} );
|
||||
attrs.put(ALLELIC_DEPTH_KEY, new Integer[] {cnt_total-cnt_indel, cnt_cons} );
|
||||
attrs.put(DEPTH_TOTAL_KEY, cnt_total);
|
||||
return attrs;
|
||||
}
|
||||
|
||||
public static GenotypeBuilder recordDepth(int cnt_cons, int cnt_indel, int cnt_total, GenotypeBuilder gb) {
|
||||
return gb.AD(new int[] {cnt_total-cnt_indel,cnt_cons} ).DP(cnt_total);
|
||||
}
|
||||
|
||||
public static Map<String,Object> recordAvMapQ(double cons, double ref, Map<String,Object> attrs) {
|
||||
attrs.put(MAPQ_KEY, new Float[] {(float)cons, (float)ref} );
|
||||
attrs.put(MAPQ_KEY, new Float[] {(float)ref, (float)cons} );
|
||||
return attrs;
|
||||
}
|
||||
|
||||
public static GenotypeBuilder recordAvMapQ(double cons, double ref, GenotypeBuilder gb) {
|
||||
return gb.attribute(MAPQ_KEY,new float[] {(float)ref, (float)cons} );
|
||||
}
|
||||
|
||||
public static Map<String,Object> recordAvMM(double cons, double ref, Map<String,Object> attrs) {
|
||||
attrs.put(MM_KEY, new Float[] {(float)cons, (float)ref} );
|
||||
attrs.put(MM_KEY, new Float[] {(float)ref, (float)cons} );
|
||||
return attrs;
|
||||
}
|
||||
|
||||
public static GenotypeBuilder recordAvMM(double cons, double ref, GenotypeBuilder gb) {
|
||||
return gb.attribute(MM_KEY, new float[] {(float)ref, (float)cons} );
|
||||
}
|
||||
|
||||
public static Map<String,Object> recordNQSMMRate(double cons, double ref, Map<String,Object> attrs) {
|
||||
attrs.put(NQS_MMRATE_KEY, new Float[] {(float)cons, (float)ref} );
|
||||
attrs.put(NQS_MMRATE_KEY, new Float[] {(float)ref, (float)cons} );
|
||||
return attrs;
|
||||
}
|
||||
|
||||
public static GenotypeBuilder recordNQSMMRate(double cons, double ref, GenotypeBuilder gb) {
|
||||
return gb.attribute(NQS_MMRATE_KEY, new float[] {(float)ref, (float)cons} );
|
||||
}
|
||||
|
||||
public static Map<String,Object> recordNQSAvQ(double cons, double ref, Map<String,Object> attrs) {
|
||||
attrs.put(NQS_AVQ_KEY, new Float[] {(float)cons, (float)ref} );
|
||||
attrs.put(NQS_AVQ_KEY, new float[] {(float)ref, (float)cons} );
|
||||
return attrs;
|
||||
}
|
||||
|
||||
public static GenotypeBuilder recordNQSAvQ(double cons, double ref, GenotypeBuilder gb) {
|
||||
return gb.attribute(NQS_AVQ_KEY, new float[] {(float)ref, (float)cons} );
|
||||
}
|
||||
|
||||
public static Map<String,Object> recordOffsetFromStart(int median, int mad, Map<String,Object> attrs) {
|
||||
attrs.put(RSTART_OFFSET_KEY, new Integer[] {median, mad} );
|
||||
return attrs;
|
||||
}
|
||||
|
||||
public static GenotypeBuilder recordOffsetFromStart(int median, int mad, GenotypeBuilder gb) {
|
||||
return gb.attribute(RSTART_OFFSET_KEY, new int[] {median, mad} );
|
||||
}
|
||||
|
||||
public static Map<String,Object> recordOffsetFromEnd(int median, int mad, Map<String,Object> attrs) {
|
||||
attrs.put(REND_OFFSET_KEY, new Integer[] {median, mad} );
|
||||
return attrs;
|
||||
}
|
||||
|
||||
public static GenotypeBuilder recordOffsetFromEnd(int median, int mad, GenotypeBuilder gb) {
|
||||
return gb.attribute(REND_OFFSET_KEY, new int[] {median, mad} );
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -55,8 +55,8 @@ public class AssessReducedQuals extends LocusWalker<GenomeLoc, GenomeLoc> implem
|
|||
@Argument(fullName = "qual_epsilon", shortName = "epsilon", doc = "when |Quals_reduced_bam - Quals_original_bam| > epsilon*Quals_original_bam we output this interval", required = false)
|
||||
public int qual_epsilon = 0;
|
||||
|
||||
@Argument(fullName = "debugLevel", shortName = "debug", doc = "debug mode on")
|
||||
public int debugLevel = 0;
|
||||
@Argument(fullName = "debugLevel", shortName = "debug", doc = "debug mode on") // TODO -- best to make this optional
|
||||
public int debugLevel = 0; // TODO -- best to make this an enum or boolean
|
||||
|
||||
@Output
|
||||
protected PrintStream out;
|
||||
|
|
@ -114,7 +114,11 @@ public class AssessReducedQuals extends LocusWalker<GenomeLoc, GenomeLoc> implem
|
|||
return quals;
|
||||
}
|
||||
|
||||
// TODO -- arguments/variables should be final, not method declaration
|
||||
private final boolean isGoodRead(PileupElement p, List<String> tags){
|
||||
// TODO -- this isn't quite right. You don't need the tags here. Instead, you want to check whether the read itself (which
|
||||
// TODO -- you can get from the PileupElement) is a reduced read (not all reads from the reduced bam are reduced) and only
|
||||
// TODO -- for them do you want to ignore that min mapping quality cutoff (but you *do* still want the min base cutoff).
|
||||
return !p.isDeletion() && (tags.contains(reduced) || (tags.contains(original) && (int)p.getQual() >= 20 && p.getMappingQual() >= 20));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -459,7 +459,6 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
UAC.GLmodel = GenotypeLikelihoodsCalculationModel.Model.BOTH;
|
||||
UAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES;
|
||||
UAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
|
||||
UAC.NO_SLOD = true;
|
||||
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
|
||||
headerLines.addAll(UnifiedGenotyper.getHeaderInfo(UAC, null, null));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -46,16 +46,20 @@ import java.util.Set;
|
|||
|
||||
|
||||
/**
|
||||
* Strictly validates a variants file.
|
||||
* Validates a VCF file with an extra strict set of criteria.
|
||||
*
|
||||
* <p>
|
||||
* ValidateVariants is a GATK tool that takes a VCF file and validates much of the information inside it.
|
||||
* Checks include the correctness of the reference base(s), accuracy of AC & AN values, tests against rsIDs
|
||||
* when a dbSNP file is provided, and that all alternate alleles are present in at least one sample.
|
||||
* In addition to standard adherence to the VCF specification, this tool performs extra checks to make ensure
|
||||
* the information contained within the file is correct. Checks include the correctness of the reference base(s),
|
||||
* accuracy of AC & AN values, tests against rsIDs when a dbSNP file is provided, and that all alternate alleles
|
||||
* are present in at least one sample.
|
||||
*
|
||||
* If you are looking simply to test the adherence to the VCF specification, use --validationType NONE.
|
||||
*
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* A variant set to filter.
|
||||
* A variant set to validate.
|
||||
* </p>
|
||||
*
|
||||
* <h2>Examples</h2>
|
||||
|
|
@ -79,10 +83,9 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
|
|||
protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();
|
||||
|
||||
public enum ValidationType {
|
||||
ALL, REF, IDS, ALLELES, CHR_COUNTS
|
||||
ALL, REF, IDS, ALLELES, CHR_COUNTS, NONE
|
||||
}
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName = "validationType", shortName = "type", doc = "which validation type to run", required = false)
|
||||
protected ValidationType type = ValidationType.ALL;
|
||||
|
||||
|
|
@ -172,7 +175,7 @@ public class ValidateVariants extends RodWalker<Integer, Integer> {
|
|||
numErrors++;
|
||||
logger.warn("***** " + e.getMessage() + " *****");
|
||||
} else {
|
||||
throw new UserException.MalformedFile(file, e.getMessage());
|
||||
throw new UserException.FailsStrictValidation(file, e.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -160,7 +160,7 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
|
|||
|
||||
Map<String, Allele> alleleMap = new HashMap<String, Allele>(2);
|
||||
alleleMap.put(RawHapMapFeature.DELETION, Allele.create(ref.getBase(), dbsnpVC.isSimpleInsertion()));
|
||||
alleleMap.put(RawHapMapFeature.INSERTION, Allele.create(ref.getBase() + ((RawHapMapFeature)record).getAlleles()[1], !dbsnpVC.isSimpleInsertion()));
|
||||
alleleMap.put(RawHapMapFeature.INSERTION, Allele.create((char)ref.getBase() + ((RawHapMapFeature)record).getAlleles()[1], !dbsnpVC.isSimpleInsertion()));
|
||||
hapmap.setActualAlleles(alleleMap);
|
||||
|
||||
// also, use the correct positioning for insertions
|
||||
|
|
|
|||
|
|
@ -346,7 +346,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
|
|||
|
||||
@Override
|
||||
protected String getFreezeFields() {
|
||||
return String.format("if (num_threads.isDefined) nCoresRequest = num_threads%n");
|
||||
return String.format("if (num_threads.isDefined) nCoresRequest = num_threads%nif (num_cpu_threads_per_data_thread.isDefined) nCoresRequest = Some(nCoresRequest.getOrElse(1) * num_cpu_threads_per_data_thread.getOrElse(1))%n");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -4,12 +4,21 @@ package org.broadinstitute.sting.utils;
|
|||
* Simple utility class that makes it convenient to print unit adjusted times
|
||||
*/
|
||||
public class AutoFormattingTime {
|
||||
double timeInSeconds; // in Seconds
|
||||
int precision; // for format
|
||||
private final int width; // for format
|
||||
private final int precision; // for format
|
||||
|
||||
public AutoFormattingTime(double timeInSeconds, int precision) {
|
||||
double timeInSeconds; // in Seconds
|
||||
private final String formatString;
|
||||
|
||||
public AutoFormattingTime(double timeInSeconds, final int width, int precision) {
|
||||
this.width = width;
|
||||
this.timeInSeconds = timeInSeconds;
|
||||
this.precision = precision;
|
||||
this.formatString = "%" + width + "." + precision + "f %s";
|
||||
}
|
||||
|
||||
public AutoFormattingTime(double timeInSeconds, int precision) {
|
||||
this(timeInSeconds, 6, precision);
|
||||
}
|
||||
|
||||
public AutoFormattingTime(double timeInSeconds) {
|
||||
|
|
@ -20,6 +29,20 @@ public class AutoFormattingTime {
|
|||
return timeInSeconds;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the precision (a la format's %WIDTH.PERCISIONf)
|
||||
*/
|
||||
public int getWidth() {
|
||||
return width;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the precision (a la format's %WIDTH.PERCISIONf)
|
||||
*/
|
||||
public int getPrecision() {
|
||||
return precision;
|
||||
}
|
||||
|
||||
/**
|
||||
* Instead of 10000 s, returns 2.8 hours
|
||||
* @return
|
||||
|
|
@ -48,6 +71,6 @@ public class AutoFormattingTime {
|
|||
}
|
||||
}
|
||||
|
||||
return String.format("%6."+precision+"f %s", unitTime, unit);
|
||||
return String.format(formatString, unitTime, unit);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -159,6 +159,7 @@ public class VCFHeader {
|
|||
*/
|
||||
public void addMetaDataLine(VCFHeaderLine headerLine) {
|
||||
mMetaData.add(headerLine);
|
||||
loadMetaDataMaps();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -236,7 +237,6 @@ public class VCFHeader {
|
|||
+ VCFConstants.GENOTYPE_PL_KEY + " field. As the GATK now only manages PL fields internally"
|
||||
+ " automatically adding a corresponding PL field to your VCF header");
|
||||
addMetaDataLine(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_PL_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"));
|
||||
loadMetaDataMaps();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -283,6 +283,12 @@ public class UserException extends ReviewedStingException {
|
|||
}
|
||||
}
|
||||
|
||||
public static class FailsStrictValidation extends UserException {
|
||||
public FailsStrictValidation(File f, String message) {
|
||||
super(String.format("File %s fails strict validation: %s", f.getAbsolutePath(), message));
|
||||
}
|
||||
}
|
||||
|
||||
public static class MalformedFile extends UserException {
|
||||
public MalformedFile(String message) {
|
||||
super(String.format("Unknown file is malformed: %s", message));
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.progressmeter;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Invariant;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
|
|
@ -200,6 +201,14 @@ public class ProgressMeter {
|
|||
"Location", processingUnitName, processingUnitName));
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the current runtime in nanoseconds
|
||||
*/
|
||||
@Ensures("result >= 0")
|
||||
public long getRuntimeInNanoseconds() {
|
||||
return timer.getElapsedTimeNano();
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility routine that prints out process information (including timing) every N records or
|
||||
* every M seconds, for N and M set in global variables.
|
||||
|
|
|
|||
|
|
@ -61,17 +61,6 @@ public class BaseRecalibration {
|
|||
// qualityScoreByFullCovariateKey[i] = new NestedHashMap();
|
||||
// }
|
||||
|
||||
/**
|
||||
* Thread local cache to allow multi-threaded use of this class
|
||||
*/
|
||||
private ThreadLocal<ReadCovariates> readCovariatesCache;
|
||||
{
|
||||
readCovariatesCache = new ThreadLocal<ReadCovariates> () {
|
||||
@Override protected ReadCovariates initialValue() {
|
||||
return new ReadCovariates(MAXIMUM_RECALIBRATED_READ_LENGTH, requestedCovariates.length);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor using a GATK Report file
|
||||
|
|
@ -113,16 +102,7 @@ public class BaseRecalibration {
|
|||
}
|
||||
}
|
||||
|
||||
// Compute all covariates for the read
|
||||
// TODO -- the need to clear here suggests there's an error in the indexing / assumption code
|
||||
// TODO -- for BI and DI. Perhaps due to the indel buffer size on the ends of the reads?
|
||||
// TODO -- the output varies depending on whether we clear or not
|
||||
//final ReadCovariates readCovariates = readCovariatesCache.get().clear();
|
||||
|
||||
// the original code -- doesn't do any clearing
|
||||
final ReadCovariates readCovariates = readCovariatesCache.get();
|
||||
|
||||
RecalUtils.computeCovariates(read, requestedCovariates, readCovariates);
|
||||
final ReadCovariates readCovariates = RecalUtils.computeCovariates(read, requestedCovariates);
|
||||
|
||||
for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings
|
||||
if (disableIndelQuals && errorModel != EventType.BASE_SUBSTITUTION) {
|
||||
|
|
|
|||
|
|
@ -1073,13 +1073,13 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
}
|
||||
|
||||
if ( reportedAlleles.size() != observedAlleles.size() )
|
||||
throw new TribbleException.InternalCodecException(String.format("the ALT allele(s) for the record at position %s:%d do not match what is observed in the per-sample genotypes", getChr(), getStart()));
|
||||
throw new TribbleException.InternalCodecException(String.format("one or more of the ALT allele(s) for the record at position %s:%d are not observed at all in the sample genotypes", getChr(), getStart()));
|
||||
|
||||
int originalSize = reportedAlleles.size();
|
||||
// take the intersection and see if things change
|
||||
observedAlleles.retainAll(reportedAlleles);
|
||||
if ( observedAlleles.size() != originalSize )
|
||||
throw new TribbleException.InternalCodecException(String.format("the ALT allele(s) for the record at position %s:%d do not match what is observed in the per-sample genotypes", getChr(), getStart()));
|
||||
throw new TribbleException.InternalCodecException(String.format("one or more of the ALT allele(s) for the record at position %s:%d are not observed at all in the sample genotypes", getChr(), getStart()));
|
||||
}
|
||||
|
||||
public void validateChromosomeCounts() {
|
||||
|
|
|
|||
|
|
@ -193,6 +193,8 @@ class JEXLMap implements Map<VariantContextUtils.JexlVCMatchExp, Boolean> {
|
|||
infoMap.put("isHet", g.isHet() ? "1" : "0");
|
||||
infoMap.put("isHomVar", g.isHomVar() ? "1" : "0");
|
||||
infoMap.put(VCFConstants.GENOTYPE_QUALITY_KEY, g.getGQ());
|
||||
if ( g.hasDP() )
|
||||
infoMap.put(VCFConstants.DEPTH_KEY, g.getDP());
|
||||
for ( Map.Entry<String, Object> e : g.getExtendedAttributes().entrySet() ) {
|
||||
if ( e.getValue() != null && !e.getValue().equals(VCFConstants.MISSING_VALUE_v4) )
|
||||
infoMap.put(e.getKey(), e.getValue());
|
||||
|
|
|
|||
|
|
@ -0,0 +1,86 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.broadinstitute.sting.utils.SimpleTimer;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class MaxRuntimeIntegrationTest extends WalkerTest {
|
||||
private static final long STARTUP_TIME = TimeUnit.NANOSECONDS.convert(20, TimeUnit.SECONDS);
|
||||
|
||||
private class MaxRuntimeTestProvider extends TestDataProvider {
|
||||
final long maxRuntime;
|
||||
final TimeUnit unit;
|
||||
|
||||
public MaxRuntimeTestProvider(final long maxRuntime, final TimeUnit unit) {
|
||||
super(MaxRuntimeTestProvider.class);
|
||||
this.maxRuntime = maxRuntime;
|
||||
this.unit = unit;
|
||||
setName(String.format("Max runtime test : %d of %s", maxRuntime, unit));
|
||||
}
|
||||
|
||||
public long expectedMaxRuntimeNano() {
|
||||
return TimeUnit.NANOSECONDS.convert(maxRuntime, unit) + STARTUP_TIME;
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "MaxRuntimeProvider")
|
||||
public Object[][] makeMaxRuntimeProvider() {
|
||||
for ( final TimeUnit requestedUnits : Arrays.asList(TimeUnit.NANOSECONDS, TimeUnit.MILLISECONDS, TimeUnit.SECONDS, TimeUnit.MINUTES) )
|
||||
new MaxRuntimeTestProvider(requestedUnits.convert(30, TimeUnit.SECONDS), requestedUnits);
|
||||
|
||||
return MaxRuntimeTestProvider.getTests(MaxRuntimeTestProvider.class);
|
||||
}
|
||||
|
||||
//
|
||||
// Loop over errors to throw, make sure they are the errors we get back from the engine, regardless of NT type
|
||||
//
|
||||
@Test(enabled = true, dataProvider = "MaxRuntimeProvider", timeOut = 60 * 1000)
|
||||
public void testMaxRuntime(final MaxRuntimeTestProvider cfg) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + hg18Reference
|
||||
+ " -I " + validationDataLocation + "NA12878.WEx.downsampled20x.bam -o /dev/null"
|
||||
+ " -maxRuntime " + cfg.maxRuntime + " -maxRuntimeUnits " + cfg.unit, 0,
|
||||
Collections.<String>emptyList());
|
||||
final SimpleTimer timer = new SimpleTimer().start();
|
||||
executeTest("Max runtime " + cfg, spec);
|
||||
final long actualRuntimeNano = timer.getElapsedTimeNano();
|
||||
|
||||
Assert.assertTrue(actualRuntimeNano < cfg.expectedMaxRuntimeNano(),
|
||||
"Actual runtime " + TimeUnit.SECONDS.convert(actualRuntimeNano, TimeUnit.NANOSECONDS)
|
||||
+ " exceeded max. tolerated runtime " + TimeUnit.SECONDS.convert(cfg.expectedMaxRuntimeNano(), TimeUnit.NANOSECONDS)
|
||||
+ " given requested runtime " + cfg.maxRuntime + " " + cfg.unit);
|
||||
}
|
||||
}
|
||||
|
|
@ -108,4 +108,22 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
|||
Arrays.asList("8ed32a2272bab8043a255362335395ef"));
|
||||
executeTest("testUnfilteredBecomesFilteredAndPass", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFilteringDPfromINFO() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T VariantFiltration -o %s --no_cmdline_in_header -R " + b37KGReference
|
||||
+ " --filterExpression 'DP < 8' --filterName lowDP -V " + privateTestDir + "filteringDepthInFormat.vcf", 1,
|
||||
Arrays.asList("a01f7cce53ea556c9741aa60b6124c41"));
|
||||
executeTest("testFilteringDPfromINFO", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFilteringDPfromFORMAT() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T VariantFiltration -o %s --no_cmdline_in_header -R " + b37KGReference
|
||||
+ " --genotypeFilterExpression 'DP < 8' --genotypeFilterName lowDP -V " + privateTestDir + "filteringDepthInFormat.vcf", 1,
|
||||
Arrays.asList("e10485c7c33d9211d0c1294fd7858476"));
|
||||
executeTest("testFilteringDPfromFORMAT", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -16,9 +16,9 @@ import java.util.List;
|
|||
|
||||
public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||
|
||||
private final static String baseCommand = "-T UnifiedGenotyper -R " + b36KGReference + " -nosl --no_cmdline_in_header -glm BOTH -minIndelFrac 0.0 --dbsnp " + b36dbSNP129;
|
||||
private final static String baseCommandIndels = "-T UnifiedGenotyper -R " + b36KGReference + " -nosl --no_cmdline_in_header -glm INDEL -mbq 20 -minIndelFrac 0.0 --dbsnp " + b36dbSNP129;
|
||||
private final static String baseCommandIndelsb37 = "-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -mbq 20 --dbsnp " + b37dbSNP132;
|
||||
private final static String baseCommand = "-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -minIndelFrac 0.0 --dbsnp " + b36dbSNP129;
|
||||
private final static String baseCommandIndels = "-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm INDEL -mbq 20 -minIndelFrac 0.0 --dbsnp " + b36dbSNP129;
|
||||
private final static String baseCommandIndelsb37 = "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -mbq 20 --dbsnp " + b37dbSNP132;
|
||||
private final static String baseCommandNoCmdLineHeaderStdout = "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam";
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
|
@ -30,7 +30,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot1() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
|
||||
Arrays.asList("847605f4efafef89529fe0e496315edd"));
|
||||
Arrays.asList("cdec335abc9ad8e59335e39a73e0e95a"));
|
||||
executeTest("test MultiSample Pilot1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -38,7 +38,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testWithAllelesPassedIn1() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||
Arrays.asList("bc15123620e1134f799005d71d1180fe"));
|
||||
Arrays.asList("efddb5e258f97fd4f6661cff9eaa57de"));
|
||||
executeTest("test MultiSample Pilot2 with alleles passed in", spec1);
|
||||
}
|
||||
|
||||
|
|
@ -46,7 +46,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testWithAllelesPassedIn2() {
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||
Arrays.asList("1ba7afccc8552f20d72d0b62237558e3"));
|
||||
Arrays.asList("24532eb381724cd74e99370da28d49ed"));
|
||||
executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -54,22 +54,22 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testSingleSamplePilot2() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("afb8768f31ab57eb43f75c1115eadc99"));
|
||||
Arrays.asList("062a946160eec1d0fc135d58ca654ff4"));
|
||||
executeTest("test SingleSample Pilot2", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultipleSNPAlleles() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
|
||||
Arrays.asList("543f68e42034bf44cfb24da8c9204320"));
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
|
||||
Arrays.asList("94dc17d76d841f1d3a36160767ffa034"));
|
||||
executeTest("test Multiple SNP alleles", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBadRead() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH -I " + privateTestDir + "badRead.test.bam -o %s -L 1:22753424-22753464", 1,
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH -I " + privateTestDir + "badRead.test.bam -o %s -L 1:22753424-22753464", 1,
|
||||
Arrays.asList("d915535c1458733f09f82670092fcab6"));
|
||||
executeTest("test bad read", spec);
|
||||
}
|
||||
|
|
@ -77,16 +77,16 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testReverseTrim() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1,
|
||||
Arrays.asList("5ce03dd9ca2d9324c1d4a9d64389beb5"));
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1,
|
||||
Arrays.asList("9106d01ca0d0a8fedd068e72d509f380"));
|
||||
executeTest("test reverse trim", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMismatchedPLs() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1,
|
||||
Arrays.asList("3c006b06b17bbe8e787d64eff6a63a19"));
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1,
|
||||
Arrays.asList("d847acf841ba8ba653f996ce4869f439"));
|
||||
executeTest("test mismatched PLs", spec);
|
||||
}
|
||||
|
||||
|
|
@ -96,7 +96,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
private final static String COMPRESSED_OUTPUT_MD5 = "fd236bd635d514e4214d364f45ec4d10";
|
||||
private final static String COMPRESSED_OUTPUT_MD5 = "6792419c482e767a3deb28913ed2b1ad";
|
||||
|
||||
@Test
|
||||
public void testCompressedOutput() {
|
||||
|
|
@ -120,21 +120,21 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
String md5 = "d408b4661b820ed86272415b8ea08780";
|
||||
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
|
||||
baseCommand + " -dt NONE -G none --contamination_fraction_to_filter 0.0 -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
|
||||
Arrays.asList(md5));
|
||||
executeTest("test parallelization (single thread)", spec1);
|
||||
|
||||
GenomeAnalysisEngine.resetRandomGenerator();
|
||||
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000 -nt 2", 1,
|
||||
baseCommand + " -dt NONE -G none --contamination_fraction_to_filter 0.0 -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000 -nt 2", 1,
|
||||
Arrays.asList(md5));
|
||||
executeTest("test parallelization (2 threads)", spec2);
|
||||
|
||||
GenomeAnalysisEngine.resetRandomGenerator();
|
||||
|
||||
WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000 -nt 4", 1,
|
||||
baseCommand + " -dt NONE -G none --contamination_fraction_to_filter 0.0 -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000 -nt 4", 1,
|
||||
Arrays.asList(md5));
|
||||
executeTest("test parallelization (4 threads)", spec3);
|
||||
}
|
||||
|
|
@ -149,15 +149,15 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMinBaseQualityScore() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --min_base_quality_score 26", 1,
|
||||
Arrays.asList("839ecd30d354a36b5dfa2b5e99859765"));
|
||||
Arrays.asList("56157d930da6ccd224bce1ca93f11e41"));
|
||||
executeTest("test min_base_quality_score 26", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSLOD() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
|
||||
Arrays.asList("c7429e670ba477bf9a6bbee2fb41c5a9"));
|
||||
"-T UnifiedGenotyper -R " + b36KGReference + " --computeSLOD --no_cmdline_in_header -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
|
||||
Arrays.asList("6ccb9bd88934e4272d0ce362dd35e603"));
|
||||
executeTest("test SLOD", spec);
|
||||
}
|
||||
|
||||
|
|
@ -165,7 +165,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testNDA() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --annotateNDA -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
|
||||
Arrays.asList("abd8e33e649cc11b55e200d3940cc7e2"));
|
||||
Arrays.asList("480437dd6e2760f4ab3194431519f331"));
|
||||
executeTest("test NDA", spec);
|
||||
}
|
||||
|
||||
|
|
@ -173,7 +173,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testCompTrack() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -comp:FOO " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
|
||||
Arrays.asList("8a9b424e00cdbe6b5e73d517335b2186"));
|
||||
Arrays.asList("22c039412fd387dde6125b07c9a74a25"));
|
||||
executeTest("test using comp track", spec);
|
||||
}
|
||||
|
||||
|
|
@ -187,17 +187,17 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testOutputParameterSitesOnly() {
|
||||
testOutputParameters("-sites_only", "97ba874eafc9884a4de027a84c036311");
|
||||
testOutputParameters("-sites_only", "40aeb4c9e31fe7046b72afc58e7599cb");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOutputParameterAllConfident() {
|
||||
testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "f9ea04d96eeef29e71d37e60518c2579");
|
||||
testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "c706ca93b25ff83613cb4e95dcac567c");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOutputParameterAllSites() {
|
||||
testOutputParameters("--output_mode EMIT_ALL_SITES", "41c046d38ea328421df924e37e017645");
|
||||
testOutputParameters("--output_mode EMIT_ALL_SITES", "8a263fd0a94463ce1de9990f2b8ec841");
|
||||
}
|
||||
|
||||
private void testOutputParameters(final String args, final String md5) {
|
||||
|
|
@ -211,18 +211,10 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testConfidence() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
|
||||
Arrays.asList("9addd225a985178339a0c49dc5fdc220"));
|
||||
Arrays.asList("df524e98903d96ab9353bee7c16a69de"));
|
||||
executeTest("test confidence 1", spec1);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testConfidence2() {
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1,
|
||||
Arrays.asList("9addd225a985178339a0c49dc5fdc220"));
|
||||
executeTest("test confidence 2", spec2);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// testing heterozygosity
|
||||
|
|
@ -230,12 +222,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
// --------------------------------------------------------------------------------------------------------------
|
||||
@Test
|
||||
public void testHeterozyosity1() {
|
||||
testHeterozosity( 0.01, "986923de51c71635d47e3d06fe3794a1" );
|
||||
testHeterozosity( 0.01, "8e61498ca03a8d805372a64c466b3b42" );
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHeterozyosity2() {
|
||||
testHeterozosity( 1.0 / 1850, "fb12b1553f813004a394a391a8540873" );
|
||||
testHeterozosity( 1.0 / 1850, "668d06b5173cf3b97d052726988e1d7b" );
|
||||
}
|
||||
|
||||
private void testHeterozosity(final double arg, final String md5) {
|
||||
|
|
@ -259,7 +251,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("04a87b87ee4323eba853c78f25551d1a"));
|
||||
Arrays.asList("908eb5e21fa39e7fb377cf4a9c4c7835"));
|
||||
|
||||
executeTest(String.format("test multiple technologies"), spec);
|
||||
}
|
||||
|
|
@ -278,7 +270,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -L 1:10,000,000-10,100,000" +
|
||||
" -baq CALCULATE_AS_NECESSARY",
|
||||
1,
|
||||
Arrays.asList("98058fc913b61c22d44875da1f5ea89c"));
|
||||
Arrays.asList("c814558bb0ed2e19b12e1a2bf4465d52"));
|
||||
|
||||
executeTest(String.format("test calling with BAQ"), spec);
|
||||
}
|
||||
|
|
@ -297,7 +289,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("650c53774afacfc07a595675e8cdde17"));
|
||||
Arrays.asList("3593495aab5f6204c65de0b073a6ff65"));
|
||||
|
||||
executeTest(String.format("test indel caller in SLX"), spec);
|
||||
}
|
||||
|
|
@ -312,7 +304,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -minIndelCnt 1" +
|
||||
" -L 1:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("6a0c2a3a7bcc56ad01428c71408055aa"));
|
||||
Arrays.asList("8b486a098029d5a106b0a37eff541c15"));
|
||||
|
||||
executeTest(String.format("test indel caller in SLX with low min allele count"), spec);
|
||||
}
|
||||
|
|
@ -325,7 +317,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("5f2721c3323de5390d2d47446139f32b"));
|
||||
Arrays.asList("18efedc50cae2aacaba372265e38310b"));
|
||||
|
||||
executeTest(String.format("test indel calling, multiple technologies"), spec);
|
||||
}
|
||||
|
|
@ -335,7 +327,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("7e3f67bf371112be5dbadb4fe6faa52a"));
|
||||
Arrays.asList("3ff8c7c80a518aa3eb8671a21479de5f"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec);
|
||||
}
|
||||
|
||||
|
|
@ -345,7 +337,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
|
||||
+ privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("bc31c4977cb7e563ddf9c8dea27f3f4f"));
|
||||
Arrays.asList("578c0540f4f2052a634a829bcb9cc27d"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec);
|
||||
}
|
||||
|
||||
|
|
@ -353,13 +345,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSampleIndels1() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
|
||||
Arrays.asList("a4761d7f25e7a62f34494801c98a0da7"));
|
||||
Arrays.asList("f7d0d0aee603df25c1f0525bb8df189e"));
|
||||
List<File> result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst();
|
||||
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
|
||||
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
|
||||
Arrays.asList("c526c234947482d1cd2ffc5102083a08"));
|
||||
Arrays.asList("fc91d457a16b4ca994959c2b5f3f0352"));
|
||||
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -415,7 +407,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMinIndelFraction0() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
assessMinIndelFraction + " -minIndelFrac 0.0", 1,
|
||||
Arrays.asList("ba4fafec383fb988f20c8cf53dd3e9a0"));
|
||||
Arrays.asList("857b8e5df444463ac27f665c4f67fbe2"));
|
||||
executeTest("test minIndelFraction 0.0", spec);
|
||||
}
|
||||
|
||||
|
|
@ -423,7 +415,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMinIndelFraction25() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
assessMinIndelFraction + " -minIndelFrac 0.25", 1,
|
||||
Arrays.asList("4c57a88de275105156aaafc6f9041365"));
|
||||
Arrays.asList("81d4c7d9010fd6733b2997bc378e7471"));
|
||||
executeTest("test minIndelFraction 0.25", spec);
|
||||
}
|
||||
|
||||
|
|
@ -444,8 +436,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testNsInCigar() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -I " + validationDataLocation + "testWithNs.bam -o %s -L 8:141799600-141814700", 1,
|
||||
Arrays.asList("e8ebfaac0804b782f22ab8ea35152735"));
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + validationDataLocation + "testWithNs.bam -o %s -L 8:141799600-141814700", 1,
|
||||
Arrays.asList("bd7984a374f0ae5d277bd5fc5065f64f"));
|
||||
executeTest("test calling on reads with Ns in CIGAR", spec);
|
||||
}
|
||||
|
||||
|
|
@ -458,25 +450,25 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testReducedBam() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
|
||||
Arrays.asList("da9c05f87bd6415e97f90c49cf68ed19"));
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
|
||||
Arrays.asList("9a7cd58b9e3d5b72608c0d529321deba"));
|
||||
executeTest("test calling on a ReducedRead BAM", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReducedBamSNPs() {
|
||||
testReducedCalling("SNP", "1d4a826b144723ff0766c36aa0239287");
|
||||
testReducedCalling("SNP", "e7fc11baf208a1bca7b462d3148c936e");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReducedBamINDELs() {
|
||||
testReducedCalling("INDEL", "68ef51d5c98480e0c0192e0eecb95bca");
|
||||
testReducedCalling("INDEL", "132a4e0ccf9230b5bb4b56c649e2bdd5");
|
||||
}
|
||||
|
||||
|
||||
private void testReducedCalling(final String model, final String md5) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -I " + privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.reduced.bam -o %s -L 20:10,000,000-11,000,000 -glm " + model, 1,
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.reduced.bam -o %s -L 20:10,000,000-11,000,000 -glm " + model, 1,
|
||||
Arrays.asList(md5));
|
||||
executeTest("test calling on a ReducedRead BAM with " + model, spec);
|
||||
}
|
||||
|
|
@ -490,7 +482,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testContaminationDownsampling() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --contamination_percentage_to_filter 0.20", 1,
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --contamination_fraction_to_filter 0.20", 1,
|
||||
Arrays.asList("27dd04159e06d9524fb8a4eef41f96ae"));
|
||||
executeTest("test contamination_percentage_to_filter 0.20", spec);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -33,6 +33,8 @@ import java.util.Arrays;
|
|||
|
||||
public class ValidateVariantsIntegrationTest extends WalkerTest {
|
||||
|
||||
protected static final String emptyMd5 = "d41d8cd98f00b204e9800998ecf8427e";
|
||||
|
||||
public static String baseTestString(String file, String type) {
|
||||
return "-T ValidateVariants -R " + b36KGReference + " -L 1:10001292-10001303 --variant:vcf " + privateTestDir + file + " --validationType " + type;
|
||||
}
|
||||
|
|
@ -42,7 +44,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString("validationExampleGood.vcf", "ALL"),
|
||||
0,
|
||||
Arrays.asList("d41d8cd98f00b204e9800998ecf8427e")
|
||||
Arrays.asList(emptyMd5)
|
||||
);
|
||||
|
||||
executeTest("test good file", spec);
|
||||
|
|
@ -53,7 +55,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString("validationExampleBad.vcf", "REF"),
|
||||
0,
|
||||
UserException.MalformedFile.class
|
||||
UserException.FailsStrictValidation.class
|
||||
);
|
||||
|
||||
executeTest("test bad ref base #1", spec);
|
||||
|
|
@ -64,7 +66,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString("validationExampleBad2.vcf", "REF"),
|
||||
0,
|
||||
UserException.MalformedFile.class
|
||||
UserException.FailsStrictValidation.class
|
||||
);
|
||||
|
||||
executeTest("test bad ref base #2", spec);
|
||||
|
|
@ -75,7 +77,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString("validationExampleBad.vcf", "CHR_COUNTS"),
|
||||
0,
|
||||
UserException.MalformedFile.class
|
||||
UserException.FailsStrictValidation.class
|
||||
);
|
||||
|
||||
executeTest("test bad chr counts #1", spec);
|
||||
|
|
@ -86,7 +88,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString("validationExampleBad2.vcf", "CHR_COUNTS"),
|
||||
0,
|
||||
UserException.MalformedFile.class
|
||||
UserException.FailsStrictValidation.class
|
||||
);
|
||||
|
||||
executeTest("test bad chr counts #2", spec);
|
||||
|
|
@ -97,7 +99,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString("validationExampleBad.vcf", "IDS") + " --dbsnp " + b36dbSNP129,
|
||||
0,
|
||||
UserException.MalformedFile.class
|
||||
UserException.FailsStrictValidation.class
|
||||
);
|
||||
|
||||
executeTest("test bad RS ID", spec);
|
||||
|
|
@ -108,7 +110,7 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString("validationExampleBad.vcf", "ALLELES"),
|
||||
0,
|
||||
UserException.MalformedFile.class
|
||||
UserException.FailsStrictValidation.class
|
||||
);
|
||||
|
||||
executeTest("test bad alt allele", spec);
|
||||
|
|
@ -119,18 +121,29 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString("validationExampleBad3.vcf", "REF"),
|
||||
0,
|
||||
UserException.MalformedFile.class
|
||||
UserException.FailsStrictValidation.class
|
||||
);
|
||||
|
||||
executeTest("test bad ref allele in deletion", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoValidation() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString("validationExampleBad.vcf", "NONE"),
|
||||
0,
|
||||
Arrays.asList(emptyMd5)
|
||||
);
|
||||
|
||||
executeTest("test no validation", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testComplexEvents() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString("complexEvents.vcf", "ALL"),
|
||||
0,
|
||||
Arrays.asList("d41d8cd98f00b204e9800998ecf8427e")
|
||||
Arrays.asList(emptyMd5)
|
||||
);
|
||||
|
||||
executeTest("test validating complex events", spec);
|
||||
|
|
|
|||
|
|
@ -32,11 +32,12 @@ public class NanoSchedulerIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
buildCommandLine(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference,
|
||||
"-nosl --no_cmdline_in_header -G",
|
||||
"--no_cmdline_in_header -G",
|
||||
//"--dbsnp " + b37dbSNP132,
|
||||
"-I " + privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.bam",
|
||||
"-L 20:10,000,000-10,100,000",
|
||||
"-glm " + glm,
|
||||
"--contamination_fraction_to_filter 0.0",
|
||||
"-nt " + nt,
|
||||
"-nct " + nct,
|
||||
"-o %s"
|
||||
|
|
|
|||
|
|
@ -141,8 +141,8 @@ class GATKResourcesBundle extends QScript {
|
|||
// CEU trio (NA12878,NA12891,NA12892) best practices results (including PBT)
|
||||
//
|
||||
|
||||
addResource(new Resource("/humgen/gsa-hpprojects/NA12878Collection/callsets/CEUtrio_BestPractices/current/CEUTrio.HiSeq.WGS.b37.UG.snps_and_indels.recalibrated.filtered.phaseByTransmission.vcf",
|
||||
"CEUTrio.HiSeq.WGS.b37.UG.bestPractices.phaseByTransmission",b37,true,false))
|
||||
addResource(new Resource("/humgen/gsa-hpprojects/NA12878Collection/callsets/CEUtrio_BestPractices/CEUTrio.HiSeq.WGS.b37.snps_and_indels.recalibrated.filtered.phased.CURRENT.vcf",
|
||||
"CEUTrio.HiSeq.WGS.b37.bestPractices.phased",b37,true,false))
|
||||
|
||||
//
|
||||
// example call set for wiki tutorial
|
||||
|
|
|
|||
|
|
@ -60,7 +60,7 @@ class DataProcessingPipelineTest {
|
|||
" -bwa /home/unix/carneiro/bin/bwa",
|
||||
" -bwape ",
|
||||
" -p " + projectName).mkString
|
||||
spec.fileMD5s += testOut -> "6e70efbe6bafc3fedd60bd406bd201db"
|
||||
spec.fileMD5s += testOut -> "9fca827ecc8436465b831bb6f879357a"
|
||||
PipelineTest.executeTest(spec)
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ class PacbioProcessingPipelineTest {
|
|||
" -blasr ",
|
||||
" -test ",
|
||||
" -D " + BaseTest.publicTestDir + "exampleDBSNP.vcf").mkString
|
||||
spec.fileMD5s += testOut -> "61b06e8b78a93e6644657e6d38851084"
|
||||
spec.fileMD5s += testOut -> "b84f9c45e045685067ded681d5e6224c"
|
||||
PipelineTest.executeTest(spec)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue