Merge branch 'master' of ssh://gsa2.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
5db3bd6fd2
|
|
@ -75,10 +75,11 @@ public class BQSRIntegrationTest extends WalkerTest {
|
|||
Arrays.asList(params.md5));
|
||||
executeTest("testBQSR-"+params.args, spec).getFirst();
|
||||
|
||||
WalkerTestSpec specNT2 = new WalkerTestSpec(
|
||||
params.getCommandLine() + " -nt 2",
|
||||
Arrays.asList(params.md5));
|
||||
executeTest("testBQSR-nt2-"+params.args, specNT2).getFirst();
|
||||
// TODO -- re-enable once parallelization is fixed in BaseRecalibrator
|
||||
//WalkerTestSpec specNT2 = new WalkerTestSpec(
|
||||
// params.getCommandLine() + " -nt 2",
|
||||
// Arrays.asList(params.md5));
|
||||
//executeTest("testBQSR-nt2-"+params.args, specNT2).getFirst();
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
|||
|
|
@ -22,19 +22,10 @@ import java.util.Map;
|
|||
/**
|
||||
* Total (unfiltered) depth over all samples.
|
||||
*
|
||||
* This and AD are complementary fields that are two important ways of thinking about the depth of the data for this sample
|
||||
* at this site. The DP field describes the total depth of reads that passed the Unified Genotyper's internal
|
||||
* quality control metrics (like MAPQ > 17, for example), whatever base was present in the read at this site.
|
||||
* The AD values (one for each of REF and ALT fields) are the counts of all reads that carried with them the
|
||||
* REF and ALT alleles. The reason for this distinction is that the DP is in some sense reflective of the
|
||||
* power I have to determine the genotype of the sample at this site, while the AD tells me how many times
|
||||
* I saw each of the REF and ALT alleles in the reads, free of any bias potentially introduced by filtering
|
||||
* the reads. If, for example, I believe there really is an A/T polymorphism at a site, then I would like
|
||||
* to know the counts of A and T bases in this sample, even for reads with poor mapping quality that would
|
||||
* normally be excluded from the statistical calculations going into GQ and QUAL.
|
||||
*
|
||||
* Note that the DP is affected by downsampling (-dcov) though, so the max value one can obtain for N samples with
|
||||
* -dcov D is N * D
|
||||
* While the sample-level (FORMAT) DP field describes the total depth of reads that passed the Unified Genotyper's
|
||||
* internal quality control metrics (like MAPQ > 17, for example), the INFO field DP represents the unfiltered depth
|
||||
* over all samples. Note though that the DP is affected by downsampling (-dcov), so the max value one can obtain for
|
||||
* N samples with -dcov D is N * D
|
||||
*/
|
||||
public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
|
||||
|
||||
|
|
|
|||
|
|
@ -24,10 +24,10 @@ import java.util.List;
|
|||
/**
|
||||
* The depth of coverage of each VCF allele in this sample.
|
||||
*
|
||||
* This and DP are complementary fields that are two important ways of thinking about the depth of the data for this sample
|
||||
* at this site. The DP field describes the total depth of reads that passed the Unified Genotyper's internal
|
||||
* quality control metrics (like MAPQ > 17, for example), whatever base was present in the read at this site.
|
||||
* The AD values (one for each of REF and ALT fields) are the counts of all reads that carried with them the
|
||||
* The AD and DP are complementary fields that are two important ways of thinking about the depth of the data for this
|
||||
* sample at this site. While the sample-level (FORMAT) DP field describes the total depth of reads that passed the
|
||||
* Unified Genotyper's internal quality control metrics (like MAPQ > 17, for example), the AD values (one for each of
|
||||
* REF and ALT fields) are the unfiltered counts of all reads that carried with them the
|
||||
* REF and ALT alleles. The reason for this distinction is that the DP is in some sense reflective of the
|
||||
* power I have to determine the genotype of the sample at this site, while the AD tells me how many times
|
||||
* I saw each of the REF and ALT alleles in the reads, free of any bias potentially introduced by filtering
|
||||
|
|
@ -35,10 +35,10 @@ import java.util.List;
|
|||
* to know the counts of A and T bases in this sample, even for reads with poor mapping quality that would
|
||||
* normally be excluded from the statistical calculations going into GQ and QUAL. Please note, however, that
|
||||
* the AD isn't necessarily calculated exactly for indels (it counts as non-reference only those indels that
|
||||
* are actually present and correctly left-aligned in the alignments themselves). Because of this fact and
|
||||
* are unambiguously informative about the alternate allele). Because of this fact and
|
||||
* because the AD includes reads and bases that were filtered by the Unified Genotyper, <b>one should not base
|
||||
* assumptions about the underlying genotype on it</b>; instead, the genotype likelihoods (PLs) are what
|
||||
* determine the genotype calls (see below).
|
||||
* determine the genotype calls.
|
||||
*/
|
||||
public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation {
|
||||
|
||||
|
|
|
|||
|
|
@ -136,6 +136,10 @@ public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeRed
|
|||
*/
|
||||
public void initialize() {
|
||||
|
||||
// TODO -- remove me after the 2.1 release
|
||||
if ( getToolkit().getArguments().numberOfThreads > 1 )
|
||||
throw new UserException("We have temporarily disabled the ability to run BaseRecalibrator multi-threaded for performance reasons. We hope to have this fixed for the next GATK release (2.2) and apologize for the inconvenience.");
|
||||
|
||||
// check for unsupported access
|
||||
if (getToolkit().isGATKLite() && !getToolkit().getArguments().disableIndelQuals)
|
||||
throw new UserException.NotSupportedInGATKLite("base insertion/deletion recalibration is not supported, please use the --disable_indel_quals argument");
|
||||
|
|
|
|||
Loading…
Reference in New Issue