Merge pull request #187 from broadinstitute/rp_new_bundle_for_release

Adding the 1000G_phase1.snps.high_confidence callset to the GATK resourc...
This commit is contained in:
Mark DePristo 2013-04-24 08:44:38 -07:00
commit 91d5674cc5
4 changed files with 9 additions and 6 deletions

View File

@ -81,7 +81,7 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
private static final int KMER_OVERLAP = 5; // the additional size of a valid chunk of sequence, used to string together k-mers
// TODO -- this number is very low, and limits our ability to explore low-frequnecy variants. It should
// TODO -- this number is very low, and limits our ability to explore low-frequency variants. It should
// TODO -- be increased to a large number or eliminated altogether when moving to the bubble caller where
// TODO -- we are no longer considering a combinatorial number of haplotypes as the number of bubbles increases
private static final int NUM_BEST_PATHS_PER_KMER_GRAPH = 25;
@ -187,10 +187,10 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
// TODO -- we need to come up with a consistent pruning algorithm. The current pruning algorithm
// TODO -- works well but it doesn't differentiate between an isolated chain that doesn't connect
// TODO -- to anything from one that's actuall has good support along the chain but just happens
// TODO -- to anything from one that actually has good support along the chain but just happens
// TODO -- to have a connection in the middle that has weight of < pruneFactor. Ultimately
// TODO -- the pruning algorithm really should be an error correction algorithm that knows more
// TODO -- about the structure of the data and can differeniate between an infrequent path but
// TODO -- about the structure of the data and can differentiate between an infrequent path but
// TODO -- without evidence against it (such as occurs when a region is hard to get any reads through)
// TODO -- from an error with lots of weight going along another similar path
// the very first thing we need to do is zip up the graph, or pruneGraph will be too aggressive
@ -216,7 +216,7 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
seqGraph.removePathsNotConnectedToRef();
seqGraph.simplifyGraph();
if ( seqGraph.vertexSet().size() == 1 ) {
// we've prefectly assembled into a single reference haplotype, add a empty seq vertex to stop
// we've perfectly assembled into a single reference haplotype, add an empty seq vertex to stop
// the code from blowing up.
// TODO -- ref properties should really be on the vertices, not the graph itself
final SeqVertex complete = seqGraph.vertexSet().iterator().next();

View File

@ -308,7 +308,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
protected boolean useLowQualityBasesForAssembly = false;
@Hidden
@Argument(fullName="dontTrimActiveRegions", shortName="donTrimActiveRegions", doc="If specified, we will not trim down the active region from the full region (active + extension) to just the active interval for genotyping", required = false)
@Argument(fullName="dontTrimActiveRegions", shortName="dontTrimActiveRegions", doc="If specified, we will not trim down the active region from the full region (active + extension) to just the active interval for genotyping", required = false)
protected boolean dontTrimActiveRegions = false;
@Hidden

View File

@ -173,7 +173,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
// Additional Command Line Arguments
/////////////////////////////
/**
* The expected transition / tranversion ratio of true novel variants in your targeted region (whole genome, exome, specific
* The expected transition / transversion ratio of true novel variants in your targeted region (whole genome, exome, specific
* genes), which varies greatly by the CpG and GC content of the region. See expected Ti/Tv ratios section of the GATK best
* practices documentation (http://www.broadinstitute.org/gatk/guide/topic?name=best-practices) for more information. Normal whole genome values are 2.15 and for whole exome 3.2. Note
* that this parameter is used for display purposes only and isn't used anywhere in the algorithm!

View File

@ -160,6 +160,9 @@ class GATKResourcesBundle extends QScript {
addResource(new Resource("/humgen/1kg/DCC/ftp/technical/working/20120312_phase1_v2_indel_cleaned_sites_list/ALL.wgs.phase1_release_v2.20101123.official_indel_calls.20120312.sites.vcf",
"1000G_phase1.indels", b37, true, false))
addResource(new Resource("/humgen/1kg/processing/official_release/phase1/projectConsensus/phase1.wgs.projectConsensus.v2b.recal.highQuality.vcf",
"1000G_phase1.snps.high_confidence", b37, true, false))
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/GoldStandardIndel/gold.standard.indel.MillsAnd1000G.b37.vcf",
"Mills_and_1000G_gold_standard.indels", b37, true, false))