Based on NA12878 knowledge base experiments updating HC to allow for a much smaller minimum kmer length in the assembly graph.
This commit is contained in:
parent
6171419e6c
commit
b5d590ba92
|
|
@ -343,12 +343,7 @@ public class GenotypingEngine {
|
||||||
}
|
}
|
||||||
// count up the co-occurrences of the events for the R^2 calculation
|
// count up the co-occurrences of the events for the R^2 calculation
|
||||||
for( final String sample : samples ) {
|
for( final String sample : samples ) {
|
||||||
final HashSet<String> sampleSet = new HashSet<String>(1);
|
final double haplotypeLikelihood = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods( Collections.singleton(sample), haplotypeReadMap, Collections.singletonList(Allele.create(h.getBases())) )[0][0];
|
||||||
sampleSet.add(sample);
|
|
||||||
|
|
||||||
final List<Allele> alleleList = new ArrayList<Allele>();
|
|
||||||
alleleList.add(Allele.create(h.getBases()));
|
|
||||||
final double haplotypeLikelihood = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods( sampleSet, haplotypeReadMap, alleleList )[0][0];
|
|
||||||
if( thisHapVC == null ) {
|
if( thisHapVC == null ) {
|
||||||
if( nextHapVC == null ) { x11 = MathUtils.approximateLog10SumLog10(x11, haplotypeLikelihood); }
|
if( nextHapVC == null ) { x11 = MathUtils.approximateLog10SumLog10(x11, haplotypeLikelihood); }
|
||||||
else { x12 = MathUtils.approximateLog10SumLog10(x12, haplotypeLikelihood); }
|
else { x12 = MathUtils.approximateLog10SumLog10(x12, haplotypeLikelihood); }
|
||||||
|
|
|
||||||
|
|
@ -130,12 +130,16 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
||||||
protected String keepRG = null;
|
protected String keepRG = null;
|
||||||
|
|
||||||
@Argument(fullName="minPruning", shortName="minPruning", doc = "The minimum allowed pruning factor in assembly graph. Paths with <= X supporting kmers are pruned from the graph", required = false)
|
@Argument(fullName="minPruning", shortName="minPruning", doc = "The minimum allowed pruning factor in assembly graph. Paths with <= X supporting kmers are pruned from the graph", required = false)
|
||||||
protected int MIN_PRUNE_FACTOR = 1;
|
protected int MIN_PRUNE_FACTOR = 2;
|
||||||
|
|
||||||
@Advanced
|
@Advanced
|
||||||
@Argument(fullName="gcpHMM", shortName="gcpHMM", doc="Flat gap continuation penalty for use in the Pair HMM", required = false)
|
@Argument(fullName="gcpHMM", shortName="gcpHMM", doc="Flat gap continuation penalty for use in the Pair HMM", required = false)
|
||||||
protected int gcpHMM = 10;
|
protected int gcpHMM = 10;
|
||||||
|
|
||||||
|
@Advanced
|
||||||
|
@Argument(fullName="minKmer", shortName="minKmer", doc="Minimum kmer length to use in the assembly graph", required = false)
|
||||||
|
protected int minKmer = 11;
|
||||||
|
|
||||||
@Argument(fullName="downsampleRegion", shortName="dr", doc="coverage, per-sample, to downsample each active region to", required = false)
|
@Argument(fullName="downsampleRegion", shortName="dr", doc="coverage, per-sample, to downsample each active region to", required = false)
|
||||||
protected int DOWNSAMPLE_PER_SAMPLE_PER_REGION = 1000;
|
protected int DOWNSAMPLE_PER_SAMPLE_PER_REGION = 1000;
|
||||||
|
|
||||||
|
|
@ -282,7 +286,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
||||||
throw new UserException.CouldNotReadInputFile(getToolkit().getArguments().referenceFile, e);
|
throw new UserException.CouldNotReadInputFile(getToolkit().getArguments().referenceFile, e);
|
||||||
}
|
}
|
||||||
|
|
||||||
assemblyEngine = new SimpleDeBruijnAssembler( DEBUG, graphWriter );
|
assemblyEngine = new SimpleDeBruijnAssembler( DEBUG, graphWriter, minKmer );
|
||||||
likelihoodCalculationEngine = new LikelihoodCalculationEngine( (byte)gcpHMM, DEBUG, pairHMM );
|
likelihoodCalculationEngine = new LikelihoodCalculationEngine( (byte)gcpHMM, DEBUG, pairHMM );
|
||||||
genotypingEngine = new GenotypingEngine( DEBUG, annotationEngine, USE_FILTERED_READ_MAP_FOR_ANNOTATIONS );
|
genotypingEngine = new GenotypingEngine( DEBUG, annotationEngine, USE_FILTERED_READ_MAP_FOR_ANNOTATIONS );
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -28,7 +28,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
|
||||||
|
|
||||||
private static final int KMER_OVERLAP = 5; // the additional size of a valid chunk of sequence, used to string together k-mers
|
private static final int KMER_OVERLAP = 5; // the additional size of a valid chunk of sequence, used to string together k-mers
|
||||||
private static final int NUM_BEST_PATHS_PER_KMER_GRAPH = 11;
|
private static final int NUM_BEST_PATHS_PER_KMER_GRAPH = 11;
|
||||||
private static final byte MIN_QUALITY = (byte) 17;
|
private static final byte MIN_QUALITY = (byte) 16;
|
||||||
|
|
||||||
// Smith-Waterman parameters originally copied from IndelRealigner
|
// Smith-Waterman parameters originally copied from IndelRealigner
|
||||||
private static final double SW_MATCH = 5.0; // 1.0;
|
private static final double SW_MATCH = 5.0; // 1.0;
|
||||||
|
|
@ -39,13 +39,15 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
|
||||||
private final boolean DEBUG;
|
private final boolean DEBUG;
|
||||||
private final PrintStream GRAPH_WRITER;
|
private final PrintStream GRAPH_WRITER;
|
||||||
private final ArrayList<DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>> graphs = new ArrayList<DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>>();
|
private final ArrayList<DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>> graphs = new ArrayList<DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>>();
|
||||||
|
private final int MIN_KMER;
|
||||||
|
|
||||||
private int PRUNE_FACTOR = 1;
|
private int PRUNE_FACTOR = 2;
|
||||||
|
|
||||||
public SimpleDeBruijnAssembler( final boolean debug, final PrintStream graphWriter ) {
|
public SimpleDeBruijnAssembler( final boolean debug, final PrintStream graphWriter, final int minKmer ) {
|
||||||
super();
|
super();
|
||||||
DEBUG = debug;
|
DEBUG = debug;
|
||||||
GRAPH_WRITER = graphWriter;
|
GRAPH_WRITER = graphWriter;
|
||||||
|
MIN_KMER = minKmer;
|
||||||
}
|
}
|
||||||
|
|
||||||
public ArrayList<Haplotype> runLocalAssembly( final ActiveRegion activeRegion, final Haplotype refHaplotype, final byte[] fullReferenceWithPadding, final GenomeLoc refLoc, final int PRUNE_FACTOR, final ArrayList<VariantContext> activeAllelesToGenotype ) {
|
public ArrayList<Haplotype> runLocalAssembly( final ActiveRegion activeRegion, final Haplotype refHaplotype, final byte[] fullReferenceWithPadding, final GenomeLoc refLoc, final int PRUNE_FACTOR, final ArrayList<VariantContext> activeAllelesToGenotype ) {
|
||||||
|
|
@ -72,8 +74,9 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
|
||||||
protected void createDeBruijnGraphs( final List<GATKSAMRecord> reads, final Haplotype refHaplotype ) {
|
protected void createDeBruijnGraphs( final List<GATKSAMRecord> reads, final Haplotype refHaplotype ) {
|
||||||
graphs.clear();
|
graphs.clear();
|
||||||
|
|
||||||
|
final int maxKmer = refHaplotype.getBases().length;
|
||||||
// create the graph
|
// create the graph
|
||||||
for( int kmer = 31; kmer <= 75; kmer += 6 ) {
|
for( int kmer = MIN_KMER; kmer <= maxKmer; kmer += 6 ) {
|
||||||
final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);
|
final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);
|
||||||
if( createGraphFromSequences( graph, reads, kmer, refHaplotype, DEBUG ) ) {
|
if( createGraphFromSequences( graph, reads, kmer, refHaplotype, DEBUG ) ) {
|
||||||
graphs.add(graph);
|
graphs.add(graph);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue