Based on NA12878 knowledge base experiments updating HC to allow for a much smaller minimum kmer length in the assembly graph.

This commit is contained in:
Ryan Poplin 2012-12-18 15:43:56 -05:00
parent 6171419e6c
commit b5d590ba92
4 changed files with 15 additions and 13 deletions

View File

@ -343,12 +343,7 @@ public class GenotypingEngine {
}
// count up the co-occurrences of the events for the R^2 calculation
for( final String sample : samples ) {
final HashSet<String> sampleSet = new HashSet<String>(1);
sampleSet.add(sample);
final List<Allele> alleleList = new ArrayList<Allele>();
alleleList.add(Allele.create(h.getBases()));
final double haplotypeLikelihood = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods( sampleSet, haplotypeReadMap, alleleList )[0][0];
final double haplotypeLikelihood = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods( Collections.singleton(sample), haplotypeReadMap, Collections.singletonList(Allele.create(h.getBases())) )[0][0];
if( thisHapVC == null ) {
if( nextHapVC == null ) { x11 = MathUtils.approximateLog10SumLog10(x11, haplotypeLikelihood); }
else { x12 = MathUtils.approximateLog10SumLog10(x12, haplotypeLikelihood); }

View File

@ -130,12 +130,16 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
protected String keepRG = null;
@Argument(fullName="minPruning", shortName="minPruning", doc = "The minimum allowed pruning factor in assembly graph. Paths with <= X supporting kmers are pruned from the graph", required = false)
protected int MIN_PRUNE_FACTOR = 1;
protected int MIN_PRUNE_FACTOR = 2;
@Advanced
@Argument(fullName="gcpHMM", shortName="gcpHMM", doc="Flat gap continuation penalty for use in the Pair HMM", required = false)
protected int gcpHMM = 10;
@Advanced
@Argument(fullName="minKmer", shortName="minKmer", doc="Minimum kmer length to use in the assembly graph", required = false)
protected int minKmer = 11;
@Argument(fullName="downsampleRegion", shortName="dr", doc="coverage, per-sample, to downsample each active region to", required = false)
protected int DOWNSAMPLE_PER_SAMPLE_PER_REGION = 1000;
@ -282,7 +286,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
throw new UserException.CouldNotReadInputFile(getToolkit().getArguments().referenceFile, e);
}
assemblyEngine = new SimpleDeBruijnAssembler( DEBUG, graphWriter );
assemblyEngine = new SimpleDeBruijnAssembler( DEBUG, graphWriter, minKmer );
likelihoodCalculationEngine = new LikelihoodCalculationEngine( (byte)gcpHMM, DEBUG, pairHMM );
genotypingEngine = new GenotypingEngine( DEBUG, annotationEngine, USE_FILTERED_READ_MAP_FOR_ANNOTATIONS );
}

View File

@ -28,7 +28,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
private static final int KMER_OVERLAP = 5; // the additional size of a valid chunk of sequence, used to string together k-mers
private static final int NUM_BEST_PATHS_PER_KMER_GRAPH = 11;
private static final byte MIN_QUALITY = (byte) 17;
private static final byte MIN_QUALITY = (byte) 16;
// Smith-Waterman parameters originally copied from IndelRealigner
private static final double SW_MATCH = 5.0; // 1.0;
@ -39,13 +39,15 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
private final boolean DEBUG;
private final PrintStream GRAPH_WRITER;
private final ArrayList<DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>> graphs = new ArrayList<DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>>();
private final int MIN_KMER;
private int PRUNE_FACTOR = 1;
private int PRUNE_FACTOR = 2;
public SimpleDeBruijnAssembler( final boolean debug, final PrintStream graphWriter ) {
public SimpleDeBruijnAssembler( final boolean debug, final PrintStream graphWriter, final int minKmer ) {
super();
DEBUG = debug;
GRAPH_WRITER = graphWriter;
MIN_KMER = minKmer;
}
public ArrayList<Haplotype> runLocalAssembly( final ActiveRegion activeRegion, final Haplotype refHaplotype, final byte[] fullReferenceWithPadding, final GenomeLoc refLoc, final int PRUNE_FACTOR, final ArrayList<VariantContext> activeAllelesToGenotype ) {
@ -72,8 +74,9 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
protected void createDeBruijnGraphs( final List<GATKSAMRecord> reads, final Haplotype refHaplotype ) {
graphs.clear();
final int maxKmer = refHaplotype.getBases().length;
// create the graph
for( int kmer = 31; kmer <= 75; kmer += 6 ) {
for( int kmer = MIN_KMER; kmer <= maxKmer; kmer += 6 ) {
final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph = new DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge>(DeBruijnEdge.class);
if( createGraphFromSequences( graph, reads, kmer, refHaplotype, DEBUG ) ) {
graphs.add(graph);

View File

@ -70,7 +70,7 @@ public class GATKSAMReadGroupRecord extends SAMReadGroupRecord {
super.setPlatform(s);
mPlatform = s;
retrievedPlatform = true;
retrievedNGSPlatform = false; // recalculate the NGSPlatform
retrievedNGSPlatform = false; // recalculate the NGSPlatform
}
public NGSPlatform getNGSPlatform() {