diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java index 3c0642f83..d876a403b 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java @@ -143,8 +143,13 @@ public class DeBruijnAssembler extends LocalAssemblyEngine { // something went wrong, so abort right now with a null graph return null; - // now go through the graph already seeded with the reference sequence and add the read kmers to it as well as the artificial GGA haplotypes - if ( ! addReadKmersToGraph(builder, reads, activeAlleleHaplotypes) ) + // add the artificial GGA haplotypes to the graph + if ( ! addGGAKmersToGraph(builder, activeAlleleHaplotypes) ) + // something went wrong, so abort right now with a null graph + return null; + + // now go through the graph already seeded with the reference sequence and add the read kmers to it + if ( ! addReadKmersToGraph(builder, reads) ) // some problem was detected adding the reads to the graph, return null to indicate we failed return null; @@ -153,17 +158,16 @@ public class DeBruijnAssembler extends LocalAssemblyEngine { } /** - * Add the high-quality kmers from the reads to the graph + * Add the high-quality kmers from the artificial GGA haplotypes to the graph * * @param builder a debruijn graph builder to add the read kmers to - * @param reads a non-null list of reads whose kmers we want to add to the graph * @param activeAlleleHaplotypes a list of haplotypes to add to the graph for GGA mode * @return true if we successfully added the read kmers to the graph without corrupting it in some way */ - protected boolean addReadKmersToGraph(final DeBruijnGraphBuilder builder, final List reads, final List activeAlleleHaplotypes) { + protected boolean addGGAKmersToGraph(final DeBruijnGraphBuilder builder, final List activeAlleleHaplotypes) { + final int kmerLength = builder.getKmerSize(); - // First pull kmers out of the artificial GGA haplotypes and throw them on the graph for( final Haplotype haplotype : activeAlleleHaplotypes ) { final int end = haplotype.length() - kmerLength; for( int start = 0; start < end; start++ ) { @@ -171,6 +175,20 @@ public class DeBruijnAssembler extends LocalAssemblyEngine { } } + // always returns true now, but it's possible that we'd add kmers and decide we don't like the graph in some way + return true; + } + + /** + * Add the high-quality kmers from the reads to the graph + * + * @param builder a debruijn graph builder to add the read kmers to + * @param reads a non-null list of reads whose kmers we want to add to the graph + * @return true if we successfully added the read kmers to the graph without corrupting it in some way + */ + protected boolean addReadKmersToGraph(final DeBruijnGraphBuilder builder, final List reads) { + final int kmerLength = builder.getKmerSize(); + // Next pull kmers out of every read and throw them on the graph for( final GATKSAMRecord read : reads ) { final byte[] sequence = read.getReadBases(); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SeqGraph.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SeqGraph.java index 20edcb39b..06c127a84 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SeqGraph.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SeqGraph.java @@ -352,7 +352,7 @@ public final class SeqGraph extends BaseGraph { * Merge until the graph has no vertices that are candidates for merging */ public boolean transformUntilComplete() { - boolean didAtLeastOneTranform = false; + boolean didAtLeastOneTransform = false; boolean foundNodesToMerge = true; while( foundNodesToMerge ) { foundNodesToMerge = false; @@ -360,13 +360,13 @@ public final class SeqGraph extends BaseGraph { for( final SeqVertex v : vertexSet() ) { foundNodesToMerge = tryToTransform(v); if ( foundNodesToMerge ) { - didAtLeastOneTranform = true; + didAtLeastOneTransform = true; break; } } } - return didAtLeastOneTranform; + return didAtLeastOneTransform; } /** diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java index 3d4d38d8e..bd24891bc 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java @@ -97,6 +97,8 @@ public class ReadThreadingAssembler extends LocalAssemblyEngine { // add the reference sequence to the graph rtgraph.addSequence("ref", refHaplotype.getBases(), null, true); + + // add the artificial GGA haplotypes to the graph int hapCount = 0; for( final Haplotype h : activeAlleleHaplotypes ) { final int[] counts = new int[h.length()]; diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssemblerUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssemblerUnitTest.java index 2ca78f306..95592241d 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssemblerUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssemblerUnitTest.java @@ -147,7 +147,50 @@ public class DeBruijnAssemblerUnitTest extends BaseTest { } } - assembler.addReadKmersToGraph(builder, Arrays.asList(read), Collections.emptyList()); + assembler.addReadKmersToGraph(builder, Arrays.asList(read)); + Assert.assertEquals(builder.addedPairs.size(), expectedStarts.size()); + for ( final Kmer addedKmer : builder.addedPairs ) { + Assert.assertTrue(expectedBases.contains(new String(addedKmer.bases())), "Couldn't find kmer " + addedKmer + " among all expected kmers " + expectedBases); + } + } + + @DataProvider(name = "AddGGAKmersToGraph") + public Object[][] makeAddGGAKmersToGraphData() { + List tests = new ArrayList(); + + // this functionality can be adapted to provide input data for whatever you might want in your data + final String bases = "ACGTAACCGGTTAAACCCGGGTTT"; + final int readLen = bases.length(); + final List allBadStarts = new ArrayList(readLen); + for ( int i = 0; i < readLen; i++ ) allBadStarts.add(i); + + for ( final int kmerSize : Arrays.asList(3, 4, 5) ) { + tests.add(new Object[]{bases, kmerSize}); + } + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "AddGGAKmersToGraph", enabled = ! DEBUG) + public void testAddGGAKmersToGraph(final String bases, final int kmerSize) { + final int readLen = bases.length(); + final DeBruijnAssembler assembler = new DeBruijnAssembler(); + final MockBuilder builder = new MockBuilder(kmerSize); + + final Set expectedBases = new HashSet(); + final Set expectedStarts = new LinkedHashSet(); + for ( int i = 0; i < readLen; i++) { + boolean good = true; + for ( int j = 0; j < kmerSize + 1; j++ ) { // +1 is for pairing + good &= i + j < readLen; + } + if ( good ) { + expectedStarts.add(i); + expectedBases.add(bases.substring(i, i + kmerSize + 1)); + } + } + + assembler.addGGAKmersToGraph(builder, Arrays.asList(new Haplotype(bases.getBytes()))); Assert.assertEquals(builder.addedPairs.size(), expectedStarts.size()); for ( final Kmer addedKmer : builder.addedPairs ) { Assert.assertTrue(expectedBases.contains(new String(addedKmer.bases())), "Couldn't find kmer " + addedKmer + " among all expected kmers " + expectedBases); diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index b158d1509..dfd3537da 100644 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -244,9 +244,6 @@ public class MathUtils { public static double sumLog10(final double[] log10values) { return Math.pow(10.0, log10sumLog10(log10values)); - // double s = 0.0; - // for ( double v : log10values) s += Math.pow(10.0, v); - // return s; } public static double log10sumLog10(final double[] log10values) { @@ -859,11 +856,8 @@ public class MathUtils { break; sum += x; i++; - //System.out.printf(" %d/%d", sum, i); } - //System.out.printf("Sum = %d, n = %d, maxI = %d, avg = %f%n", sum, i, maxI, (1.0 * sum) / i); - return (1.0 * sum) / i; } @@ -1359,7 +1353,7 @@ public class MathUtils { } /** - * Compute in a numerical correct way the quanity log10(1-x) + * Compute in a numerical correct way the quantity log10(1-x) * * Uses the approximation log10(1-x) = log10(1/x - 1) + log10(x) to avoid very quick underflow * in 1-x when x is very small