Bugfix for DeBruijnAssembler: don't fail when read length > haplotype length

-- The previous version would generate graphs that had no reference bases at all when the reference haplotype was shorter than the longer read length, which would cause the kmer size to exceed the reference haplotype length. createGraphFromSequences now returns a null graph immediately when this occurs, instead of continuing and eventually causing an error.
This commit is contained in:
Mark DePristo 2013-03-22 22:58:25 -04:00
parent 464e65ea96
commit 1917d55dc2
1 changed files with 3 additions and 4 deletions

View File

@ -216,11 +216,9 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
@Requires({"reads != null", "KMER_LENGTH > 0", "refHaplotype != null"})
protected DeBruijnGraph createGraphFromSequences( final List<GATKSAMRecord> reads, final int KMER_LENGTH, final Haplotype refHaplotype, final boolean DEBUG ) {
final DeBruijnGraph graph = new DeBruijnGraph(KMER_LENGTH);
// First pull kmers from the reference haplotype and add them to the graph
//logger.info("Adding reference sequence to graph " + refHaplotype.getBaseString());
final byte[] refSequence = refHaplotype.getBases();
if( refSequence.length >= KMER_LENGTH + KMER_OVERLAP ) {
final int kmersInSequence = refSequence.length - KMER_LENGTH + 1;
@ -232,12 +230,13 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
return null;
}
}
} else {
// not enough reference sequence to build a kmer graph of this length, return null
return null;
}
// Next pull kmers out of every read and throw them on the graph
for( final GATKSAMRecord read : reads ) {
//if ( ! read.getReadName().equals("H06JUADXX130110:1:1213:15422:11590")) continue;
//logger.info("Adding read " + read + " with sequence " + read.getReadString());
final byte[] sequence = read.getReadBases();
final byte[] qualities = read.getBaseQualities();
final byte[] reducedReadCounts = read.getReducedReadCounts(); // will be null if read is not reduced