The maximum kmer length is derived from the reads.
-- This is done to take advantage of longer reads which can produce less ambiguous haplotypes -- Integration tests change for HC and BiasedDownsampling
This commit is contained in:
parent
bd9875aff5
commit
89e2943dd1
|
|
@ -75,9 +75,8 @@ import java.util.*;
|
|||
public class DeBruijnAssembler extends LocalAssemblyEngine {
|
||||
|
||||
private static final int KMER_OVERLAP = 5; // the additional size of a valid chunk of sequence, used to string together k-mers
|
||||
private static final int NUM_BEST_PATHS_PER_KMER_GRAPH = 12;
|
||||
private static final int NUM_BEST_PATHS_PER_KMER_GRAPH = 11;
|
||||
private static final byte MIN_QUALITY = (byte) 16;
|
||||
private static final int MAX_POSSIBLE_KMER = 75;
|
||||
private static final int GRAPH_KMER_STEP = 6;
|
||||
|
||||
// Smith-Waterman parameters originally copied from IndelRealigner, only used during GGA mode
|
||||
|
|
@ -136,7 +135,9 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
|
|||
protected void createDeBruijnGraphs( final List<GATKSAMRecord> reads, final Haplotype refHaplotype ) {
|
||||
graphs.clear();
|
||||
|
||||
final int maxKmer = Math.min(MAX_POSSIBLE_KMER, refHaplotype.getBases().length - KMER_OVERLAP);
|
||||
final int maxKmer = ReadUtils.getMaxReadLength(reads) - KMER_OVERLAP - 1;
|
||||
if( maxKmer < MIN_KMER ) { throw new IllegalStateException("Reads are too small for use in assembly."); }
|
||||
|
||||
// create the graph for each possible kmer
|
||||
for( int kmer = maxKmer; kmer >= MIN_KMER; kmer -= GRAPH_KMER_STEP ) {
|
||||
final DeBruijnAssemblyGraph graph = createGraphFromSequences( reads, kmer, refHaplotype, DEBUG );
|
||||
|
|
|
|||
|
|
@ -283,17 +283,17 @@ public class BiasedDownsamplingIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHCFlatContaminationCase1() {
|
||||
testHCFlatContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", 0.05, "0b9d6aabd5ab448f0a2d32f24ff64840");
|
||||
testHCFlatContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", 0.05, "9dbd17769e091ce759efda050cd4f8b2");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHCFlatContaminationCase2() {
|
||||
testHCFlatContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", 0.1, "a4ef4a6ce557a6b9666e234fad5c7c80");
|
||||
testHCFlatContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", 0.1, "b8cee98c9c693fd336fc5e574dd744ed");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHCFlatContaminationCase3() {
|
||||
testHCFlatContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", 0.2, "bacc98eb2baa5bb1777da24cf0f84913");
|
||||
testHCFlatContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", 0.2, "e7309bd594b8e4b54b712f9877518b8e");
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -68,12 +68,12 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSample() {
|
||||
HCTest(CEUTRIO_BAM, "", "ecf563b63ca3f640d9cfcc548e8ad776");
|
||||
HCTest(CEUTRIO_BAM, "", "a9748a39604c4ec8bbdb2cb809a971f1");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSample() {
|
||||
HCTest(NA12878_BAM, "", "874389182141f41879abea7cb350c9d4");
|
||||
HCTest(NA12878_BAM, "", "c55ebed976767e1f93d2e8ada9d52bf8");
|
||||
}
|
||||
|
||||
@Test(enabled = false)
|
||||
|
|
@ -84,7 +84,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGA() {
|
||||
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
|
||||
"4aa3d0d0a859c0fc0533f29529cc3d95");
|
||||
"70a53e566e6a7090e2f29ed608e9d84f");
|
||||
}
|
||||
|
||||
private void HCTestComplexGGA(String bam, String args, String md5) {
|
||||
|
|
@ -102,7 +102,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
|
||||
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
|
||||
"cfd717dd79ace99a266e8bb58d6cc7a6");
|
||||
"10fdbfeb3b4ea1af7f242a8aca83cb9b");
|
||||
}
|
||||
|
||||
private void HCTestComplexVariants(String bam, String args, String md5) {
|
||||
|
|
@ -113,7 +113,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleComplex() {
|
||||
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "58b484324f0ea00aaac25fb7711ad657");
|
||||
HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "a960722c1ae2b6f774d3443a7e5ac27d");
|
||||
}
|
||||
|
||||
private void HCTestSymbolicVariants(String bam, String args, String md5) {
|
||||
|
|
@ -136,7 +136,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSampleIndelQualityScores() {
|
||||
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "0e8a3a31b8fe5f097d6975aee8b67cdc");
|
||||
HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "f1f867dbbe3747f16a0d9e5f11e6ed64");
|
||||
}
|
||||
|
||||
// This problem bam came from a user on the forum and it spotted a problem where the ReadClipper
|
||||
|
|
@ -146,14 +146,14 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void HCTestProblematicReadsModifiedInActiveRegions() {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("2acd853da3a0380650de6827b7c790ac"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("0689d2c202849fd05617648eaf429b9a"));
|
||||
executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void HCTestStructuralIndels() {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("061a95cab149723866ce7c797ba6bdd4"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("a17e95c1191e3aef7892586fe38ca050"));
|
||||
executeTest("HCTestStructuralIndels: ", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -913,4 +913,19 @@ public class ReadUtils {
|
|||
return getBasesReverseComplement(read.getReadBases());
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate the maximum read length from the given list of reads.
|
||||
* @param reads list of reads
|
||||
* @return non-negative integer
|
||||
*/
|
||||
@Ensures({"result >= 0"})
|
||||
public static int getMaxReadLength( final List<GATKSAMRecord> reads ) {
|
||||
if( reads == null ) { throw new IllegalArgumentException("Attempting to check a null list of reads."); }
|
||||
|
||||
int maxReadLength = 0;
|
||||
for( final GATKSAMRecord read : reads ) {
|
||||
maxReadLength = Math.max(maxReadLength, read.getReadLength());
|
||||
}
|
||||
return maxReadLength;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -32,9 +32,7 @@ import org.testng.Assert;
|
|||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
import java.util.*;
|
||||
|
||||
|
||||
public class ReadUtilsUnitTest extends BaseTest {
|
||||
|
|
@ -165,4 +163,20 @@ public class ReadUtilsUnitTest extends BaseTest {
|
|||
Assert.assertEquals(reconverted, original);
|
||||
}
|
||||
}
|
||||
|
||||
@Test (enabled = true)
|
||||
public void testGetMaxReadLength() {
|
||||
for( final int minLength : Arrays.asList( 5, 30, 50 ) ) {
|
||||
for( final int maxLength : Arrays.asList( 50, 75, 100 ) ) {
|
||||
final List<GATKSAMRecord> reads = new ArrayList<GATKSAMRecord>();
|
||||
for( int readLength = minLength; readLength <= maxLength; readLength++ ) {
|
||||
reads.add( ReadUtils.createRandomRead( readLength ) );
|
||||
}
|
||||
Assert.assertEquals(ReadUtils.getMaxReadLength(reads), maxLength, "max length does not match");
|
||||
}
|
||||
}
|
||||
|
||||
final List<GATKSAMRecord> reads = new LinkedList<GATKSAMRecord>();
|
||||
Assert.assertEquals(ReadUtils.getMaxReadLength(reads), 0, "Empty list should have max length of zero");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue