e# This is a combination of 2 commits.
Only try to clip adaptors when both reads of the pair are on opposite strands
-- Read pairs that have unusual alignments, such as two reads both oriented like:
<-----
<-----
where previously having their adaptors clipped as though the standard calculation of the insert size was meaningful, which it is not for such oddly oriented pairs. This caused us to clip extra good bases from reads.
-- Update MD5s due change in adaptor clipping, which add some coverage in some places
This commit is contained in:
parent
0587a145bf
commit
f42bb86bdd
|
|
@ -259,7 +259,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
|
|||
public void testDivideByZero() {
|
||||
String base = String.format("-T ReduceReads %s -npt -R %s -I %s", DIVIDEBYZERO_L, REF, DIVIDEBYZERO_BAM) + " -o %s ";
|
||||
// we expect to lose coverage due to the downsampling so don't run the systematic tests
|
||||
executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("1663f35802f82333c5e15653e437ce2d")));
|
||||
executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("c459a6153a17c2cbf8441e1918fda9c8")));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -57,7 +57,7 @@ public class ErrorRatePerCycleIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T ErrorRatePerCycle -R " + b37KGReference + " -I " + b37GoodBAM + " -L 20:10,000,000-10,100,000 -o %s",
|
||||
1,
|
||||
Arrays.asList("dccdf3cb3193d01a1a767097e4a5c35e"));
|
||||
Arrays.asList("6191340f0b56ee81fb248c8f5c913a8e"));
|
||||
executeTest("ErrorRatePerCycle:", spec);
|
||||
}
|
||||
}
|
||||
|
|
@ -58,16 +58,16 @@ public class UnifiedGenotyperGeneralPloidySuite2IntegrationTest extends WalkerTe
|
|||
|
||||
@Test(enabled = true)
|
||||
public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() {
|
||||
executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","5eabc12fc7b4f9749e6d1be0f5b45d14");
|
||||
executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","9f960977b1b8d90ac75ba4306336553c");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testMT_SNP_DISCOVERY_sp4() {
|
||||
executor.PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","71b7b9eac1e4a9b2b7e8c3689d1f29ec");
|
||||
executor.PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","eb8b008a463f9fa9ad0155bf2f5b78b3");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testMT_SNP_GGA_sp10() {
|
||||
executor.PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "fc36e925e269b035d4b27edb661be06b");
|
||||
executor.PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "fb988e9b93bc73b5e532584c83cac833");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -73,7 +73,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("ea4c4e020f59f48901d5820c8e4f6001"));
|
||||
Arrays.asList("19f77f557150905ef3fa4713f611a1b9"));
|
||||
|
||||
executeTest(String.format("test indel caller in SLX"), spec);
|
||||
}
|
||||
|
|
@ -101,7 +101,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("8d9b8f8a1479322961c840e461b6dba8"));
|
||||
Arrays.asList("bb3dbad9666ebf38d338f0c9c211a42e"));
|
||||
|
||||
executeTest(String.format("test indel calling, multiple technologies"), spec);
|
||||
}
|
||||
|
|
@ -111,7 +111,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("79f7fc64da8f25eda1f1ee139ecdd657"));
|
||||
Arrays.asList("8052390ca2b6a57c3ddf379a51225d64"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec);
|
||||
}
|
||||
|
||||
|
|
@ -121,7 +121,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
|
|||
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
|
||||
+ privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("15508fcf61380a91b6611307f182447b"));
|
||||
Arrays.asList("b6b9dba97fbabaeeb458a41051983e7b"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec);
|
||||
}
|
||||
|
||||
|
|
@ -136,7 +136,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
|
||||
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L " + result.get(0).getAbsolutePath(), 1,
|
||||
Arrays.asList("3d4d66cc253eac55f16e5b0a36f17d8d"));
|
||||
Arrays.asList("38730c7030271f5d0ca0b59365d57814"));
|
||||
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
|
|||
public void testMultiSamplePilot1() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
|
||||
Arrays.asList("a6c224235c21b4af816b1512eb0624df"));
|
||||
Arrays.asList("5e8f1fa88dc93320cc0e75e9fe6e153b"));
|
||||
executeTest("test MultiSample Pilot1", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -88,12 +88,12 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
|
|||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGAComplex() {
|
||||
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538",
|
||||
"7d2cc5c4ece386beedf6b07dfbe5bf26");
|
||||
"5954a46971b7546d30151b068cded42a");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
|
||||
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
|
||||
"a17856f709b546eaed486841d78248d2");
|
||||
"b3684c670f68f5a3a348a7fd2b25f10a");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -96,7 +96,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGA() {
|
||||
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
|
||||
"e2d32d0dce2c5502a8e877f6bbb65a10");
|
||||
"65188ec4e3b91796f62bfb5b965ccf1f");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ package org.broadinstitute.sting.utils.sam;
|
|||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.*;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
|
@ -47,6 +48,7 @@ import java.util.*;
|
|||
* @version 0.1
|
||||
*/
|
||||
public class ReadUtils {
|
||||
private final static Logger logger = Logger.getLogger(ReadUtils.class);
|
||||
|
||||
private static final String OFFSET_OUT_OF_BOUNDS_EXCEPTION = "Offset cannot be greater than read length %d : %d";
|
||||
private static final String OFFSET_NOT_ZERO_EXCEPTION = "We ran past the end of the read and never found the offset, something went wrong!";
|
||||
|
|
@ -209,7 +211,16 @@ public class ReadUtils {
|
|||
|
||||
if (insertSize == 0 || read.getReadUnmappedFlag()) // no adaptors in reads with mates in another chromosome or unmapped pairs
|
||||
return CANNOT_COMPUTE_ADAPTOR_BOUNDARY;
|
||||
|
||||
|
||||
if ( read.getReadPairedFlag() && read.getReadNegativeStrandFlag() == read.getMateNegativeStrandFlag() ) {
|
||||
// note that the read.getProperPairFlag() is not reliably set, so many reads may have this tag but still be overlapping
|
||||
// logger.info(String.format("Read %s start=%d end=%d insert=%d mateStart=%d readNeg=%b mateNeg=%b not properly paired, returning CANNOT_COMPUTE_ADAPTOR_BOUNDARY",
|
||||
// read.getReadName(), read.getAlignmentStart(), read.getAlignmentEnd(), insertSize, read.getMateAlignmentStart(),
|
||||
// read.getReadNegativeStrandFlag(), read.getMateNegativeStrandFlag()));
|
||||
return CANNOT_COMPUTE_ADAPTOR_BOUNDARY;
|
||||
}
|
||||
|
||||
|
||||
int adaptorBoundary; // the reference coordinate for the adaptor boundary (effectively the first base IN the adaptor, closest to the read)
|
||||
if (read.getReadNegativeStrandFlag())
|
||||
adaptorBoundary = read.getMateAlignmentStart() - 1; // case 1 (see header)
|
||||
|
|
@ -218,7 +229,7 @@ public class ReadUtils {
|
|||
|
||||
if ( (adaptorBoundary < read.getAlignmentStart() - MAXIMUM_ADAPTOR_LENGTH) || (adaptorBoundary > read.getAlignmentEnd() + MAXIMUM_ADAPTOR_LENGTH) )
|
||||
adaptorBoundary = CANNOT_COMPUTE_ADAPTOR_BOUNDARY; // we are being conservative by not allowing the adaptor boundary to go beyond what we belive is the maximum size of an adaptor
|
||||
|
||||
|
||||
return adaptorBoundary;
|
||||
}
|
||||
public static int CANNOT_COMPUTE_ADAPTOR_BOUNDARY = Integer.MIN_VALUE;
|
||||
|
|
|
|||
|
|
@ -34,13 +34,13 @@ public class CallableLociIntegrationTest extends WalkerTest {
|
|||
final static String commonArgs = "-R " + b36KGReference + " -T CallableLoci -I " + validationDataLocation + "/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s";
|
||||
final static String reduceReadArgs = "-R " + b37KGReference + " -T CallableLoci -I " + " private/testdata/NA12878.HiSeq.b37.chr20.10_11mb.reduced.bam -o %s";
|
||||
|
||||
final static String SUMMARY_MD5 = "ffdbd9cdcb4169ebed5ae4bec797260f";
|
||||
final static String SUMMARY_MD5 = "a6f5963669f19d9d137ced87d65834b0";
|
||||
|
||||
@Test
|
||||
public void testCallableLociWalkerBed() {
|
||||
String gatk_args = commonArgs + " -format BED -L 1:10,000,000-11,000,000 -summary %s";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 2,
|
||||
Arrays.asList("42e86c06c167246a28bffdacaca75ffb", SUMMARY_MD5));
|
||||
Arrays.asList("9b4ffea1dbcfefadeb1c9fa74b0e0e59", SUMMARY_MD5));
|
||||
executeTest("formatBed", spec);
|
||||
}
|
||||
|
||||
|
|
@ -48,7 +48,7 @@ public class CallableLociIntegrationTest extends WalkerTest {
|
|||
public void testCallableLociWalkerPerBase() {
|
||||
String gatk_args = commonArgs + " -format STATE_PER_BASE -L 1:10,000,000-11,000,000 -summary %s";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 2,
|
||||
Arrays.asList("d66c525d9c70f62df8156261d3e535ad", SUMMARY_MD5));
|
||||
Arrays.asList("d6505e489899e80c08a7168777f6e07b", SUMMARY_MD5));
|
||||
executeTest("format_state_per_base", spec);
|
||||
}
|
||||
|
||||
|
|
@ -64,7 +64,7 @@ public class CallableLociIntegrationTest extends WalkerTest {
|
|||
public void testCallableLociWalker3() {
|
||||
String gatk_args = commonArgs + " -format BED -L 1:10,000,000-11,000,000 -minDepth 10 -maxDepth 100 --minBaseQuality 10 --minMappingQuality 20 -summary %s";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 2,
|
||||
Arrays.asList("46a53379aaaf9803276a0a34b234f6ab", "da431d393f7c2b2b3e27556b86c1dbc7"));
|
||||
Arrays.asList("7f79ad8195c4161060463eeb21d2bb11", "7ee269e5f4581a924529a356cc806e55"));
|
||||
executeTest("formatBed lots of arguments", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -151,6 +151,31 @@ public class ReadUtilsUnitTest extends BaseTest {
|
|||
read.setReadNegativeStrandFlag(false);
|
||||
boundary = get.getAdaptor(read);
|
||||
Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY);
|
||||
|
||||
// Test case 8: read doesn't have proper pair flag set
|
||||
read = makeRead(fragmentSize, mateStart);
|
||||
read.setReadPairedFlag(true);
|
||||
read.setProperPairFlag(false);
|
||||
Assert.assertEquals(get.getAdaptor(read), ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY);
|
||||
|
||||
// Test case 9: read and mate have same negative flag setting
|
||||
for ( final boolean negFlag: Arrays.asList(true, false) ) {
|
||||
read = makeRead(fragmentSize, mateStart);
|
||||
read.setAlignmentStart(BEFORE);
|
||||
read.setReadPairedFlag(true);
|
||||
read.setProperPairFlag(true);
|
||||
read.setReadNegativeStrandFlag(negFlag);
|
||||
read.setMateNegativeStrandFlag(!negFlag);
|
||||
Assert.assertTrue(get.getAdaptor(read) != ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY, "Get adaptor should have succeeded");
|
||||
|
||||
read = makeRead(fragmentSize, mateStart);
|
||||
read.setAlignmentStart(BEFORE);
|
||||
read.setReadPairedFlag(true);
|
||||
read.setProperPairFlag(true);
|
||||
read.setReadNegativeStrandFlag(negFlag);
|
||||
read.setMateNegativeStrandFlag(negFlag);
|
||||
Assert.assertEquals(get.getAdaptor(read), ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY, "Get adaptor should have failed for reads with bad alignment orientation");
|
||||
}
|
||||
}
|
||||
|
||||
@Test (enabled = true)
|
||||
|
|
|
|||
Loading…
Reference in New Issue