e# This is a combination of 2 commits.

Only try to clip adaptors when both reads of the pair are on opposite strands

-- Read pairs that have unusual alignments, such as two reads both oriented like:

  <-----
     <-----

where previously having their adaptors clipped as though the standard calculation of the insert size was meaningful, which it is not for such oddly oriented pairs.  This caused us to clip extra good bases from reads.
-- Update MD5s due change in adaptor clipping, which add some coverage in some places
This commit is contained in:
Mark DePristo 2013-04-24 16:01:43 -04:00
parent 0587a145bf
commit f42bb86bdd
10 changed files with 56 additions and 20 deletions

View File

@ -259,7 +259,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
public void testDivideByZero() {
String base = String.format("-T ReduceReads %s -npt -R %s -I %s", DIVIDEBYZERO_L, REF, DIVIDEBYZERO_BAM) + " -o %s ";
// we expect to lose coverage due to the downsampling so don't run the systematic tests
executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("1663f35802f82333c5e15653e437ce2d")));
executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("c459a6153a17c2cbf8441e1918fda9c8")));
}
/**

View File

@ -57,7 +57,7 @@ public class ErrorRatePerCycleIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T ErrorRatePerCycle -R " + b37KGReference + " -I " + b37GoodBAM + " -L 20:10,000,000-10,100,000 -o %s",
1,
Arrays.asList("dccdf3cb3193d01a1a767097e4a5c35e"));
Arrays.asList("6191340f0b56ee81fb248c8f5c913a8e"));
executeTest("ErrorRatePerCycle:", spec);
}
}

View File

@ -58,16 +58,16 @@ public class UnifiedGenotyperGeneralPloidySuite2IntegrationTest extends WalkerTe
@Test(enabled = true)
public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() {
executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","5eabc12fc7b4f9749e6d1be0f5b45d14");
executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","9f960977b1b8d90ac75ba4306336553c");
}
@Test(enabled = true)
public void testMT_SNP_DISCOVERY_sp4() {
executor.PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","71b7b9eac1e4a9b2b7e8c3689d1f29ec");
executor.PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","eb8b008a463f9fa9ad0155bf2f5b78b3");
}
@Test(enabled = true)
public void testMT_SNP_GGA_sp10() {
executor.PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "fc36e925e269b035d4b27edb661be06b");
executor.PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "fb988e9b93bc73b5e532584c83cac833");
}
}

View File

@ -73,7 +73,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("ea4c4e020f59f48901d5820c8e4f6001"));
Arrays.asList("19f77f557150905ef3fa4713f611a1b9"));
executeTest(String.format("test indel caller in SLX"), spec);
}
@ -101,7 +101,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("8d9b8f8a1479322961c840e461b6dba8"));
Arrays.asList("bb3dbad9666ebf38d338f0c9c211a42e"));
executeTest(String.format("test indel calling, multiple technologies"), spec);
}
@ -111,7 +111,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("79f7fc64da8f25eda1f1ee139ecdd657"));
Arrays.asList("8052390ca2b6a57c3ddf379a51225d64"));
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec);
}
@ -121,7 +121,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
+ privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
Arrays.asList("15508fcf61380a91b6611307f182447b"));
Arrays.asList("b6b9dba97fbabaeeb458a41051983e7b"));
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec);
}
@ -136,7 +136,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L " + result.get(0).getAbsolutePath(), 1,
Arrays.asList("3d4d66cc253eac55f16e5b0a36f17d8d"));
Arrays.asList("38730c7030271f5d0ca0b59365d57814"));
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
}

View File

@ -64,7 +64,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
public void testMultiSamplePilot1() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
Arrays.asList("a6c224235c21b4af816b1512eb0624df"));
Arrays.asList("5e8f1fa88dc93320cc0e75e9fe6e153b"));
executeTest("test MultiSample Pilot1", spec);
}

View File

@ -88,12 +88,12 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
@Test
public void testHaplotypeCallerMultiSampleGGAComplex() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538",
"7d2cc5c4ece386beedf6b07dfbe5bf26");
"5954a46971b7546d30151b068cded42a");
}
@Test
public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
"a17856f709b546eaed486841d78248d2");
"b3684c670f68f5a3a348a7fd2b25f10a");
}
}

View File

@ -96,7 +96,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSampleGGA() {
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
"e2d32d0dce2c5502a8e877f6bbb65a10");
"65188ec4e3b91796f62bfb5b965ccf1f");
}
@Test

View File

@ -28,6 +28,7 @@ package org.broadinstitute.sting.utils.sam;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.samtools.*;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
@ -47,6 +48,7 @@ import java.util.*;
* @version 0.1
*/
public class ReadUtils {
private final static Logger logger = Logger.getLogger(ReadUtils.class);
private static final String OFFSET_OUT_OF_BOUNDS_EXCEPTION = "Offset cannot be greater than read length %d : %d";
private static final String OFFSET_NOT_ZERO_EXCEPTION = "We ran past the end of the read and never found the offset, something went wrong!";
@ -209,7 +211,16 @@ public class ReadUtils {
if (insertSize == 0 || read.getReadUnmappedFlag()) // no adaptors in reads with mates in another chromosome or unmapped pairs
return CANNOT_COMPUTE_ADAPTOR_BOUNDARY;
if ( read.getReadPairedFlag() && read.getReadNegativeStrandFlag() == read.getMateNegativeStrandFlag() ) {
// note that the read.getProperPairFlag() is not reliably set, so many reads may have this tag but still be overlapping
// logger.info(String.format("Read %s start=%d end=%d insert=%d mateStart=%d readNeg=%b mateNeg=%b not properly paired, returning CANNOT_COMPUTE_ADAPTOR_BOUNDARY",
// read.getReadName(), read.getAlignmentStart(), read.getAlignmentEnd(), insertSize, read.getMateAlignmentStart(),
// read.getReadNegativeStrandFlag(), read.getMateNegativeStrandFlag()));
return CANNOT_COMPUTE_ADAPTOR_BOUNDARY;
}
int adaptorBoundary; // the reference coordinate for the adaptor boundary (effectively the first base IN the adaptor, closest to the read)
if (read.getReadNegativeStrandFlag())
adaptorBoundary = read.getMateAlignmentStart() - 1; // case 1 (see header)
@ -218,7 +229,7 @@ public class ReadUtils {
if ( (adaptorBoundary < read.getAlignmentStart() - MAXIMUM_ADAPTOR_LENGTH) || (adaptorBoundary > read.getAlignmentEnd() + MAXIMUM_ADAPTOR_LENGTH) )
adaptorBoundary = CANNOT_COMPUTE_ADAPTOR_BOUNDARY; // we are being conservative by not allowing the adaptor boundary to go beyond what we belive is the maximum size of an adaptor
return adaptorBoundary;
}
public static int CANNOT_COMPUTE_ADAPTOR_BOUNDARY = Integer.MIN_VALUE;

View File

@ -34,13 +34,13 @@ public class CallableLociIntegrationTest extends WalkerTest {
final static String commonArgs = "-R " + b36KGReference + " -T CallableLoci -I " + validationDataLocation + "/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s";
final static String reduceReadArgs = "-R " + b37KGReference + " -T CallableLoci -I " + " private/testdata/NA12878.HiSeq.b37.chr20.10_11mb.reduced.bam -o %s";
final static String SUMMARY_MD5 = "ffdbd9cdcb4169ebed5ae4bec797260f";
final static String SUMMARY_MD5 = "a6f5963669f19d9d137ced87d65834b0";
@Test
public void testCallableLociWalkerBed() {
String gatk_args = commonArgs + " -format BED -L 1:10,000,000-11,000,000 -summary %s";
WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 2,
Arrays.asList("42e86c06c167246a28bffdacaca75ffb", SUMMARY_MD5));
Arrays.asList("9b4ffea1dbcfefadeb1c9fa74b0e0e59", SUMMARY_MD5));
executeTest("formatBed", spec);
}
@ -48,7 +48,7 @@ public class CallableLociIntegrationTest extends WalkerTest {
public void testCallableLociWalkerPerBase() {
String gatk_args = commonArgs + " -format STATE_PER_BASE -L 1:10,000,000-11,000,000 -summary %s";
WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 2,
Arrays.asList("d66c525d9c70f62df8156261d3e535ad", SUMMARY_MD5));
Arrays.asList("d6505e489899e80c08a7168777f6e07b", SUMMARY_MD5));
executeTest("format_state_per_base", spec);
}
@ -64,7 +64,7 @@ public class CallableLociIntegrationTest extends WalkerTest {
public void testCallableLociWalker3() {
String gatk_args = commonArgs + " -format BED -L 1:10,000,000-11,000,000 -minDepth 10 -maxDepth 100 --minBaseQuality 10 --minMappingQuality 20 -summary %s";
WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 2,
Arrays.asList("46a53379aaaf9803276a0a34b234f6ab", "da431d393f7c2b2b3e27556b86c1dbc7"));
Arrays.asList("7f79ad8195c4161060463eeb21d2bb11", "7ee269e5f4581a924529a356cc806e55"));
executeTest("formatBed lots of arguments", spec);
}

View File

@ -151,6 +151,31 @@ public class ReadUtilsUnitTest extends BaseTest {
read.setReadNegativeStrandFlag(false);
boundary = get.getAdaptor(read);
Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY);
// Test case 8: read doesn't have proper pair flag set
read = makeRead(fragmentSize, mateStart);
read.setReadPairedFlag(true);
read.setProperPairFlag(false);
Assert.assertEquals(get.getAdaptor(read), ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY);
// Test case 9: read and mate have same negative flag setting
for ( final boolean negFlag: Arrays.asList(true, false) ) {
read = makeRead(fragmentSize, mateStart);
read.setAlignmentStart(BEFORE);
read.setReadPairedFlag(true);
read.setProperPairFlag(true);
read.setReadNegativeStrandFlag(negFlag);
read.setMateNegativeStrandFlag(!negFlag);
Assert.assertTrue(get.getAdaptor(read) != ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY, "Get adaptor should have succeeded");
read = makeRead(fragmentSize, mateStart);
read.setAlignmentStart(BEFORE);
read.setReadPairedFlag(true);
read.setProperPairFlag(true);
read.setReadNegativeStrandFlag(negFlag);
read.setMateNegativeStrandFlag(negFlag);
Assert.assertEquals(get.getAdaptor(read), ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY, "Get adaptor should have failed for reads with bad alignment orientation");
}
}
@Test (enabled = true)