From 2bcbdd469f1c7a48a0c933e652d1c488274280b0 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 19 Apr 2013 16:11:29 -0400 Subject: [PATCH 1/4] leftAlignCigarSequentially now supports haplotypes with insertions and deletions where the deletion allele was previously removed by the leftAlignSingleIndel during it's cleanup phase. --- .../haplotypecaller/DeBruijnAssembler.java | 14 +++++++------- .../DeBruijnAssemblerUnitTest.java | 18 +++++++++++++----- .../sting/utils/sam/AlignmentUtils.java | 7 ++++--- 3 files changed, 24 insertions(+), 15 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java index 12a4841bf..5a5946183 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java @@ -410,9 +410,6 @@ public class DeBruijnAssembler extends LocalAssemblyEngine { // extend partial haplotypes which are anchored in the reference to include the full active region h = extendPartialHaplotype(h, activeRegionStart, refWithPadding); final Cigar leftAlignedCigar = leftAlignCigarSequentially(AlignmentUtils.consolidateCigar(h.getCigar()), refWithPadding, h.getBases(), activeRegionStart, 0); - if( leftAlignedCigar.getReferenceLength() != refHaplotype.getCigar().getReferenceLength() ) { // left alignment failure - continue; - } if( !returnHaplotypes.contains(h) ) { h.setAlignmentStartHapwrtRef(activeRegionStart); h.setCigar(leftAlignedCigar); @@ -548,9 +545,8 @@ public class DeBruijnAssembler extends LocalAssemblyEngine { final CigarElement ce = cigar.getCigarElement(i); if (ce.getOperator() == CigarOperator.D || ce.getOperator() == CigarOperator.I) { cigarToAlign.add(ce); - for( final CigarElement toAdd : AlignmentUtils.leftAlignIndel(cigarToAlign, refSeq, readSeq, refIndex, readIndex, false).getCigarElements() ) { - cigarToReturn.add(toAdd); - } + final Cigar leftAligned = AlignmentUtils.leftAlignSingleIndel(cigarToAlign, refSeq, readSeq, refIndex, readIndex, false); + for ( final CigarElement toAdd : leftAligned.getCigarElements() ) { cigarToReturn.add(toAdd); } refIndex += cigarToAlign.getReferenceLength(); readIndex += cigarToAlign.getReadLength(); cigarToAlign = new Cigar(); @@ -563,7 +559,11 @@ public class DeBruijnAssembler extends LocalAssemblyEngine { cigarToReturn.add(toAdd); } } - return cigarToReturn; + + final Cigar result = AlignmentUtils.consolidateCigar(cigarToReturn); + if( result.getReferenceLength() != cigar.getReferenceLength() ) + throw new IllegalStateException("leftAlignCigarSequentially failed to produce a valid CIGAR. Reference lengths differ. Initial cigar " + cigar + " left aligned into " + result); + return result; } /** diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssemblerUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssemblerUnitTest.java index e6dea4d11..e1559a13a 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssemblerUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssemblerUnitTest.java @@ -52,10 +52,7 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller; * Date: 3/27/12 */ -import net.sf.samtools.Cigar; -import net.sf.samtools.CigarElement; -import net.sf.samtools.CigarOperator; -import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.*; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.DeBruijnGraph; import org.broadinstitute.sting.utils.haplotype.Haplotype; @@ -125,6 +122,17 @@ public class DeBruijnAssemblerUnitTest extends BaseTest { } } + @Test(enabled = true) + public void testLeftAlignCigarSequentiallyAdjacentID() { + final String ref = "GTCTCTCTCTCTCTCTCTATATATATATATATATTT"; + final String hap = "GTCTCTCTCTCTCTCTCTCTCTATATATATATATTT"; + final Cigar originalCigar = TextCigarCodec.getSingleton().decode("18M4I12M4D2M"); + + final Cigar result = new DeBruijnAssembler().leftAlignCigarSequentially(originalCigar, ref.getBytes(), hap.getBytes(), 0, 0); + logger.warn("Result is " + result); + Assert.assertEquals(originalCigar.getReferenceLength(), result.getReferenceLength(), "Reference lengths are different"); + } + private static class MockBuilder extends DeBruijnGraphBuilder { public final List addedPairs = new LinkedList(); @@ -165,7 +173,7 @@ public class DeBruijnAssemblerUnitTest extends BaseTest { return tests.toArray(new Object[][]{}); } - @Test(dataProvider = "AddReadKmersToGraph") + @Test(dataProvider = "AddReadKmersToGraph", enabled = ! DEBUG) public void testAddReadKmersToGraph(final String bases, final int kmerSize, final List badQualsSites) { final int readLen = bases.length(); final DeBruijnAssembler assembler = new DeBruijnAssembler(); diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java index e48d1ca4c..59a22c550 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java @@ -664,7 +664,7 @@ public final class AlignmentUtils { if ( numIndels == 0 ) return cigar; if ( numIndels == 1 ) - return leftAlignSingleIndel(cigar, refSeq, readSeq, refIndex, readIndex); + return leftAlignSingleIndel(cigar, refSeq, readSeq, refIndex, readIndex, true); // if we got here then there is more than 1 indel in the alignment if ( doNotThrowExceptionForMultipleIndels ) @@ -709,10 +709,11 @@ public final class AlignmentUtils { * @param readSeq read sequence * @param refIndex 0-based alignment start position on ref * @param readIndex 0-based alignment start position on read + * @param cleanupCigar if true, we'll cleanup the resulting cigar element, removing 0 length elements and deletions from the first cigar position * @return a non-null cigar, in which the single indel is guaranteed to be placed at the leftmost possible position across a repeat (if any) */ @Ensures("result != null") - public static Cigar leftAlignSingleIndel(Cigar cigar, final byte[] refSeq, final byte[] readSeq, final int refIndex, final int readIndex) { + public static Cigar leftAlignSingleIndel(Cigar cigar, final byte[] refSeq, final byte[] readSeq, final int refIndex, final int readIndex, final boolean cleanupCigar) { ensureLeftAlignmentHasGoodArguments(cigar, refSeq, readSeq, refIndex, readIndex); int indexOfIndel = -1; @@ -751,7 +752,7 @@ public final class AlignmentUtils { cigar = newCigar; i = -1; if (reachedEndOfRead) - cigar = cleanUpCigar(cigar); + cigar = cleanupCigar ? cleanUpCigar(cigar) : cigar; } if (reachedEndOfRead) From f5a301fb6354f0f3fad6cd01bd1c154ee15a1086 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 23 Apr 2013 18:26:20 -0400 Subject: [PATCH 2/4] Bugfix for AlignmentUtils.trimCigarByBases -- Previous version would trim down 2M2D2M into 2M if you asked for the first 2 bases, but this can result in incorrect alignment of the bases to the reference as the bases no longer span the full reference interval expected. Fixed and added unit tests --- .../org/broadinstitute/sting/utils/sam/AlignmentUtils.java | 4 ++-- .../sting/utils/sam/AlignmentUtilsUnitTest.java | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java index 59a22c550..fa35e3f53 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java @@ -934,7 +934,7 @@ public final class AlignmentUtils { */ public static Cigar trimCigarByBases(final Cigar cigar, final int start, final int end) { if ( start < 0 ) throw new IllegalArgumentException("Start must be >= 0 but got " + start); - if ( end < start ) throw new IllegalArgumentException("End " + end + " is < start start " + start); + if ( end < start ) throw new IllegalArgumentException("End " + end + " is < start = " + start); if ( end > cigar.getReadLength() ) throw new IllegalArgumentException("End is beyond the cigar's read length " + end + " for cigar " + cigar ); final Cigar result = trimCigar(cigar, start, end, false); @@ -962,7 +962,7 @@ public final class AlignmentUtils { int pos = 0; for ( final CigarElement elt : cigar.getCigarElements() ) { - if ( pos > end ) break; + if ( pos > end && (byReference || elt.getOperator() != CigarOperator.D) ) break; switch ( elt.getOperator() ) { case D: diff --git a/public/java/test/org/broadinstitute/sting/utils/sam/AlignmentUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/sam/AlignmentUtilsUnitTest.java index 2a2d80206..e7d54c460 100644 --- a/public/java/test/org/broadinstitute/sting/utils/sam/AlignmentUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/sam/AlignmentUtilsUnitTest.java @@ -792,7 +792,8 @@ public class AlignmentUtilsUnitTest { tests.add(new Object[]{"2M2D2I", 3, 3, "1I"}); tests.add(new Object[]{"2M2D2I", 2, 2, "2D1I"}); tests.add(new Object[]{"2M2D2I", 1, 2, "1M2D1I"}); - tests.add(new Object[]{"2M2D2I", 1, 1, "1M"}); + tests.add(new Object[]{"2M2D2I", 0, 1, "2M2D"}); + tests.add(new Object[]{"2M2D2I", 1, 1, "1M2D"}); return tests.toArray(new Object[][]{}); } From 0587a145bfdaca6fcca2acae9a259cde3997fa76 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 23 Apr 2013 18:28:41 -0400 Subject: [PATCH 3/4] Utils.dupString should allow 0 number of duplicates to produce empty string --- public/java/src/org/broadinstitute/sting/utils/Utils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/Utils.java b/public/java/src/org/broadinstitute/sting/utils/Utils.java index ff0ea958c..97a91179e 100644 --- a/public/java/src/org/broadinstitute/sting/utils/Utils.java +++ b/public/java/src/org/broadinstitute/sting/utils/Utils.java @@ -294,7 +294,7 @@ public class Utils { */ public static String dupString(final String s, int nCopies) { if ( s == null || s.equals("") ) throw new IllegalArgumentException("Bad s " + s); - if ( nCopies < 1 ) throw new IllegalArgumentException("nCopies must be >= 1 but got " + nCopies); + if ( nCopies < 0 ) throw new IllegalArgumentException("nCopies must be >= 0 but got " + nCopies); final StringBuilder b = new StringBuilder(); for ( int i = 0; i < nCopies; i++ ) From f42bb86bdde92a6439284f77d9c378868ac32bc6 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 24 Apr 2013 16:01:43 -0400 Subject: [PATCH 4/4] e# This is a combination of 2 commits. Only try to clip adaptors when both reads of the pair are on opposite strands -- Read pairs that have unusual alignments, such as two reads both oriented like: <----- <----- where previously having their adaptors clipped as though the standard calculation of the insert size was meaningful, which it is not for such oddly oriented pairs. This caused us to clip extra good bases from reads. -- Update MD5s due change in adaptor clipping, which add some coverage in some places --- .../ReduceReadsIntegrationTest.java | 2 +- .../ErrorRatePerCycleIntegrationTest.java | 2 +- ...perGeneralPloidySuite2IntegrationTest.java | 6 ++--- ...dGenotyperIndelCallingIntegrationTest.java | 10 ++++---- ...GenotyperNormalCallingIntegrationTest.java | 2 +- ...lexAndSymbolicVariantsIntegrationTest.java | 4 +-- .../HaplotypeCallerIntegrationTest.java | 2 +- .../sting/utils/sam/ReadUtils.java | 15 +++++++++-- .../coverage/CallableLociIntegrationTest.java | 8 +++--- .../sting/utils/sam/ReadUtilsUnitTest.java | 25 +++++++++++++++++++ 10 files changed, 56 insertions(+), 20 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java index b5963498a..15b54dbd1 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java @@ -259,7 +259,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest { public void testDivideByZero() { String base = String.format("-T ReduceReads %s -npt -R %s -I %s", DIVIDEBYZERO_L, REF, DIVIDEBYZERO_BAM) + " -o %s "; // we expect to lose coverage due to the downsampling so don't run the systematic tests - executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("1663f35802f82333c5e15653e437ce2d"))); + executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("c459a6153a17c2cbf8441e1918fda9c8"))); } /** diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycleIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycleIntegrationTest.java index b435fc2eb..84020e2d0 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycleIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/ErrorRatePerCycleIntegrationTest.java @@ -57,7 +57,7 @@ public class ErrorRatePerCycleIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T ErrorRatePerCycle -R " + b37KGReference + " -I " + b37GoodBAM + " -L 20:10,000,000-10,100,000 -o %s", 1, - Arrays.asList("dccdf3cb3193d01a1a767097e4a5c35e")); + Arrays.asList("6191340f0b56ee81fb248c8f5c913a8e")); executeTest("ErrorRatePerCycle:", spec); } } \ No newline at end of file diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java index ecc1ab66e..86961988d 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java @@ -58,16 +58,16 @@ public class UnifiedGenotyperGeneralPloidySuite2IntegrationTest extends WalkerTe @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() { - executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","5eabc12fc7b4f9749e6d1be0f5b45d14"); + executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","9f960977b1b8d90ac75ba4306336553c"); } @Test(enabled = true) public void testMT_SNP_DISCOVERY_sp4() { - executor.PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","71b7b9eac1e4a9b2b7e8c3689d1f29ec"); + executor.PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","eb8b008a463f9fa9ad0155bf2f5b78b3"); } @Test(enabled = true) public void testMT_SNP_GGA_sp10() { - executor.PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "fc36e925e269b035d4b27edb661be06b"); + executor.PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "fb988e9b93bc73b5e532584c83cac833"); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java index 6b7631039..856e97ebe 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java @@ -73,7 +73,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("ea4c4e020f59f48901d5820c8e4f6001")); + Arrays.asList("19f77f557150905ef3fa4713f611a1b9")); executeTest(String.format("test indel caller in SLX"), spec); } @@ -101,7 +101,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("8d9b8f8a1479322961c840e461b6dba8")); + Arrays.asList("bb3dbad9666ebf38d338f0c9c211a42e")); executeTest(String.format("test indel calling, multiple technologies"), spec); } @@ -111,7 +111,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("79f7fc64da8f25eda1f1ee139ecdd657")); + Arrays.asList("8052390ca2b6a57c3ddf379a51225d64")); executeTest("test MultiSample Pilot2 indels with alleles passed in", spec); } @@ -121,7 +121,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("15508fcf61380a91b6611307f182447b")); + Arrays.asList("b6b9dba97fbabaeeb458a41051983e7b")); executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec); } @@ -136,7 +136,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L " + result.get(0).getAbsolutePath(), 1, - Arrays.asList("3d4d66cc253eac55f16e5b0a36f17d8d")); + Arrays.asList("38730c7030271f5d0ca0b59365d57814")); executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2); } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java index 38ab8ee36..907af0f34 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java @@ -64,7 +64,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testMultiSamplePilot1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1, - Arrays.asList("a6c224235c21b4af816b1512eb0624df")); + Arrays.asList("5e8f1fa88dc93320cc0e75e9fe6e153b")); executeTest("test MultiSample Pilot1", spec); } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java index 9d4c52798..90d7f493c 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java @@ -88,12 +88,12 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleGGAComplex() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538", - "7d2cc5c4ece386beedf6b07dfbe5bf26"); + "5954a46971b7546d30151b068cded42a"); } @Test public void testHaplotypeCallerMultiSampleGGAMultiAllelic() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337", - "a17856f709b546eaed486841d78248d2"); + "b3684c670f68f5a3a348a7fd2b25f10a"); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index d5e163a88..03d3e8a17 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -96,7 +96,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleGGA() { HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", - "e2d32d0dce2c5502a8e877f6bbb65a10"); + "65188ec4e3b91796f62bfb5b965ccf1f"); } @Test diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index 0eed80f3a..0db3aa043 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -28,6 +28,7 @@ package org.broadinstitute.sting.utils.sam; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import net.sf.samtools.*; +import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; @@ -47,6 +48,7 @@ import java.util.*; * @version 0.1 */ public class ReadUtils { + private final static Logger logger = Logger.getLogger(ReadUtils.class); private static final String OFFSET_OUT_OF_BOUNDS_EXCEPTION = "Offset cannot be greater than read length %d : %d"; private static final String OFFSET_NOT_ZERO_EXCEPTION = "We ran past the end of the read and never found the offset, something went wrong!"; @@ -209,7 +211,16 @@ public class ReadUtils { if (insertSize == 0 || read.getReadUnmappedFlag()) // no adaptors in reads with mates in another chromosome or unmapped pairs return CANNOT_COMPUTE_ADAPTOR_BOUNDARY; - + + if ( read.getReadPairedFlag() && read.getReadNegativeStrandFlag() == read.getMateNegativeStrandFlag() ) { + // note that the read.getProperPairFlag() is not reliably set, so many reads may have this tag but still be overlapping +// logger.info(String.format("Read %s start=%d end=%d insert=%d mateStart=%d readNeg=%b mateNeg=%b not properly paired, returning CANNOT_COMPUTE_ADAPTOR_BOUNDARY", +// read.getReadName(), read.getAlignmentStart(), read.getAlignmentEnd(), insertSize, read.getMateAlignmentStart(), +// read.getReadNegativeStrandFlag(), read.getMateNegativeStrandFlag())); + return CANNOT_COMPUTE_ADAPTOR_BOUNDARY; + } + + int adaptorBoundary; // the reference coordinate for the adaptor boundary (effectively the first base IN the adaptor, closest to the read) if (read.getReadNegativeStrandFlag()) adaptorBoundary = read.getMateAlignmentStart() - 1; // case 1 (see header) @@ -218,7 +229,7 @@ public class ReadUtils { if ( (adaptorBoundary < read.getAlignmentStart() - MAXIMUM_ADAPTOR_LENGTH) || (adaptorBoundary > read.getAlignmentEnd() + MAXIMUM_ADAPTOR_LENGTH) ) adaptorBoundary = CANNOT_COMPUTE_ADAPTOR_BOUNDARY; // we are being conservative by not allowing the adaptor boundary to go beyond what we belive is the maximum size of an adaptor - + return adaptorBoundary; } public static int CANNOT_COMPUTE_ADAPTOR_BOUNDARY = Integer.MIN_VALUE; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociIntegrationTest.java index c07bf171a..336c15ccc 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociIntegrationTest.java @@ -34,13 +34,13 @@ public class CallableLociIntegrationTest extends WalkerTest { final static String commonArgs = "-R " + b36KGReference + " -T CallableLoci -I " + validationDataLocation + "/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s"; final static String reduceReadArgs = "-R " + b37KGReference + " -T CallableLoci -I " + " private/testdata/NA12878.HiSeq.b37.chr20.10_11mb.reduced.bam -o %s"; - final static String SUMMARY_MD5 = "ffdbd9cdcb4169ebed5ae4bec797260f"; + final static String SUMMARY_MD5 = "a6f5963669f19d9d137ced87d65834b0"; @Test public void testCallableLociWalkerBed() { String gatk_args = commonArgs + " -format BED -L 1:10,000,000-11,000,000 -summary %s"; WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 2, - Arrays.asList("42e86c06c167246a28bffdacaca75ffb", SUMMARY_MD5)); + Arrays.asList("9b4ffea1dbcfefadeb1c9fa74b0e0e59", SUMMARY_MD5)); executeTest("formatBed", spec); } @@ -48,7 +48,7 @@ public class CallableLociIntegrationTest extends WalkerTest { public void testCallableLociWalkerPerBase() { String gatk_args = commonArgs + " -format STATE_PER_BASE -L 1:10,000,000-11,000,000 -summary %s"; WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 2, - Arrays.asList("d66c525d9c70f62df8156261d3e535ad", SUMMARY_MD5)); + Arrays.asList("d6505e489899e80c08a7168777f6e07b", SUMMARY_MD5)); executeTest("format_state_per_base", spec); } @@ -64,7 +64,7 @@ public class CallableLociIntegrationTest extends WalkerTest { public void testCallableLociWalker3() { String gatk_args = commonArgs + " -format BED -L 1:10,000,000-11,000,000 -minDepth 10 -maxDepth 100 --minBaseQuality 10 --minMappingQuality 20 -summary %s"; WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 2, - Arrays.asList("46a53379aaaf9803276a0a34b234f6ab", "da431d393f7c2b2b3e27556b86c1dbc7")); + Arrays.asList("7f79ad8195c4161060463eeb21d2bb11", "7ee269e5f4581a924529a356cc806e55")); executeTest("formatBed lots of arguments", spec); } diff --git a/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java index 331121c55..abe0c394b 100644 --- a/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java @@ -151,6 +151,31 @@ public class ReadUtilsUnitTest extends BaseTest { read.setReadNegativeStrandFlag(false); boundary = get.getAdaptor(read); Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY); + + // Test case 8: read doesn't have proper pair flag set + read = makeRead(fragmentSize, mateStart); + read.setReadPairedFlag(true); + read.setProperPairFlag(false); + Assert.assertEquals(get.getAdaptor(read), ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY); + + // Test case 9: read and mate have same negative flag setting + for ( final boolean negFlag: Arrays.asList(true, false) ) { + read = makeRead(fragmentSize, mateStart); + read.setAlignmentStart(BEFORE); + read.setReadPairedFlag(true); + read.setProperPairFlag(true); + read.setReadNegativeStrandFlag(negFlag); + read.setMateNegativeStrandFlag(!negFlag); + Assert.assertTrue(get.getAdaptor(read) != ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY, "Get adaptor should have succeeded"); + + read = makeRead(fragmentSize, mateStart); + read.setAlignmentStart(BEFORE); + read.setReadPairedFlag(true); + read.setProperPairFlag(true); + read.setReadNegativeStrandFlag(negFlag); + read.setMateNegativeStrandFlag(negFlag); + Assert.assertEquals(get.getAdaptor(read), ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY, "Get adaptor should have failed for reads with bad alignment orientation"); + } } @Test (enabled = true)