From abc4d5b7b3fdfd6d9d6199dec74dc0c7a478d306 Mon Sep 17 00:00:00 2001 From: Ron Levine Date: Tue, 26 Jul 2016 12:55:14 -0400 Subject: [PATCH] Bypass spanning deletions in Rank Sum tests --- .../walkers/annotator/AS_RankSumTest.java | 3 +- .../annotator/AS_ReadPosRankSumTest.java | 2 + .../tools/walkers/annotator/RankSumTest.java | 10 +++-- .../walkers/annotator/ReadPosRankSumTest.java | 5 +++ ...perGeneralPloidySuite1IntegrationTest.java | 2 +- ...perGeneralPloidySuite2IntegrationTest.java | 2 +- ...dGenotyperIndelCallingIntegrationTest.java | 2 +- .../UnifiedGenotyperIntegrationTest.java | 2 +- ...GenotyperNormalCallingIntegrationTest.java | 2 +- ...lexAndSymbolicVariantsIntegrationTest.java | 6 +-- .../HaplotypeCallerGVCFIntegrationTest.java | 9 ++++ .../HaplotypeCallerIntegrationTest.java | 14 +++--- .../gatk/utils/sam/AlignmentUtils.java | 45 +++++++++++++++++++ .../utils/sam/AlignmentUtilsUnitTest.java | 22 +++++++++ 14 files changed, 106 insertions(+), 20 deletions(-) diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/AS_RankSumTest.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/AS_RankSumTest.java index 1f15d17b5..f413f55cc 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/AS_RankSumTest.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/AS_RankSumTest.java @@ -277,7 +277,8 @@ public abstract class AS_RankSumTest extends RankSumTest implements ReducibleAnn final GATKSAMRecord read = el.getKey(); if ( isUsableRead(read, refLoc) ) { final Double value = getElementForRead(read, refLoc, a); - if ( value == null ) + // Bypass read if the clipping goal is not reached or the refloc is inside a spanning deletion + if ( value == null || value < 0.0 ) continue; if(perAlleleValues.containsKey(a.getMostLikelyAllele())) diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/AS_ReadPosRankSumTest.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/AS_ReadPosRankSumTest.java index 2bd2eb9fd..e0a92af1d 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/AS_ReadPosRankSumTest.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/AS_ReadPosRankSumTest.java @@ -104,6 +104,8 @@ public class AS_ReadPosRankSumTest extends AS_RankSumTest implements AS_Standard int readPos = AlignmentUtils.calcAlignmentByteArrayOffset(read.getCigar(), offset, false, 0, 0); final int numAlignedBases = AlignmentUtils.getNumAlignedBasesCountingSoftClips( read ); + // Note: For a spanning deletion, readPos is at the upstream end of the deletion and is greater than numAlignedBases (which does not include deletions). + // Hence, the resulting readPos will have a negative value. if (readPos > numAlignedBases / 2) readPos = numAlignedBases - (readPos + 1); return (double)readPos; diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/RankSumTest.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/RankSumTest.java index cca58bd66..b1f7bb63e 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/RankSumTest.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/RankSumTest.java @@ -76,6 +76,7 @@ import java.util.*; //TODO: will eventually implement ReducibleAnnotation in order to preserve accuracy for CombineGVCFs and GenotypeGVCFs -- see RMSAnnotation.java for an example of an abstract ReducibleAnnotation public abstract class RankSumTest extends InfoFieldAnnotation implements ActiveRegionBasedAnnotation { static final boolean DEBUG = false; + protected static double INVALID_READ_POSITION = -1; // No mapping to a read position public Map annotate(final RefMetaDataTracker tracker, final AnnotatorCompatible walker, @@ -86,11 +87,11 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements ActiveR // either stratifiedContexts or stratifiedPerReadAlleleLikelihoodMap has to be non-null final GenotypesContext genotypes = vc.getGenotypes(); - if (genotypes == null || genotypes.size() == 0) + if (genotypes == null || genotypes.isEmpty()) return null; - final ArrayList refQuals = new ArrayList<>(); - final ArrayList altQuals = new ArrayList<>(); + final List refQuals = new ArrayList<>(); + final List altQuals = new ArrayList<>(); for ( final Genotype genotype : genotypes.iterateInSampleNameOrder() ) { @@ -183,7 +184,8 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements ActiveR final GATKSAMRecord read = el.getKey(); if ( isUsableRead(read, refLoc) ) { final Double value = getElementForRead(read, refLoc, a); - if ( value == null ) + // Bypass read if the clipping goal is not reached or the refloc is inside a spanning deletion + if ( value == null || value == INVALID_READ_POSITION ) continue; if ( a.getMostLikelyAllele().isReference() ) diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/ReadPosRankSumTest.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/ReadPosRankSumTest.java index 51890fc33..fbba479e4 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/ReadPosRankSumTest.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/ReadPosRankSumTest.java @@ -104,6 +104,11 @@ public class ReadPosRankSumTest extends RankSumTest implements StandardAnnotatio if ( offset == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) return null; + // If the offset inside a deletion, it does not lie on a read. + if ( AlignmentUtils.isInsideDeletion(read.getCigar(), offset) ) { + return INVALID_READ_POSITION; + } + int readPos = AlignmentUtils.calcAlignmentByteArrayOffset( read.getCigar(), offset, false, 0, 0 ); final int numAlignedBases = AlignmentUtils.getNumAlignedBasesCountingSoftClips( read ); if (readPos > numAlignedBases / 2) diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java index 92fb16387..456f5e8ca 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java @@ -88,6 +88,6 @@ public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTe //TODO the old MD5 is kept for the record. //TODO this should be revisit once we get into addressing inaccuracies by the independent allele approach. // executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "b5ff7530827f4b9039a58bdc8a3560d2"); - executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "b99416c04ba951577f43fd2d25f46388"); + executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "7421a776c75d0ab5a2ff89d9e7f105ff"); } } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java index 4262ab665..ff0f23666 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java @@ -63,7 +63,7 @@ public class UnifiedGenotyperGeneralPloidySuite2IntegrationTest extends WalkerTe @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() { - executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","1d27eaa3557dc28c95b9024114d50ad1"); + executor.PC_LSV_Test_NoRef("-A AlleleCountBySample -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","f4092488c9785d800c3f6470af7119ce"); } @Test(enabled = true) diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java index 791e4ac6d..2b94dfc7a 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java @@ -140,7 +140,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L " + result.get(0).getAbsolutePath(), 1, - Arrays.asList("08967b41ccc76b1f3c7093e51a90713a")); + Arrays.asList("75b5a925c2009a8c14ea34fff3d04443")); executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2); } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 7d45eddb5..3cdfec3ba 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -310,7 +310,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000 " + "-A SnpEff", 1, - Arrays.asList("e99f100fe71bb7f328b485204c16f14a")); + Arrays.asList("65641c92469ab80513b04144d0eae900")); executeTest("testSnpEffAnnotationRequestedWithoutRodBinding", spec); } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java index 7348e12b1..8278148ec 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java @@ -70,7 +70,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testMultiSamplePilot1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1, - Arrays.asList("c759b04ed0d948bda95008e29f3f5c2d")); + Arrays.asList("e7f216d2f9857a579ef3e211076b37a4")); executeTest("test MultiSample Pilot1", spec); } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java index 383b85801..af1987adf 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java @@ -72,7 +72,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleComplex1() { - HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "113ae4c0244c50243313a7d6e77da26b"); + HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "86528820f8c102c712d9562b83204c05"); } private void HCTestSymbolicVariants(String bam, String args, String md5) { @@ -96,7 +96,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleGGAComplex() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538", - "8f8680bd8e1549ad88691c9c8af9977c"); + "828ef27284bd4045148728952b3a7d94"); } @Test @@ -114,7 +114,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleConsensusModeComplex() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538 -L 20:133041-133161 -L 20:300207-300337", - "353f1895047b15b1fec22b559c9da0c1"); + "060eed2610eed818b2ab55d582eb22ec"); } } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java index 402184e33..bb7cd0a12 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java @@ -453,4 +453,13 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { spec.disableShadowBCF(); executeTest("testBadGQBValues", spec); } + + @Test + public void testHaplotypeCallerGVCSpanDel() { + final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L 1:26357667 -ERC GVCF --no_cmdline_in_header -A AS_ReadPosRankSumTest -A ReadPosRankSumTest -variant_index_type %s -variant_index_parameter %d", + b37KGReference, privateTestDir + "NexPond-377866-1:26357600-26357700.bam", GATKVCFUtils.DEFAULT_GVCF_INDEX_TYPE, GATKVCFUtils.DEFAULT_GVCF_INDEX_PARAMETER); + final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("93bc22340e6a4b01a7b96e5a3a12dfc3")); + spec.disableShadowBCF(); + executeTest("testHaplotypeCallerGVCSpanDel", spec); + } } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index e34f734e3..31bd7d0a8 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -107,7 +107,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeBAMOutFlags() throws IOException { - HCTestWithBAMOut(NA12878_BAM, " -L 20:10000000-10100000 ", "08943fb76d1cd5b5b8815e3991754911", "6a81bbefa6c4ed7a6b8d2c3e0e5a4756"); + HCTestWithBAMOut(NA12878_BAM, " -L 20:10000000-10100000 ", "56086abc3bd5e3f7d111f452b7cc4fa1", "6a81bbefa6c4ed7a6b8d2c3e0e5a4756"); } @Test @@ -153,17 +153,17 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerGraphBasedSingleSample() throws IOException { - HCTest(NA12878_BAM, "-likelihoodEngine GraphBased", "ba0dc5f416d69558cb5dd3e0a0a5a084"); + HCTest(NA12878_BAM, "-likelihoodEngine GraphBased", "8ab21bd6fb7ef37480f556fd5fa5375c"); } @Test public void testHaplotypeCallerGraphBasedMultiSampleHaploid() throws IOException { - HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased -ploidy 1", "129bca18bb9eec23004b2d28aa541de2"); + HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased -ploidy 1", "01220e85ff6bc49e35a325a1df2519e5"); } @Test public void testHaplotypeCallerGraphBasedMultiSample() throws IOException { - HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "2b89c9e102a049e223bc0d91156a08a3"); + HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "80c5b0f72a7962e1ba846ec20465001f"); } @Test @@ -398,7 +398,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void testLackSensitivityDueToBadHaplotypeSelectionFix() { final String commandLine = String.format("-T HaplotypeCaller -pairHMMSub %s %s -R %s -I %s -L %s --no_cmdline_in_header --maxNumHaplotypesInPopulation 16", HMM_SUB_IMPLEMENTATION, ALWAYS_LOAD_VECTOR_HMM, b37KGReferenceWithDecoy, privateTestDir + "hc-lack-sensitivity.bam", privateTestDir + "hc-lack-sensitivity.interval_list"); - final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("22f5a3e9366e611509f03c984f8b4960")); + final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("5087a8855b3ee9ea1091367674783462")); spec.disableShadowBCF(); executeTest("testLackSensitivityDueToBadHaplotypeSelectionFix", spec); } @@ -484,12 +484,12 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerTandemRepeatAnnotator() throws IOException{ - HCTest(NA12878_BAM, " -L 20:10001000-10010000 -A TandemRepeatAnnotator -XA MappingQualityZero -XA SpanningDeletions", "34328c475325b7dfaa57ab5920478e0c"); + HCTest(NA12878_BAM, " -L 20:10001000-10010000 -A TandemRepeatAnnotator -XA MappingQualityZero -XA SpanningDeletions", "2cf4cab0035d09aa0aec6f3faa2c9df6"); } @Test public void testHBaseCountsBySample() throws IOException{ - HCTest(NA12878_BAM, " -L 20:10001000-10010000 -A BaseCountsBySample", "f5ad4e03c0faaa806ee6ae536af8a479"); + HCTest(NA12878_BAM, " -L 20:10001000-10010000 -A BaseCountsBySample", "c4550a5933cc954bad70980750e0df52"); } @Test diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/sam/AlignmentUtils.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/sam/AlignmentUtils.java index 5aae6af85..c3d6b5cac 100644 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/sam/AlignmentUtils.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/sam/AlignmentUtils.java @@ -538,6 +538,51 @@ public final class AlignmentUtils { return alignmentPos; } + /** + * Is the offset inside a deletion? + * + * @param cigar the read's CIGAR -- cannot be null + * @param offset the offset into the CIGAR + * @return true if the offset is inside a deletion, false otherwise + */ + public static boolean isInsideDeletion(final Cigar cigar, final int offset) { + if ( cigar == null ) throw new IllegalArgumentException("attempting to find the alignment position from a CIGAR that is null"); + if ( offset < 0 ) return false; + + // pos counts read bases + int pos = 0; + int prevPos = 0; + + for (final CigarElement ce : cigar.getCigarElements()) { + + switch (ce.getOperator()) { + case I: + case S: + case D: + case M: + case EQ: + case X: + prevPos = pos; + pos += ce.getLength(); + break; + case H: + case P: + case N: + break; + default: + throw new ReviewedGATKException("Unsupported cigar operator: " + ce.getOperator()); + } + + // Is the offset inside a deletion? + if ( prevPos < offset && pos >= offset && ce.getOperator() == CigarOperator.D ) { + return true; + + } + } + + return false; + } + /** * Generate an array of bases for just those that are aligned to the reference (i.e. no clips or insertions) * diff --git a/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/sam/AlignmentUtilsUnitTest.java b/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/sam/AlignmentUtilsUnitTest.java index 24351842f..160d2e51f 100644 --- a/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/sam/AlignmentUtilsUnitTest.java +++ b/public/gatk-utils/src/test/java/org/broadinstitute/gatk/utils/sam/AlignmentUtilsUnitTest.java @@ -515,6 +515,28 @@ public class AlignmentUtilsUnitTest { Assert.assertEquals(actual, expectedResult, "Wrong alignment offset detected for cigar " + cigar.toString()); } + @Test + public void testIsInsideDeletion() { + final List cigarElements = Arrays.asList(new CigarElement(5, CigarOperator.S), + new CigarElement(5, CigarOperator.M), + new CigarElement(5, CigarOperator.EQ), + new CigarElement(6, CigarOperator.N), + new CigarElement(5, CigarOperator.X), + new CigarElement(6, CigarOperator.D), + new CigarElement(1, CigarOperator.P), + new CigarElement(1, CigarOperator.H)); + final Cigar cigar = new Cigar(cigarElements); + for ( int i=-1; i <= 20; i++ ) { + Assert.assertFalse(AlignmentUtils.isInsideDeletion(cigar, i)); + } + for ( int i=21; i <= 26; i++ ){ + Assert.assertTrue(AlignmentUtils.isInsideDeletion(cigar, i)); + } + for ( int i=27; i <= 28; i++ ) { + Assert.assertFalse(AlignmentUtils.isInsideDeletion(cigar, i)); + } + } + //////////////////////////////////////////////////// // Test AlignmentUtils.readToAlignmentByteArray() // ////////////////////////////////////////////////////