From 6d7ec7377c40d94a01e79c176dd19acee92e0a85 Mon Sep 17 00:00:00 2001 From: Ron Levine Date: Fri, 11 Mar 2016 20:39:54 -0500 Subject: [PATCH] Set --reference_window_stop if homopolymer is greater than window size --- .../walkers/annotator/HomopolymerRun.java | 20 ++++++++++++------- .../VariantAnnotatorIntegrationTest.java | 18 +++++++++++++++++ 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/HomopolymerRun.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/HomopolymerRun.java index d21dd1fbc..5b02cada2 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/HomopolymerRun.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/HomopolymerRun.java @@ -51,6 +51,7 @@ package org.broadinstitute.gatk.tools.walkers.annotator; +import org.apache.log4j.Logger; import org.broadinstitute.gatk.utils.contexts.AlignmentContext; import org.broadinstitute.gatk.utils.contexts.ReferenceContext; import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker; @@ -83,7 +84,11 @@ import java.util.Map; */ public class HomopolymerRun extends InfoFieldAnnotation implements ExperimentalAnnotation { + private final static Logger logger = Logger.getLogger(HardyWeinberg.class); + private boolean ANNOTATE_INDELS = true; + + private final static int BAD_RUN_LENGTH = -1; public Map annotate(final RefMetaDataTracker tracker, final AnnotatorCompatible walker, @@ -103,6 +108,9 @@ public class HomopolymerRun extends InfoFieldAnnotation implements ExperimentalA } else { return null; } + + if ( run == BAD_RUN_LENGTH ) + return null; Map map = new HashMap<>(); map.put(getKeyNames().get(0), String.format("%d", run)); @@ -149,15 +157,13 @@ public class HomopolymerRun extends InfoFieldAnnotation implements ExperimentalA GenomeLoc window = ref.getWindow(); int refBasePos = (int) (locus.getStart() - window.getStart())+1; if ( vc.isSimpleDeletion() ) { - // check that deleted bases are the same - byte dBase = bases[refBasePos]; - for ( int i = 0; i < vc.getReference().length(); i ++ ) { - if ( bases[refBasePos+i] != dBase ) { - return 0; - } + if ( refBasePos + vc.getReference().length() - 1 >= bases.length ) { + logger.warn("Encountered a homopolymer at " + locus.toString() + " longer than the tool's default window size, so the position was skipped. " + + "To process this position, add --reference_window_stop to your command with a value equal or greater than " + vc.getReference().length()); + return BAD_RUN_LENGTH; } - return computeHomopolymerRun(dBase, ref); + return computeHomopolymerRun(bases[refBasePos], ref); } else { // check that inserted bases are the same byte insBase = vc.getAlternateAllele(0).getBases()[0]; diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/annotator/VariantAnnotatorIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/annotator/VariantAnnotatorIntegrationTest.java index f518fb468..b0f1ddc4e 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -505,4 +505,22 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { Assert.assertFalse(lineIterator.hasNext()); Assert.assertFalse(lineIteratorAnn.hasNext()); } + + @Test + public void testHomopolymerRunWindow() { + WalkerTestSpec spec = new WalkerTestSpec( + "-T VariantAnnotator -R " + hg19ReferenceWithChrPrefixInChromosomeNames + " -A HomopolymerRun --variant:vcf " + privateTestDir + "problem_del.vcf " + + "-U ALLOW_SEQ_DICT_INCOMPATIBILITY -L chr18:44382010-44384010 --reference_window_stop 59 --no_cmdline_in_header -o %s", 1, + Arrays.asList("bda55495578147b2390d850d7fb25a12")); + executeTest("Testing testHomopolymerRunWindow", spec); + } + + @Test + public void testHomopolymerRunTooBig() { + WalkerTestSpec spec = new WalkerTestSpec( + "-T VariantAnnotator -R " + hg19ReferenceWithChrPrefixInChromosomeNames + " -A HomopolymerRun --variant:vcf " + privateTestDir + "problem_del.vcf " + + "-U ALLOW_SEQ_DICT_INCOMPATIBILITY -L chr18:44382010-44384010 --no_cmdline_in_header -o %s", 1, + Arrays.asList("e20b12fd45f37a7bb31a2f2e91983477")); + executeTest("Testing HomopolymerRunTooBig", spec); + } }