Set --reference_window_stop if homopolymer is greater than window size

This commit is contained in:
Ron Levine 2016-03-11 20:39:54 -05:00
parent 2baf7d8c4e
commit 6d7ec7377c
2 changed files with 31 additions and 7 deletions

View File

@ -51,6 +51,7 @@
package org.broadinstitute.gatk.tools.walkers.annotator; package org.broadinstitute.gatk.tools.walkers.annotator;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.utils.contexts.AlignmentContext; import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext; import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker; import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
@ -83,8 +84,12 @@ import java.util.Map;
*/ */
public class HomopolymerRun extends InfoFieldAnnotation implements ExperimentalAnnotation { public class HomopolymerRun extends InfoFieldAnnotation implements ExperimentalAnnotation {
private final static Logger logger = Logger.getLogger(HardyWeinberg.class);
private boolean ANNOTATE_INDELS = true; private boolean ANNOTATE_INDELS = true;
private final static int BAD_RUN_LENGTH = -1;
public Map<String, Object> annotate(final RefMetaDataTracker tracker, public Map<String, Object> annotate(final RefMetaDataTracker tracker,
final AnnotatorCompatible walker, final AnnotatorCompatible walker,
final ReferenceContext ref, final ReferenceContext ref,
@ -104,6 +109,9 @@ public class HomopolymerRun extends InfoFieldAnnotation implements ExperimentalA
return null; return null;
} }
if ( run == BAD_RUN_LENGTH )
return null;
Map<String, Object> map = new HashMap<>(); Map<String, Object> map = new HashMap<>();
map.put(getKeyNames().get(0), String.format("%d", run)); map.put(getKeyNames().get(0), String.format("%d", run));
return map; return map;
@ -149,15 +157,13 @@ public class HomopolymerRun extends InfoFieldAnnotation implements ExperimentalA
GenomeLoc window = ref.getWindow(); GenomeLoc window = ref.getWindow();
int refBasePos = (int) (locus.getStart() - window.getStart())+1; int refBasePos = (int) (locus.getStart() - window.getStart())+1;
if ( vc.isSimpleDeletion() ) { if ( vc.isSimpleDeletion() ) {
// check that deleted bases are the same if ( refBasePos + vc.getReference().length() - 1 >= bases.length ) {
byte dBase = bases[refBasePos]; logger.warn("Encountered a homopolymer at " + locus.toString() + " longer than the tool's default window size, so the position was skipped. " +
for ( int i = 0; i < vc.getReference().length(); i ++ ) { "To process this position, add --reference_window_stop to your command with a value equal or greater than " + vc.getReference().length());
if ( bases[refBasePos+i] != dBase ) { return BAD_RUN_LENGTH;
return 0;
}
} }
return computeHomopolymerRun(dBase, ref); return computeHomopolymerRun(bases[refBasePos], ref);
} else { } else {
// check that inserted bases are the same // check that inserted bases are the same
byte insBase = vc.getAlternateAllele(0).getBases()[0]; byte insBase = vc.getAlternateAllele(0).getBases()[0];

View File

@ -505,4 +505,22 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
Assert.assertFalse(lineIterator.hasNext()); Assert.assertFalse(lineIterator.hasNext());
Assert.assertFalse(lineIteratorAnn.hasNext()); Assert.assertFalse(lineIteratorAnn.hasNext());
} }
@Test
public void testHomopolymerRunWindow() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T VariantAnnotator -R " + hg19ReferenceWithChrPrefixInChromosomeNames + " -A HomopolymerRun --variant:vcf " + privateTestDir + "problem_del.vcf " +
"-U ALLOW_SEQ_DICT_INCOMPATIBILITY -L chr18:44382010-44384010 --reference_window_stop 59 --no_cmdline_in_header -o %s", 1,
Arrays.asList("bda55495578147b2390d850d7fb25a12"));
executeTest("Testing testHomopolymerRunWindow", spec);
}
@Test
public void testHomopolymerRunTooBig() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T VariantAnnotator -R " + hg19ReferenceWithChrPrefixInChromosomeNames + " -A HomopolymerRun --variant:vcf " + privateTestDir + "problem_del.vcf " +
"-U ALLOW_SEQ_DICT_INCOMPATIBILITY -L chr18:44382010-44384010 --no_cmdline_in_header -o %s", 1,
Arrays.asList("e20b12fd45f37a7bb31a2f2e91983477"));
executeTest("Testing HomopolymerRunTooBig", spec);
}
} }